forked from ccfos/huatuo
cgrouputil: support cgroupv2 for runtime
for running huatuo-bamai instance on cgroupv2, check the host cgroup type automatically. Signed-off-by: Tonghao Zhang <tonghao@bamaicloud.com>
This commit is contained in:
parent
230f7e97f1
commit
c74b061c83
|
@ -15,24 +15,80 @@
|
|||
package cgrouputil
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
cgroups "github.com/containerd/cgroups/v3/cgroup1"
|
||||
"huatuo-bamai/internal/log"
|
||||
|
||||
cgroups "github.com/containerd/cgroups/v3"
|
||||
"github.com/containerd/cgroups/v3/cgroup1"
|
||||
"github.com/containerd/cgroups/v3/cgroup2"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
// RuntimeCgroup instance
|
||||
type RuntimeCgroup struct {
|
||||
cg cgroups.Cgroup
|
||||
cgv1 cgroup1.Cgroup
|
||||
cgv2 *cgroup2.Manager
|
||||
mode cgroups.CGMode
|
||||
}
|
||||
|
||||
var runtimeCgroupPeriod uint64 = 100000
|
||||
|
||||
func newRuntimeCgroupV1(cgPath string, cgResources *specs.LinuxResources) (*RuntimeCgroup, error) {
|
||||
cg, err := cgroup1.New(cgroup1.StaticPath(cgPath), cgResources)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := cg.Add(cgroup1.Process{Pid: os.Getpid()}); err != nil {
|
||||
_ = cg.Delete()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &RuntimeCgroup{cgv1: cg, mode: cgroups.Legacy}, nil
|
||||
}
|
||||
|
||||
func newRuntimeCgroupV2(cgPath string, cgResources *specs.LinuxResources) (*RuntimeCgroup, error) {
|
||||
m, err := cgroup2.NewSystemd("/", cgPath+".slice", -1, cgroup2.ToResources(cgResources))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cgroup2 new systemd: %w", err)
|
||||
}
|
||||
|
||||
// enable cpu and memory cgroup controllers
|
||||
if err := m.ToggleControllers([]string{"cpu", "memory"}, cgroup2.Enable); err != nil {
|
||||
_ = m.DeleteSystemd()
|
||||
return nil, fmt.Errorf("cgroup2 enabling cpu and memory controllers: %w", err)
|
||||
}
|
||||
|
||||
if err := m.AddProc(uint64(os.Getpid())); err != nil {
|
||||
_ = m.DeleteSystemd()
|
||||
return nil, fmt.Errorf("cgroup2 adding pids proc: %w", err)
|
||||
}
|
||||
|
||||
log.Debugf("huatuo-bamai use cgroup path: %v", m)
|
||||
|
||||
return &RuntimeCgroup{cgv2: m, mode: cgroups.Unified}, nil
|
||||
}
|
||||
|
||||
func runtimeCgroupMode(mode cgroups.CGMode) string {
|
||||
switch mode {
|
||||
case cgroups.Legacy:
|
||||
return "legacy"
|
||||
case cgroups.Unified:
|
||||
return "unified"
|
||||
case cgroups.Hybrid:
|
||||
return "hybrid"
|
||||
}
|
||||
|
||||
return "unavailable"
|
||||
}
|
||||
|
||||
// NewRuntimeCgroup new instance
|
||||
func NewRuntimeCgroup(cgPath string, cpu float64, mem int64) (*RuntimeCgroup, error) {
|
||||
quota := int64(cpu * float64(runtimeCgroupPeriod))
|
||||
|
||||
cg, err := cgroups.New(cgroups.StaticPath(cgPath), &specs.LinuxResources{
|
||||
cgResources := &specs.LinuxResources{
|
||||
CPU: &specs.LinuxCPU{
|
||||
Period: &runtimeCgroupPeriod,
|
||||
Quota: &quota,
|
||||
|
@ -40,34 +96,53 @@ func NewRuntimeCgroup(cgPath string, cpu float64, mem int64) (*RuntimeCgroup, er
|
|||
Memory: &specs.LinuxMemory{
|
||||
Limit: &mem,
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := cg.Add(cgroups.Process{Pid: os.Getpid()}); err != nil {
|
||||
_ = cg.Delete()
|
||||
return nil, err
|
||||
mode := cgroups.Mode()
|
||||
switch mode {
|
||||
case cgroups.Legacy:
|
||||
return newRuntimeCgroupV1(cgPath, cgResources)
|
||||
case cgroups.Unified:
|
||||
return newRuntimeCgroupV2(cgPath, cgResources)
|
||||
default:
|
||||
return nil, fmt.Errorf("cgroup type(%s) not supported", runtimeCgroupMode(mode))
|
||||
}
|
||||
|
||||
return &RuntimeCgroup{cg: cg}, nil
|
||||
}
|
||||
|
||||
// Delete HostCgroup
|
||||
func (host *RuntimeCgroup) Delete() {
|
||||
// move pids to cgroup rootfs temporarily, make sure we can remove cgroup dir
|
||||
rootfs, _ := cgroups.Load(cgroups.RootPath)
|
||||
_ = host.cg.MoveTo(rootfs)
|
||||
_ = host.cg.Delete()
|
||||
// 1. move pids to cgroup rootfs temporarily
|
||||
// 2. delete cgroups.
|
||||
switch host.mode {
|
||||
case cgroups.Legacy:
|
||||
rootfs, _ := cgroup1.Load(cgroup1.RootPath)
|
||||
_ = host.cgv1.MoveTo(rootfs)
|
||||
_ = host.cgv1.Delete()
|
||||
case cgroups.Unified:
|
||||
rootfs, _ := cgroup2.LoadSystemd("/", "")
|
||||
_ = host.cgv2.MoveTo(rootfs)
|
||||
_ = host.cgv2.Delete()
|
||||
_ = host.cgv2.DeleteSystemd()
|
||||
}
|
||||
}
|
||||
|
||||
// UpdateCPU update resource
|
||||
func (host *RuntimeCgroup) UpdateCPU(cpu float64) error {
|
||||
quota := int64(cpu * float64(runtimeCgroupPeriod))
|
||||
return host.cg.Update(&specs.LinuxResources{
|
||||
|
||||
cgResources := &specs.LinuxResources{
|
||||
CPU: &specs.LinuxCPU{
|
||||
Period: &runtimeCgroupPeriod,
|
||||
Quota: &quota,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
switch host.mode {
|
||||
case cgroups.Legacy:
|
||||
return host.cgv1.Update(cgResources)
|
||||
case cgroups.Unified:
|
||||
return host.cgv2.Update(cgroup2.ToResources(cgResources))
|
||||
default:
|
||||
return fmt.Errorf("cgroup type(%s) not supported", runtimeCgroupMode(host.mode))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroup2
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// CPUMax is the textual value of a cpu.max file, "<quota> <period>", where
// quota is either a decimal number or the literal "max".
type CPUMax string

// NewCPUMax builds a CPUMax from an optional quota and an optional period.
//
// A nil quota yields the literal "max". A nil period falls back to 100000
// instead of panicking on a nil dereference.
func NewCPUMax(quota *int64, period *uint64) CPUMax {
	// Period used when the caller does not supply one.
	const defaultPeriod uint64 = 100000

	limit := "max"
	if quota != nil {
		limit = strconv.FormatInt(*quota, 10)
	}

	window := defaultPeriod
	if period != nil {
		window = *period
	}

	return CPUMax(strings.Join([]string{limit, strconv.FormatUint(window, 10)}, " "))
}

// CPU holds the cpu and cpuset settings that CPU.Values turns into file writes.
type CPU struct {
	Weight *uint64 // written to cpu.weight when non-nil
	Max    CPUMax  // written to cpu.max when non-empty
	Cpus   string  // written to cpuset.cpus when non-empty
	Mems   string  // written to cpuset.mems when non-empty
}

// extractQuotaAndPeriod parses the "<quota> <period>" pair held in c.
//
// A quota of "max" is reported as math.MaxInt64. A missing or unparsable
// field is reported as 0, so malformed input such as "" or "max" (no period)
// no longer causes an index-out-of-range panic. Fields are split on any
// whitespace, so a trailing newline does not break parsing of the period.
func (c CPUMax) extractQuotaAndPeriod() (int64, uint64) {
	var (
		quota  int64
		period uint64
	)

	fields := strings.Fields(string(c))
	if len(fields) > 0 {
		if fields[0] == "max" {
			quota = math.MaxInt64
		} else {
			// Parse errors intentionally leave the strconv fallback value in place.
			quota, _ = strconv.ParseInt(fields[0], 10, 64)
		}
	}
	if len(fields) > 1 {
		period, _ = strconv.ParseUint(fields[1], 10, 64)
	}
	return quota, period
}
|
||||
|
||||
func (r *CPU) Values() (o []Value) {
|
||||
if r.Weight != nil {
|
||||
o = append(o, Value{
|
||||
filename: "cpu.weight",
|
||||
value: *r.Weight,
|
||||
})
|
||||
}
|
||||
if r.Max != "" {
|
||||
o = append(o, Value{
|
||||
filename: "cpu.max",
|
||||
value: r.Max,
|
||||
})
|
||||
}
|
||||
if r.Cpus != "" {
|
||||
o = append(o, Value{
|
||||
filename: "cpuset.cpus",
|
||||
value: r.Cpus,
|
||||
})
|
||||
}
|
||||
if r.Mems != "" {
|
||||
o = append(o, Value{
|
||||
filename: "cpuset.mems",
|
||||
value: r.Mems,
|
||||
})
|
||||
}
|
||||
return o
|
||||
}
|
|
@ -0,0 +1,200 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Devicefilter contains eBPF device filter program
|
||||
//
|
||||
// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c
|
||||
//
|
||||
// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano)
|
||||
// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397
|
||||
//
|
||||
// This particular Go implementation based on runc version
|
||||
// https://github.com/opencontainers/runc/blob/master/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
|
||||
|
||||
package cgroup2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
// license string format is same as kernel MODULE_LICENSE macro
|
||||
license = "Apache"
|
||||
)
|
||||
|
||||
// DeviceFilter returns eBPF device filter program and its license string
|
||||
func DeviceFilter(devices []specs.LinuxDeviceCgroup) (asm.Instructions, string, error) {
|
||||
p := &program{}
|
||||
p.init()
|
||||
for i := len(devices) - 1; i >= 0; i-- {
|
||||
if err := p.appendDevice(devices[i]); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
}
|
||||
insts, err := p.finalize()
|
||||
return insts, license, err
|
||||
}
|
||||
|
||||
type program struct {
|
||||
insts asm.Instructions
|
||||
hasWildCard bool
|
||||
blockID int
|
||||
}
|
||||
|
||||
func (p *program) init() {
|
||||
// struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423
|
||||
/*
|
||||
u32 access_type
|
||||
u32 major
|
||||
u32 minor
|
||||
*/
|
||||
// R2 <- type (lower 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R2, asm.R1, 0, asm.Half))
|
||||
|
||||
// R3 <- access (upper 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R3, asm.R1, 0, asm.Word),
|
||||
// RSh: bitwise shift right
|
||||
asm.RSh.Imm32(asm.R3, 16))
|
||||
|
||||
// R4 <- major (u32 major at R1[4])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R4, asm.R1, 4, asm.Word))
|
||||
|
||||
// R5 <- minor (u32 minor at R1[8])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
|
||||
}
|
||||
|
||||
// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
|
||||
func (p *program) appendDevice(dev specs.LinuxDeviceCgroup) error {
|
||||
if p.blockID < 0 {
|
||||
return errors.New("the program is finalized")
|
||||
}
|
||||
if p.hasWildCard {
|
||||
// All entries after wildcard entry are ignored
|
||||
return nil
|
||||
}
|
||||
|
||||
bpfType := int32(-1)
|
||||
hasType := true
|
||||
switch dev.Type {
|
||||
case string('c'):
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
|
||||
case string('b'):
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
|
||||
case string('a'):
|
||||
hasType = false
|
||||
default:
|
||||
// if not specified in OCI json, typ is set to DeviceTypeAll
|
||||
return fmt.Errorf("invalid DeviceType %q", dev.Type)
|
||||
}
|
||||
if *dev.Major > math.MaxUint32 {
|
||||
return fmt.Errorf("invalid major %d", *dev.Major)
|
||||
}
|
||||
if *dev.Minor > math.MaxUint32 {
|
||||
return fmt.Errorf("invalid minor %d", *dev.Major)
|
||||
}
|
||||
hasMajor := *dev.Major >= 0 // if not specified in OCI json, major is set to -1
|
||||
hasMinor := *dev.Minor >= 0
|
||||
bpfAccess := int32(0)
|
||||
for _, r := range dev.Access {
|
||||
switch r {
|
||||
case 'r':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_READ
|
||||
case 'w':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_WRITE
|
||||
case 'm':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
|
||||
default:
|
||||
return fmt.Errorf("unknown device access %v", r)
|
||||
}
|
||||
}
|
||||
// If the access is rwm, skip the check.
|
||||
hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD)
|
||||
|
||||
blockSym := fmt.Sprintf("block-%d", p.blockID)
|
||||
nextBlockSym := fmt.Sprintf("block-%d", p.blockID+1)
|
||||
prevBlockLastIdx := len(p.insts) - 1
|
||||
if hasType {
|
||||
p.insts = append(p.insts,
|
||||
// if (R2 != bpfType) goto next
|
||||
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasAccess {
|
||||
p.insts = append(p.insts,
|
||||
// if (R3 & bpfAccess == 0 /* use R1 as a temp var */) goto next
|
||||
asm.Mov.Reg32(asm.R1, asm.R3),
|
||||
asm.And.Imm32(asm.R1, bpfAccess),
|
||||
asm.JEq.Imm(asm.R1, 0, nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMajor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R4 != major) goto next
|
||||
asm.JNE.Imm(asm.R4, int32(*dev.Major), nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMinor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R5 != minor) goto next
|
||||
asm.JNE.Imm(asm.R5, int32(*dev.Minor), nextBlockSym),
|
||||
)
|
||||
}
|
||||
if !hasType && !hasAccess && !hasMajor && !hasMinor {
|
||||
p.hasWildCard = true
|
||||
}
|
||||
p.insts = append(p.insts, acceptBlock(dev.Allow)...)
|
||||
// set blockSym to the first instruction we added in this iteration
|
||||
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].WithSymbol(blockSym)
|
||||
p.blockID++
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *program) finalize() (asm.Instructions, error) {
|
||||
if p.hasWildCard {
|
||||
// acceptBlock with asm.Return() is already inserted
|
||||
return p.insts, nil
|
||||
}
|
||||
blockSym := fmt.Sprintf("block-%d", p.blockID)
|
||||
p.insts = append(p.insts,
|
||||
// R0 <- 0
|
||||
asm.Mov.Imm32(asm.R0, 0).WithSymbol(blockSym),
|
||||
asm.Return(),
|
||||
)
|
||||
p.blockID = -1
|
||||
return p.insts, nil
|
||||
}
|
||||
|
||||
func acceptBlock(accept bool) asm.Instructions {
|
||||
v := int32(0)
|
||||
if accept {
|
||||
v = 1
|
||||
}
|
||||
return []asm.Instruction{
|
||||
// R0 <- v
|
||||
asm.Mov.Imm32(asm.R0, v),
|
||||
asm.Return(),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroup2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/cilium/ebpf"
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/cilium/ebpf/link"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
|
||||
//
|
||||
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
|
||||
//
|
||||
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
|
||||
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
|
||||
nilCloser := func() error {
|
||||
return nil
|
||||
}
|
||||
spec := &ebpf.ProgramSpec{
|
||||
Type: ebpf.CGroupDevice,
|
||||
Instructions: insts,
|
||||
License: license,
|
||||
}
|
||||
prog, err := ebpf.NewProgram(spec)
|
||||
if err != nil {
|
||||
return nilCloser, err
|
||||
}
|
||||
err = link.RawAttachProgram(link.RawAttachProgramOptions{
|
||||
Target: dirFD,
|
||||
Program: prog,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
Flags: unix.BPF_F_ALLOW_MULTI,
|
||||
})
|
||||
if err != nil {
|
||||
return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err)
|
||||
}
|
||||
closer := func() error {
|
||||
err = link.RawDetachProgram(link.RawDetachProgramOptions{
|
||||
Target: dirFD,
|
||||
Program: prog,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return closer, nil
|
||||
}
|
||||
|
||||
// isRWM reports whether cgroupPermissions contains each of 'r', 'w' and 'm'
// at least once. Order, repetition and any other characters are ignored.
func isRWM(cgroupPermissions string) bool {
	const (
		readBit = 1 << iota
		writeBit
		mknodBit
	)

	var seen uint8
	for _, perm := range cgroupPermissions {
		switch perm {
		case 'r':
			seen |= readBit
		case 'w':
			seen |= writeBit
		case 'm':
			seen |= mknodBit
		}
	}
	return seen == readBit|writeBit|mknodBit
}
|
||||
|
||||
// the logic is from runc
|
||||
// https://github.com/opencontainers/runc/blob/master/libcontainer/cgroups/fs/devices_v2.go#L44
|
||||
func canSkipEBPFError(devices []specs.LinuxDeviceCgroup) bool {
|
||||
for _, dev := range devices {
|
||||
if dev.Allow || !isRWM(dev.Access) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroup2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrInvalidFormat = errors.New("cgroups: parsing file with invalid format failed")
|
||||
ErrInvalidGroupPath = errors.New("cgroups: invalid group path")
|
||||
)
|
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroup2
|
||||
|
||||
import "strings"
|
||||
|
||||
type HugeTlb []HugeTlbEntry
|
||||
|
||||
type HugeTlbEntry struct {
|
||||
HugePageSize string
|
||||
Limit uint64
|
||||
}
|
||||
|
||||
func (r *HugeTlb) Values() (o []Value) {
|
||||
for _, e := range *r {
|
||||
o = append(o, Value{
|
||||
filename: strings.Join([]string{"hugetlb", e.HugePageSize, "max"}, "."),
|
||||
value: e.Limit,
|
||||
})
|
||||
}
|
||||
|
||||
return o
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroup2
|
||||
|
||||
import "fmt"
|
||||
|
||||
// IOType is the key used for a limit inside an io.max entry.
type IOType string

// Keys rendered by Entry.String.
const (
	ReadBPS   = IOType("rbps")
	WriteBPS  = IOType("wbps")
	ReadIOPS  = IOType("riops")
	WriteIOPS = IOType("wiops")
)

// BFQ holds the weight written to io.bfq.weight by IO.Values.
type BFQ struct {
	Weight uint16
}

// Entry is a single limit for the device Major:Minor.
type Entry struct {
	Type  IOType
	Major int64
	Minor int64
	Rate  uint64
}

// String renders the entry as "<major>:<minor> <type>=<rate>".
func (e Entry) String() string {
	rendered := fmt.Sprintf("%d:%d %s=%d", e.Major, e.Minor, e.Type, e.Rate)
	return rendered
}
|
||||
|
||||
type IO struct {
|
||||
BFQ BFQ
|
||||
Max []Entry
|
||||
}
|
||||
|
||||
func (i *IO) Values() (o []Value) {
|
||||
if i.BFQ.Weight != 0 {
|
||||
o = append(o, Value{
|
||||
filename: "io.bfq.weight",
|
||||
value: i.BFQ.Weight,
|
||||
})
|
||||
}
|
||||
for _, e := range i.Max {
|
||||
o = append(o, Value{
|
||||
filename: "io.max",
|
||||
value: e.String(),
|
||||
})
|
||||
}
|
||||
return o
|
||||
}
|
|
@ -0,0 +1,984 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroup2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/cgroups/v3/cgroup2/stats"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
"github.com/godbus/dbus/v5"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
subtreeControl = "cgroup.subtree_control"
|
||||
controllersFile = "cgroup.controllers"
|
||||
killFile = "cgroup.kill"
|
||||
typeFile = "cgroup.type"
|
||||
defaultCgroup2Path = "/sys/fs/cgroup"
|
||||
defaultSlice = "system.slice"
|
||||
)
|
||||
|
||||
var canDelegate bool
|
||||
|
||||
// Event is a set of unsigned event counters.
// NOTE(review): the field names look like the keys of the cgroup v2
// memory.events file (low, high, max, oom, oom_kill) — confirm against the
// code that populates this struct, which is not visible here.
type Event struct {
|
||||
Low uint64
|
||||
High uint64
|
||||
Max uint64
|
||||
OOM uint64
|
||||
OOMKill uint64
|
||||
}
|
||||
|
||||
// Resources for a cgroups v2 unified hierarchy
|
||||
type Resources struct {
|
||||
CPU *CPU
|
||||
Memory *Memory
|
||||
Pids *Pids
|
||||
IO *IO
|
||||
RDMA *RDMA
|
||||
HugeTlb *HugeTlb
|
||||
// When len(Devices) is zero, devices are not controlled
|
||||
Devices []specs.LinuxDeviceCgroup
|
||||
}
|
||||
|
||||
// Values returns the raw filenames and values that
|
||||
// can be written to the unified hierarchy
|
||||
func (r *Resources) Values() (o []Value) {
|
||||
if r.CPU != nil {
|
||||
o = append(o, r.CPU.Values()...)
|
||||
}
|
||||
if r.Memory != nil {
|
||||
o = append(o, r.Memory.Values()...)
|
||||
}
|
||||
if r.Pids != nil {
|
||||
o = append(o, r.Pids.Values()...)
|
||||
}
|
||||
if r.IO != nil {
|
||||
o = append(o, r.IO.Values()...)
|
||||
}
|
||||
if r.RDMA != nil {
|
||||
o = append(o, r.RDMA.Values()...)
|
||||
}
|
||||
if r.HugeTlb != nil {
|
||||
o = append(o, r.HugeTlb.Values()...)
|
||||
}
|
||||
return o
|
||||
}
|
||||
|
||||
// EnabledControllers returns the list of all not nil resource controllers
|
||||
func (r *Resources) EnabledControllers() (c []string) {
|
||||
if r.CPU != nil {
|
||||
c = append(c, "cpu")
|
||||
if r.CPU.Cpus != "" || r.CPU.Mems != "" {
|
||||
c = append(c, "cpuset")
|
||||
}
|
||||
}
|
||||
if r.Memory != nil {
|
||||
c = append(c, "memory")
|
||||
}
|
||||
if r.Pids != nil {
|
||||
c = append(c, "pids")
|
||||
}
|
||||
if r.IO != nil {
|
||||
c = append(c, "io")
|
||||
}
|
||||
if r.RDMA != nil {
|
||||
c = append(c, "rdma")
|
||||
}
|
||||
if r.HugeTlb != nil {
|
||||
c = append(c, "hugetlb")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Value of a cgroup setting
|
||||
type Value struct {
|
||||
filename string
|
||||
value interface{}
|
||||
}
|
||||
|
||||
// write the value to the full, absolute path, of a unified hierarchy
|
||||
func (c *Value) write(path string, perm os.FileMode) error {
|
||||
var data []byte
|
||||
switch t := c.value.(type) {
|
||||
case uint64:
|
||||
data = []byte(strconv.FormatUint(t, 10))
|
||||
case uint16:
|
||||
data = []byte(strconv.FormatUint(uint64(t), 10))
|
||||
case int64:
|
||||
data = []byte(strconv.FormatInt(t, 10))
|
||||
case []byte:
|
||||
data = t
|
||||
case string:
|
||||
data = []byte(t)
|
||||
case CPUMax:
|
||||
data = []byte(t)
|
||||
default:
|
||||
return ErrInvalidFormat
|
||||
}
|
||||
|
||||
return os.WriteFile(
|
||||
filepath.Join(path, c.filename),
|
||||
data,
|
||||
perm,
|
||||
)
|
||||
}
|
||||
|
||||
func writeValues(path string, values []Value) error {
|
||||
for _, o := range values {
|
||||
if err := o.write(path, defaultFilePerm); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewManager(mountpoint string, group string, resources *Resources) (*Manager, error) {
|
||||
if resources == nil {
|
||||
return nil, errors.New("resources reference is nil")
|
||||
}
|
||||
if err := VerifyGroupPath(group); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
path := filepath.Join(mountpoint, group)
|
||||
if err := os.MkdirAll(path, defaultDirPerm); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := Manager{
|
||||
unifiedMountpoint: mountpoint,
|
||||
path: path,
|
||||
}
|
||||
if err := m.ToggleControllers(resources.EnabledControllers(), Enable); err != nil {
|
||||
// clean up cgroup dir on failure
|
||||
os.Remove(path)
|
||||
return nil, err
|
||||
}
|
||||
if err := setResources(path, resources); err != nil {
|
||||
os.Remove(path)
|
||||
return nil, err
|
||||
}
|
||||
return &m, nil
|
||||
}
|
||||
|
||||
// InitConfig collects the settings that InitOpts can change.
type InitConfig struct {
	mountpoint string
}

// InitOpts mutates an InitConfig and may reject it with an error.
type InitOpts func(c *InitConfig) error

// WithMountpoint sets the unified mountpoint. The default path is /sys/fs/cgroup.
func WithMountpoint(path string) InitOpts {
	setMountpoint := func(cfg *InitConfig) error {
		cfg.mountpoint = path
		return nil
	}
	return setMountpoint
}
|
||||
|
||||
// Load a cgroup.
|
||||
func Load(group string, opts ...InitOpts) (*Manager, error) {
|
||||
c := InitConfig{mountpoint: defaultCgroup2Path}
|
||||
for _, opt := range opts {
|
||||
if err := opt(&c); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if err := VerifyGroupPath(group); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
path := filepath.Join(c.mountpoint, group)
|
||||
return &Manager{
|
||||
unifiedMountpoint: c.mountpoint,
|
||||
path: path,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Manager refers to one cgroup directory below a unified mountpoint.
type Manager struct {
|
||||
// unifiedMountpoint is the mountpoint the manager was created with.
unifiedMountpoint string
|
||||
// path is the cgroup directory, built by joining the mountpoint and the group.
path string
|
||||
}
|
||||
|
||||
func setResources(path string, resources *Resources) error {
|
||||
if resources != nil {
|
||||
if err := writeValues(path, resources.Values()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setDevices(path, resources.Devices); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// CgroupType represents the types a cgroup can be.
|
||||
type CgroupType string
|
||||
|
||||
const (
|
||||
Domain CgroupType = "domain"
|
||||
Threaded CgroupType = "threaded"
|
||||
)
|
||||
|
||||
func (c *Manager) GetType() (CgroupType, error) {
|
||||
val, err := os.ReadFile(filepath.Join(c.path, typeFile))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
trimmed := strings.TrimSpace(string(val))
|
||||
return CgroupType(trimmed), nil
|
||||
}
|
||||
|
||||
func (c *Manager) SetType(cgType CgroupType) error {
|
||||
// NOTE: We could abort if cgType != Threaded here as currently
|
||||
// it's not possible to revert back to domain, but not sure
|
||||
// it's worth being that opinionated, especially if that may
|
||||
// ever change.
|
||||
v := Value{
|
||||
filename: typeFile,
|
||||
value: string(cgType),
|
||||
}
|
||||
return writeValues(c.path, []Value{v})
|
||||
}
|
||||
|
||||
func (c *Manager) RootControllers() ([]string, error) {
|
||||
b, err := os.ReadFile(filepath.Join(c.unifiedMountpoint, controllersFile))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return strings.Fields(string(b)), nil
|
||||
}
|
||||
|
||||
func (c *Manager) Controllers() ([]string, error) {
|
||||
b, err := os.ReadFile(filepath.Join(c.path, controllersFile))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return strings.Fields(string(b)), nil
|
||||
}
|
||||
|
||||
// Update applies resources to this cgroup by passing its path to
// setResources, which does nothing when resources is nil.
func (c *Manager) Update(resources *Resources) error {
|
||||
return setResources(c.path, resources)
|
||||
}
|
||||
|
||||
// ControllerToggle selects whether controllers are enabled or disabled.
type ControllerToggle int

const (
	// Enable has the value 1.
	Enable ControllerToggle = iota + 1
	// Disable has the value 2.
	Disable
)

// toggleFunc returns a new slice in which every controller name is prepended
// with prefix. The input slice is left untouched.
func toggleFunc(controllers []string, prefix string) []string {
	prefixed := make([]string, 0, len(controllers))
	for _, name := range controllers {
		prefixed = append(prefixed, prefix+name)
	}
	return prefixed
}
|
||||
|
||||
func (c *Manager) ToggleControllers(controllers []string, t ControllerToggle) error {
|
||||
// when c.path is like /foo/bar/baz, the following files need to be written:
|
||||
// * /sys/fs/cgroup/cgroup.subtree_control
|
||||
// * /sys/fs/cgroup/foo/cgroup.subtree_control
|
||||
// * /sys/fs/cgroup/foo/bar/cgroup.subtree_control
|
||||
// Note that /sys/fs/cgroup/foo/bar/baz/cgroup.subtree_control does not need to be written.
|
||||
split := strings.Split(c.path, "/")
|
||||
var lastErr error
|
||||
for i := range split {
|
||||
f := strings.Join(split[:i], "/")
|
||||
if !strings.HasPrefix(f, c.unifiedMountpoint) || f == c.path {
|
||||
continue
|
||||
}
|
||||
filePath := filepath.Join(f, subtreeControl)
|
||||
if err := c.writeSubtreeControl(filePath, controllers, t); err != nil {
|
||||
// When running as rootless, the user may face EPERM on parent groups, but it is neglible when the
|
||||
// controller is already written.
|
||||
// So we only return the last error.
|
||||
lastErr = fmt.Errorf("failed to write subtree controllers %+v to %q: %w", controllers, filePath, err)
|
||||
} else {
|
||||
lastErr = nil
|
||||
}
|
||||
}
|
||||
return lastErr
|
||||
}
|
||||
|
||||
func (c *Manager) writeSubtreeControl(filePath string, controllers []string, t ControllerToggle) error {
|
||||
f, err := os.OpenFile(filePath, os.O_WRONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
switch t {
|
||||
case Enable:
|
||||
controllers = toggleFunc(controllers, "+")
|
||||
case Disable:
|
||||
controllers = toggleFunc(controllers, "-")
|
||||
}
|
||||
_, err = f.WriteString(strings.Join(controllers, " "))
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Manager) NewChild(name string, resources *Resources) (*Manager, error) {
|
||||
if strings.HasPrefix(name, "/") {
|
||||
return nil, errors.New("name must be relative")
|
||||
}
|
||||
path := filepath.Join(c.path, name)
|
||||
if err := os.MkdirAll(path, defaultDirPerm); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := Manager{
|
||||
unifiedMountpoint: c.unifiedMountpoint,
|
||||
path: path,
|
||||
}
|
||||
if resources != nil {
|
||||
if err := m.ToggleControllers(resources.EnabledControllers(), Enable); err != nil {
|
||||
// clean up cgroup dir on failure
|
||||
os.Remove(path)
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if err := setResources(path, resources); err != nil {
|
||||
// clean up cgroup dir on failure
|
||||
os.Remove(path)
|
||||
return nil, err
|
||||
}
|
||||
return &m, nil
|
||||
}
|
||||
|
||||
func (c *Manager) AddProc(pid uint64) error {
|
||||
v := Value{
|
||||
filename: cgroupProcs,
|
||||
value: pid,
|
||||
}
|
||||
return writeValues(c.path, []Value{v})
|
||||
}
|
||||
|
||||
func (c *Manager) AddThread(tid uint64) error {
|
||||
v := Value{
|
||||
filename: cgroupThreads,
|
||||
value: tid,
|
||||
}
|
||||
return writeValues(c.path, []Value{v})
|
||||
}
|
||||
|
||||
// Kill will try to forcibly exit all of the processes in the cgroup. This is
|
||||
// equivalent to sending a SIGKILL to every process. On kernels 5.14 and greater
|
||||
// this will use the cgroup.kill file, on anything that doesn't have the cgroup.kill
|
||||
// file, a manual process of freezing -> sending a SIGKILL to every process -> thawing
|
||||
// will be used.
|
||||
func (c *Manager) Kill() error {
|
||||
v := Value{
|
||||
filename: killFile,
|
||||
value: "1",
|
||||
}
|
||||
err := writeValues(c.path, []Value{v})
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
logrus.Warnf("falling back to slower kill implementation: %s", err)
|
||||
// Fallback to slow method.
|
||||
return c.fallbackKill()
|
||||
}
|
||||
|
||||
// fallbackKill is a slower fallback to the more modern (kernels 5.14+)
|
||||
// approach of writing to the cgroup.kill file. This is heavily pulled
|
||||
// from runc's same approach (in signalAllProcesses), with the only differences
|
||||
// being this is just tailored to the API exposed in this library, and we don't
|
||||
// need to care about signals other than SIGKILL.
|
||||
//
|
||||
// https://github.com/opencontainers/runc/blob/8da0a0b5675764feaaaaad466f6567a9983fcd08/libcontainer/init_linux.go#L523-L529
|
||||
func (c *Manager) fallbackKill() error {
|
||||
if err := c.Freeze(); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
pids, err := c.Procs(true)
|
||||
if err != nil {
|
||||
if err := c.Thaw(); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
var procs []*os.Process
|
||||
for _, pid := range pids {
|
||||
p, err := os.FindProcess(int(pid))
|
||||
if err != nil {
|
||||
logrus.Warn(err)
|
||||
continue
|
||||
}
|
||||
procs = append(procs, p)
|
||||
if err := p.Signal(unix.SIGKILL); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
}
|
||||
if err := c.Thaw(); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
|
||||
subreaper, err := getSubreaper()
|
||||
if err != nil {
|
||||
// The error here means that PR_GET_CHILD_SUBREAPER is not
|
||||
// supported because this code might run on a kernel older
|
||||
// than 3.4. We don't want to throw an error in that case,
|
||||
// and we simplify things, considering there is no subreaper
|
||||
// set.
|
||||
subreaper = 0
|
||||
}
|
||||
|
||||
for _, p := range procs {
|
||||
// In case a subreaper has been setup, this code must not
|
||||
// wait for the process. Otherwise, we cannot be sure the
|
||||
// current process will be reaped by the subreaper, while
|
||||
// the subreaper might be waiting for this process in order
|
||||
// to retrieve its exit code.
|
||||
if subreaper == 0 {
|
||||
if _, err := p.Wait(); err != nil {
|
||||
if !errors.Is(err, unix.ECHILD) {
|
||||
logrus.Warnf("wait on pid %d failed: %s", p.Pid, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Manager) Delete() error {
|
||||
// kernel prevents cgroups with running process from being removed, check the tree is empty
|
||||
processes, err := c.Procs(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(processes) > 0 {
|
||||
return fmt.Errorf("cgroups: unable to remove path %q: still contains running processes", c.path)
|
||||
}
|
||||
return remove(c.path)
|
||||
}
|
||||
|
||||
func (c *Manager) getTasks(recursive bool, tType string) ([]uint64, error) {
|
||||
var tasks []uint64
|
||||
err := filepath.Walk(c.path, func(p string, info fs.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !recursive && info.IsDir() {
|
||||
if p == c.path {
|
||||
return nil
|
||||
}
|
||||
return filepath.SkipDir
|
||||
}
|
||||
_, name := filepath.Split(p)
|
||||
if name != tType {
|
||||
return nil
|
||||
}
|
||||
curTasks, err := parseCgroupTasksFile(p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tasks = append(tasks, curTasks...)
|
||||
return nil
|
||||
})
|
||||
return tasks, err
|
||||
}
|
||||
|
||||
func (c *Manager) Procs(recursive bool) ([]uint64, error) {
|
||||
return c.getTasks(recursive, cgroupProcs)
|
||||
}
|
||||
|
||||
func (c *Manager) Threads(recursive bool) ([]uint64, error) {
|
||||
return c.getTasks(recursive, cgroupThreads)
|
||||
}
|
||||
|
||||
func (c *Manager) MoveTo(destination *Manager) error {
|
||||
processes, err := c.Procs(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, p := range processes {
|
||||
if err := destination.AddProc(p); err != nil {
|
||||
if strings.Contains(err.Error(), "no such process") {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Manager) Stat() (*stats.Metrics, error) {
|
||||
controllers, err := c.Controllers()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Sizing this avoids an allocation to increase the map at runtime;
|
||||
// currently the default bucket size is 8 and we put 40+ elements
|
||||
// in it so we'd always end up allocating.
|
||||
out := make(map[string]uint64, 50)
|
||||
for _, controller := range controllers {
|
||||
switch controller {
|
||||
case "cpu", "memory":
|
||||
if err := readKVStatsFile(c.path, controller+".stat", out); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
memoryEvents := make(map[string]uint64)
|
||||
if err := readKVStatsFile(c.path, "memory.events", memoryEvents); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var metrics stats.Metrics
|
||||
metrics.Pids = &stats.PidsStat{
|
||||
Current: getStatFileContentUint64(filepath.Join(c.path, "pids.current")),
|
||||
Limit: getStatFileContentUint64(filepath.Join(c.path, "pids.max")),
|
||||
}
|
||||
metrics.CPU = &stats.CPUStat{
|
||||
UsageUsec: out["usage_usec"],
|
||||
UserUsec: out["user_usec"],
|
||||
SystemUsec: out["system_usec"],
|
||||
NrPeriods: out["nr_periods"],
|
||||
NrThrottled: out["nr_throttled"],
|
||||
ThrottledUsec: out["throttled_usec"],
|
||||
PSI: getStatPSIFromFile(filepath.Join(c.path, "cpu.pressure")),
|
||||
}
|
||||
metrics.Memory = &stats.MemoryStat{
|
||||
Anon: out["anon"],
|
||||
File: out["file"],
|
||||
KernelStack: out["kernel_stack"],
|
||||
Slab: out["slab"],
|
||||
Sock: out["sock"],
|
||||
Shmem: out["shmem"],
|
||||
FileMapped: out["file_mapped"],
|
||||
FileDirty: out["file_dirty"],
|
||||
FileWriteback: out["file_writeback"],
|
||||
AnonThp: out["anon_thp"],
|
||||
InactiveAnon: out["inactive_anon"],
|
||||
ActiveAnon: out["active_anon"],
|
||||
InactiveFile: out["inactive_file"],
|
||||
ActiveFile: out["active_file"],
|
||||
Unevictable: out["unevictable"],
|
||||
SlabReclaimable: out["slab_reclaimable"],
|
||||
SlabUnreclaimable: out["slab_unreclaimable"],
|
||||
Pgfault: out["pgfault"],
|
||||
Pgmajfault: out["pgmajfault"],
|
||||
WorkingsetRefault: out["workingset_refault"],
|
||||
WorkingsetActivate: out["workingset_activate"],
|
||||
WorkingsetNodereclaim: out["workingset_nodereclaim"],
|
||||
Pgrefill: out["pgrefill"],
|
||||
Pgscan: out["pgscan"],
|
||||
Pgsteal: out["pgsteal"],
|
||||
Pgactivate: out["pgactivate"],
|
||||
Pgdeactivate: out["pgdeactivate"],
|
||||
Pglazyfree: out["pglazyfree"],
|
||||
Pglazyfreed: out["pglazyfreed"],
|
||||
ThpFaultAlloc: out["thp_fault_alloc"],
|
||||
ThpCollapseAlloc: out["thp_collapse_alloc"],
|
||||
Usage: getStatFileContentUint64(filepath.Join(c.path, "memory.current")),
|
||||
UsageLimit: getStatFileContentUint64(filepath.Join(c.path, "memory.max")),
|
||||
MaxUsage: getStatFileContentUint64(filepath.Join(c.path, "memory.peak")),
|
||||
SwapUsage: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.current")),
|
||||
SwapLimit: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.max")),
|
||||
SwapMaxUsage: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.peak")),
|
||||
PSI: getStatPSIFromFile(filepath.Join(c.path, "memory.pressure")),
|
||||
}
|
||||
if len(memoryEvents) > 0 {
|
||||
metrics.MemoryEvents = &stats.MemoryEvents{
|
||||
Low: memoryEvents["low"],
|
||||
High: memoryEvents["high"],
|
||||
Max: memoryEvents["max"],
|
||||
Oom: memoryEvents["oom"],
|
||||
OomKill: memoryEvents["oom_kill"],
|
||||
}
|
||||
}
|
||||
metrics.Io = &stats.IOStat{
|
||||
Usage: readIoStats(c.path),
|
||||
PSI: getStatPSIFromFile(filepath.Join(c.path, "io.pressure")),
|
||||
}
|
||||
metrics.Rdma = &stats.RdmaStat{
|
||||
Current: rdmaStats(filepath.Join(c.path, "rdma.current")),
|
||||
Limit: rdmaStats(filepath.Join(c.path, "rdma.max")),
|
||||
}
|
||||
metrics.Hugetlb = readHugeTlbStats(c.path)
|
||||
|
||||
return &metrics, nil
|
||||
}
|
||||
|
||||
func readKVStatsFile(path string, file string, out map[string]uint64) error {
|
||||
f, err := os.Open(filepath.Join(path, file))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
name, value, err := parseKV(s.Text())
|
||||
if err != nil {
|
||||
return fmt.Errorf("error while parsing %s (line=%q): %w", filepath.Join(path, file), s.Text(), err)
|
||||
}
|
||||
out[name] = value
|
||||
}
|
||||
return s.Err()
|
||||
}
|
||||
|
||||
func (c *Manager) Freeze() error {
|
||||
return c.freeze(c.path, Frozen)
|
||||
}
|
||||
|
||||
func (c *Manager) Thaw() error {
|
||||
return c.freeze(c.path, Thawed)
|
||||
}
|
||||
|
||||
func (c *Manager) freeze(path string, state State) error {
|
||||
values := state.Values()
|
||||
for {
|
||||
if err := writeValues(path, values); err != nil {
|
||||
return err
|
||||
}
|
||||
current, err := fetchState(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if current == state {
|
||||
return nil
|
||||
}
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Manager) isCgroupEmpty() bool {
|
||||
// In case of any error we return true so that we exit and don't leak resources
|
||||
out := make(map[string]uint64)
|
||||
if err := readKVStatsFile(c.path, "cgroup.events", out); err != nil {
|
||||
return true
|
||||
}
|
||||
if v, ok := out["populated"]; ok {
|
||||
return v == 0
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// MemoryEventFD returns inotify file descriptor and 'memory.events' inotify watch descriptor
|
||||
func (c *Manager) MemoryEventFD() (int, uint32, error) {
|
||||
fpath := filepath.Join(c.path, "memory.events")
|
||||
fd, err := unix.InotifyInit()
|
||||
if err != nil {
|
||||
return 0, 0, errors.New("failed to create inotify fd")
|
||||
}
|
||||
wd, err := unix.InotifyAddWatch(fd, fpath, unix.IN_MODIFY)
|
||||
if err != nil {
|
||||
unix.Close(fd)
|
||||
return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", fpath, err)
|
||||
}
|
||||
// monitor to detect process exit/cgroup deletion
|
||||
evpath := filepath.Join(c.path, "cgroup.events")
|
||||
if _, err = unix.InotifyAddWatch(fd, evpath, unix.IN_MODIFY); err != nil {
|
||||
unix.Close(fd)
|
||||
return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", evpath, err)
|
||||
}
|
||||
|
||||
return fd, uint32(wd), nil
|
||||
}
|
||||
|
||||
func (c *Manager) EventChan() (<-chan Event, <-chan error) {
|
||||
ec := make(chan Event)
|
||||
errCh := make(chan error, 1)
|
||||
go c.waitForEvents(ec, errCh)
|
||||
|
||||
return ec, errCh
|
||||
}
|
||||
|
||||
func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) {
|
||||
defer close(errCh)
|
||||
|
||||
fd, _, err := c.MemoryEventFD()
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
return
|
||||
}
|
||||
defer unix.Close(fd)
|
||||
|
||||
for {
|
||||
buffer := make([]byte, unix.SizeofInotifyEvent*10)
|
||||
bytesRead, err := unix.Read(fd, buffer)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
return
|
||||
}
|
||||
if bytesRead >= unix.SizeofInotifyEvent {
|
||||
out := make(map[string]uint64)
|
||||
if err := readKVStatsFile(c.path, "memory.events", out); err != nil {
|
||||
// When cgroup is deleted read may return -ENODEV instead of -ENOENT from open.
|
||||
if _, statErr := os.Lstat(filepath.Join(c.path, "memory.events")); !os.IsNotExist(statErr) {
|
||||
errCh <- err
|
||||
}
|
||||
return
|
||||
}
|
||||
ec <- Event{
|
||||
Low: out["low"],
|
||||
High: out["high"],
|
||||
Max: out["max"],
|
||||
OOM: out["oom"],
|
||||
OOMKill: out["oom_kill"],
|
||||
}
|
||||
if c.isCgroupEmpty() {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func setDevices(path string, devices []specs.LinuxDeviceCgroup) error {
|
||||
if len(devices) == 0 {
|
||||
return nil
|
||||
}
|
||||
insts, license, err := DeviceFilter(devices)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dirFD, err := unix.Open(path, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0o600)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot get dir FD for %s", path)
|
||||
}
|
||||
defer unix.Close(dirFD)
|
||||
if _, err := LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
|
||||
if !canSkipEBPFError(devices) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// getSystemdFullPath returns the full systemd path when creating a systemd slice group.
|
||||
// the reason this is necessary is because the "-" character has a special meaning in
|
||||
// systemd slice. For example, when creating a slice called "my-group-112233.slice",
|
||||
// systemd will create a hierarchy like this:
|
||||
//
|
||||
// /sys/fs/cgroup/my.slice/my-group.slice/my-group-112233.slice
|
||||
func getSystemdFullPath(slice, group string) string {
|
||||
return filepath.Join(defaultCgroup2Path, dashesToPath(slice), dashesToPath(group))
|
||||
}
|
||||
|
||||
// dashesToPath converts a slice name with dashes to its corresponding systemd
// filesystem path. A name that ends in ".slice" and contains "-" is expanded
// into one path element per dash-separated prefix, each carrying the ".slice"
// suffix (e.g. "a-b.slice" becomes "a.slice/a-b.slice"). Any other input is
// returned after being cleaned by filepath.Join.
func dashesToPath(in string) string {
	if !strings.HasSuffix(in, ".slice") || !strings.Contains(in, "-") {
		return filepath.Join(in)
	}

	var expanded, prefix string
	for i, segment := range strings.Split(in, "-") {
		// Grow the prefix one dash-separated segment at a time.
		if i == 0 {
			prefix = segment
		} else {
			prefix += "-" + segment
		}

		element := prefix
		if !strings.HasSuffix(element, ".slice") {
			element += ".slice"
		}
		expanded = filepath.Join(expanded, element)
	}
	return expanded
}
|
||||
|
||||
func NewSystemd(slice, group string, pid int, resources *Resources) (*Manager, error) {
|
||||
if slice == "" {
|
||||
slice = defaultSlice
|
||||
}
|
||||
ctx := context.TODO()
|
||||
path := getSystemdFullPath(slice, group)
|
||||
conn, err := systemdDbus.NewWithContext(ctx)
|
||||
if err != nil {
|
||||
return &Manager{}, err
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
properties := []systemdDbus.Property{
|
||||
systemdDbus.PropDescription("cgroup " + group),
|
||||
newSystemdProperty("DefaultDependencies", false),
|
||||
newSystemdProperty("MemoryAccounting", true),
|
||||
newSystemdProperty("CPUAccounting", true),
|
||||
newSystemdProperty("IOAccounting", true),
|
||||
}
|
||||
|
||||
// if we create a slice, the parent is defined via a Wants=
|
||||
if strings.HasSuffix(group, ".slice") {
|
||||
properties = append(properties, systemdDbus.PropWants(defaultSlice))
|
||||
} else {
|
||||
// otherwise, we use Slice=
|
||||
properties = append(properties, systemdDbus.PropSlice(defaultSlice))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newSystemdProperty("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
if resources.Memory != nil && resources.Memory.Min != nil && *resources.Memory.Min != 0 {
|
||||
properties = append(properties,
|
||||
newSystemdProperty("MemoryMin", uint64(*resources.Memory.Min)))
|
||||
}
|
||||
|
||||
if resources.Memory != nil && resources.Memory.Max != nil && *resources.Memory.Max != 0 {
|
||||
properties = append(properties,
|
||||
newSystemdProperty("MemoryMax", uint64(*resources.Memory.Max)))
|
||||
}
|
||||
|
||||
if resources.CPU != nil && resources.CPU.Weight != nil && *resources.CPU.Weight != 0 {
|
||||
properties = append(properties,
|
||||
newSystemdProperty("CPUWeight", *resources.CPU.Weight))
|
||||
}
|
||||
|
||||
if resources.CPU != nil && resources.CPU.Max != "" {
|
||||
quota, period := resources.CPU.Max.extractQuotaAndPeriod()
|
||||
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
|
||||
// always setting a property value ensures we can apply a quota and remove it later
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if quota > 0 {
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(quota*1000000) / period
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
properties = append(properties,
|
||||
newSystemdProperty("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
|
||||
// If we can delegate, we add the property back in
|
||||
if canDelegate {
|
||||
properties = append(properties, newSystemdProperty("Delegate", true))
|
||||
}
|
||||
|
||||
if resources.Pids != nil && resources.Pids.Max > 0 {
|
||||
properties = append(properties,
|
||||
newSystemdProperty("TasksAccounting", true),
|
||||
newSystemdProperty("TasksMax", uint64(resources.Pids.Max)))
|
||||
}
|
||||
|
||||
if err := startUnit(conn, group, properties, pid == -1); err != nil {
|
||||
return &Manager{}, err
|
||||
}
|
||||
|
||||
return &Manager{
|
||||
path: path,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func startUnit(conn *systemdDbus.Conn, group string, properties []systemdDbus.Property, ignoreExists bool) error {
|
||||
ctx := context.TODO()
|
||||
|
||||
statusChan := make(chan string, 1)
|
||||
defer close(statusChan)
|
||||
|
||||
retry := true
|
||||
started := false
|
||||
|
||||
for !started {
|
||||
if _, err := conn.StartTransientUnitContext(ctx, group, "replace", properties, statusChan); err != nil {
|
||||
if !isUnitExists(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if ignoreExists {
|
||||
return nil
|
||||
}
|
||||
|
||||
if retry {
|
||||
retry = false
|
||||
// When a unit of the same name already exists, it may be a leftover failed unit.
|
||||
// If we reset it once, systemd can try to remove it.
|
||||
attemptFailedUnitReset(conn, group)
|
||||
continue
|
||||
}
|
||||
|
||||
return err
|
||||
} else {
|
||||
started = true
|
||||
}
|
||||
}
|
||||
|
||||
select {
|
||||
case s := <-statusChan:
|
||||
if s != "done" {
|
||||
attemptFailedUnitReset(conn, group)
|
||||
return fmt.Errorf("error creating systemd unit `%s`: got `%s`", group, s)
|
||||
}
|
||||
case <-time.After(30 * time.Second):
|
||||
logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", group)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func attemptFailedUnitReset(conn *systemdDbus.Conn, group string) {
|
||||
err := conn.ResetFailedUnitContext(context.TODO(), group)
|
||||
|
||||
if err != nil {
|
||||
logrus.Warnf("Unable to reset failed unit: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func LoadSystemd(slice, group string) (*Manager, error) {
|
||||
if slice == "" {
|
||||
slice = defaultSlice
|
||||
}
|
||||
path := getSystemdFullPath(slice, group)
|
||||
return &Manager{
|
||||
path: path,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *Manager) DeleteSystemd() error {
|
||||
ctx := context.TODO()
|
||||
conn, err := systemdDbus.NewWithContext(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer conn.Close()
|
||||
group := systemdUnitFromPath(c.path)
|
||||
ch := make(chan string)
|
||||
_, err = conn.StopUnitContext(ctx, group, "replace", ch)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
<-ch
|
||||
return nil
|
||||
}
|
||||
|
||||
func newSystemdProperty(name string, units interface{}) systemdDbus.Property {
|
||||
return systemdDbus.Property{
|
||||
Name: name,
|
||||
Value: dbus.MakeVariant(units),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroup2
|
||||
|
||||
type Memory struct {
|
||||
Swap *int64
|
||||
Min *int64
|
||||
Max *int64
|
||||
Low *int64
|
||||
High *int64
|
||||
}
|
||||
|
||||
func (r *Memory) Values() (o []Value) {
|
||||
if r.Swap != nil {
|
||||
o = append(o, Value{
|
||||
filename: "memory.swap.max",
|
||||
value: *r.Swap,
|
||||
})
|
||||
}
|
||||
if r.Min != nil {
|
||||
o = append(o, Value{
|
||||
filename: "memory.min",
|
||||
value: *r.Min,
|
||||
})
|
||||
}
|
||||
if r.Max != nil {
|
||||
o = append(o, Value{
|
||||
filename: "memory.max",
|
||||
value: *r.Max,
|
||||
})
|
||||
}
|
||||
if r.Low != nil {
|
||||
o = append(o, Value{
|
||||
filename: "memory.low",
|
||||
value: *r.Low,
|
||||
})
|
||||
}
|
||||
if r.High != nil {
|
||||
o = append(o, Value{
|
||||
filename: "memory.high",
|
||||
value: *r.High,
|
||||
})
|
||||
}
|
||||
return o
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroup2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// NestedGroupPath will nest the cgroups based on the calling processes cgroup
|
||||
// placing its child processes inside its own path
|
||||
func NestedGroupPath(suffix string) (string, error) {
|
||||
path, err := parseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return filepath.Join(path, suffix), nil
|
||||
}
|
||||
|
||||
// PidGroupPath will return the correct cgroup paths for an existing process running inside a cgroup
|
||||
// This is commonly used for the Load function to restore an existing container
|
||||
func PidGroupPath(pid int) (string, error) {
|
||||
p := fmt.Sprintf("/proc/%d/cgroup", pid)
|
||||
return parseCgroupFile(p)
|
||||
}
|
||||
|
||||
// VerifyGroupPath verifies the format of group path string g.
|
||||
// The format is same as the third field in /proc/PID/cgroup.
|
||||
// e.g. "/user.slice/user-1001.slice/session-1.scope"
|
||||
//
|
||||
// g must be a "clean" absolute path starts with "/", and must not contain "/sys/fs/cgroup" prefix.
|
||||
//
|
||||
// VerifyGroupPath doesn't verify whether g actually exists on the system.
|
||||
func VerifyGroupPath(g string) error {
|
||||
if !strings.HasPrefix(g, "/") {
|
||||
return ErrInvalidGroupPath
|
||||
}
|
||||
if filepath.Clean(g) != g {
|
||||
return ErrInvalidGroupPath
|
||||
}
|
||||
if strings.HasPrefix(g, "/sys/fs/cgroup") {
|
||||
return ErrInvalidGroupPath
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroup2
|
||||
|
||||
import "strconv"
|
||||
|
||||
type Pids struct {
|
||||
Max int64
|
||||
}
|
||||
|
||||
func (r *Pids) Values() (o []Value) {
|
||||
if r.Max != 0 {
|
||||
limit := "max"
|
||||
if r.Max > 0 {
|
||||
limit = strconv.FormatInt(r.Max, 10)
|
||||
}
|
||||
o = append(o, Value{
|
||||
filename: "pids.max",
|
||||
value: limit,
|
||||
})
|
||||
}
|
||||
return o
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroup2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// RDMA holds the per-device RDMA limits of a cgroup.
type RDMA struct {
	Limit []RDMAEntry
}

// RDMAEntry is the limit for a single RDMA device.
type RDMAEntry struct {
	Device     string
	HcaHandles uint32
	HcaObjects uint32
}

// String renders the entry as "<device> hca_handle=<n> hca_object=<n>".
func (r RDMAEntry) String() string {
	const layout = "%s hca_handle=%d hca_object=%d"
	return fmt.Sprintf(layout, r.Device, r.HcaHandles, r.HcaObjects)
}
|
||||
|
||||
func (r *RDMA) Values() (o []Value) {
|
||||
for _, e := range r.Limit {
|
||||
o = append(o, Value{
|
||||
filename: "rdma.max",
|
||||
value: e.String(),
|
||||
})
|
||||
}
|
||||
|
||||
return o
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroup2
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// State is a type that represents the state of the current cgroup
|
||||
type State string
|
||||
|
||||
const (
|
||||
Unknown State = ""
|
||||
Thawed State = "thawed"
|
||||
Frozen State = "frozen"
|
||||
Deleted State = "deleted"
|
||||
|
||||
cgroupFreeze = "cgroup.freeze"
|
||||
)
|
||||
|
||||
func (s State) Values() []Value {
|
||||
v := Value{
|
||||
filename: cgroupFreeze,
|
||||
}
|
||||
switch s {
|
||||
case Frozen:
|
||||
v.value = "1"
|
||||
case Thawed:
|
||||
v.value = "0"
|
||||
}
|
||||
return []Value{
|
||||
v,
|
||||
}
|
||||
}
|
||||
|
||||
func fetchState(path string) (State, error) {
|
||||
current, err := os.ReadFile(filepath.Join(path, cgroupFreeze))
|
||||
if err != nil {
|
||||
return Unknown, err
|
||||
}
|
||||
switch strings.TrimSpace(string(current)) {
|
||||
case "1":
|
||||
return Frozen, nil
|
||||
case "0":
|
||||
return Thawed, nil
|
||||
default:
|
||||
return Unknown, nil
|
||||
}
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package stats
|
1558
vendor/github.com/containerd/cgroups/v3/cgroup2/stats/metrics.pb.go
generated
vendored
Normal file
1558
vendor/github.com/containerd/cgroups/v3/cgroup2/stats/metrics.pb.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
626
vendor/github.com/containerd/cgroups/v3/cgroup2/stats/metrics.pb.txt
generated
vendored
Normal file
626
vendor/github.com/containerd/cgroups/v3/cgroup2/stats/metrics.pb.txt
generated
vendored
Normal file
|
@ -0,0 +1,626 @@
|
|||
file {
|
||||
name: "github.com/containerd/cgroups/cgroup2/stats/metrics.proto"
|
||||
package: "io.containerd.cgroups.v2"
|
||||
message_type {
|
||||
name: "Metrics"
|
||||
field {
|
||||
name: "pids"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.PidsStat"
|
||||
json_name: "pids"
|
||||
}
|
||||
field {
|
||||
name: "cpu"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.CPUStat"
|
||||
json_name: "cpu"
|
||||
}
|
||||
field {
|
||||
name: "memory"
|
||||
number: 4
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.MemoryStat"
|
||||
json_name: "memory"
|
||||
}
|
||||
field {
|
||||
name: "rdma"
|
||||
number: 5
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.RdmaStat"
|
||||
json_name: "rdma"
|
||||
}
|
||||
field {
|
||||
name: "io"
|
||||
number: 6
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.IOStat"
|
||||
json_name: "io"
|
||||
}
|
||||
field {
|
||||
name: "hugetlb"
|
||||
number: 7
|
||||
label: LABEL_REPEATED
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.HugeTlbStat"
|
||||
json_name: "hugetlb"
|
||||
}
|
||||
field {
|
||||
name: "memory_events"
|
||||
number: 8
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.MemoryEvents"
|
||||
json_name: "memoryEvents"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "PSIData"
|
||||
field {
|
||||
name: "avg10"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_DOUBLE
|
||||
json_name: "avg10"
|
||||
}
|
||||
field {
|
||||
name: "avg60"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_DOUBLE
|
||||
json_name: "avg60"
|
||||
}
|
||||
field {
|
||||
name: "avg300"
|
||||
number: 3
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_DOUBLE
|
||||
json_name: "avg300"
|
||||
}
|
||||
field {
|
||||
name: "total"
|
||||
number: 4
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "total"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "PSIStats"
|
||||
field {
|
||||
name: "some"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.PSIData"
|
||||
json_name: "some"
|
||||
}
|
||||
field {
|
||||
name: "full"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.PSIData"
|
||||
json_name: "full"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "PidsStat"
|
||||
field {
|
||||
name: "current"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "current"
|
||||
}
|
||||
field {
|
||||
name: "limit"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "limit"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "CPUStat"
|
||||
field {
|
||||
name: "usage_usec"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "usageUsec"
|
||||
}
|
||||
field {
|
||||
name: "user_usec"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "userUsec"
|
||||
}
|
||||
field {
|
||||
name: "system_usec"
|
||||
number: 3
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "systemUsec"
|
||||
}
|
||||
field {
|
||||
name: "nr_periods"
|
||||
number: 4
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "nrPeriods"
|
||||
}
|
||||
field {
|
||||
name: "nr_throttled"
|
||||
number: 5
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "nrThrottled"
|
||||
}
|
||||
field {
|
||||
name: "throttled_usec"
|
||||
number: 6
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "throttledUsec"
|
||||
}
|
||||
field {
|
||||
name: "psi"
|
||||
number: 7
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.PSIStats"
|
||||
json_name: "psi"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "MemoryStat"
|
||||
field {
|
||||
name: "anon"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "anon"
|
||||
}
|
||||
field {
|
||||
name: "file"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "file"
|
||||
}
|
||||
field {
|
||||
name: "kernel_stack"
|
||||
number: 3
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "kernelStack"
|
||||
}
|
||||
field {
|
||||
name: "slab"
|
||||
number: 4
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "slab"
|
||||
}
|
||||
field {
|
||||
name: "sock"
|
||||
number: 5
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "sock"
|
||||
}
|
||||
field {
|
||||
name: "shmem"
|
||||
number: 6
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "shmem"
|
||||
}
|
||||
field {
|
||||
name: "file_mapped"
|
||||
number: 7
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "fileMapped"
|
||||
}
|
||||
field {
|
||||
name: "file_dirty"
|
||||
number: 8
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "fileDirty"
|
||||
}
|
||||
field {
|
||||
name: "file_writeback"
|
||||
number: 9
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "fileWriteback"
|
||||
}
|
||||
field {
|
||||
name: "anon_thp"
|
||||
number: 10
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "anonThp"
|
||||
}
|
||||
field {
|
||||
name: "inactive_anon"
|
||||
number: 11
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "inactiveAnon"
|
||||
}
|
||||
field {
|
||||
name: "active_anon"
|
||||
number: 12
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "activeAnon"
|
||||
}
|
||||
field {
|
||||
name: "inactive_file"
|
||||
number: 13
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "inactiveFile"
|
||||
}
|
||||
field {
|
||||
name: "active_file"
|
||||
number: 14
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "activeFile"
|
||||
}
|
||||
field {
|
||||
name: "unevictable"
|
||||
number: 15
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "unevictable"
|
||||
}
|
||||
field {
|
||||
name: "slab_reclaimable"
|
||||
number: 16
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "slabReclaimable"
|
||||
}
|
||||
field {
|
||||
name: "slab_unreclaimable"
|
||||
number: 17
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "slabUnreclaimable"
|
||||
}
|
||||
field {
|
||||
name: "pgfault"
|
||||
number: 18
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pgfault"
|
||||
}
|
||||
field {
|
||||
name: "pgmajfault"
|
||||
number: 19
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pgmajfault"
|
||||
}
|
||||
field {
|
||||
name: "workingset_refault"
|
||||
number: 20
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "workingsetRefault"
|
||||
}
|
||||
field {
|
||||
name: "workingset_activate"
|
||||
number: 21
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "workingsetActivate"
|
||||
}
|
||||
field {
|
||||
name: "workingset_nodereclaim"
|
||||
number: 22
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "workingsetNodereclaim"
|
||||
}
|
||||
field {
|
||||
name: "pgrefill"
|
||||
number: 23
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pgrefill"
|
||||
}
|
||||
field {
|
||||
name: "pgscan"
|
||||
number: 24
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pgscan"
|
||||
}
|
||||
field {
|
||||
name: "pgsteal"
|
||||
number: 25
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pgsteal"
|
||||
}
|
||||
field {
|
||||
name: "pgactivate"
|
||||
number: 26
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pgactivate"
|
||||
}
|
||||
field {
|
||||
name: "pgdeactivate"
|
||||
number: 27
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pgdeactivate"
|
||||
}
|
||||
field {
|
||||
name: "pglazyfree"
|
||||
number: 28
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pglazyfree"
|
||||
}
|
||||
field {
|
||||
name: "pglazyfreed"
|
||||
number: 29
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pglazyfreed"
|
||||
}
|
||||
field {
|
||||
name: "thp_fault_alloc"
|
||||
number: 30
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "thpFaultAlloc"
|
||||
}
|
||||
field {
|
||||
name: "thp_collapse_alloc"
|
||||
number: 31
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "thpCollapseAlloc"
|
||||
}
|
||||
field {
|
||||
name: "usage"
|
||||
number: 32
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "usage"
|
||||
}
|
||||
field {
|
||||
name: "usage_limit"
|
||||
number: 33
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "usageLimit"
|
||||
}
|
||||
field {
|
||||
name: "swap_usage"
|
||||
number: 34
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "swapUsage"
|
||||
}
|
||||
field {
|
||||
name: "swap_limit"
|
||||
number: 35
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "swapLimit"
|
||||
}
|
||||
field {
|
||||
name: "max_usage"
|
||||
number: 36
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "maxUsage"
|
||||
}
|
||||
field {
|
||||
name: "swap_max_usage"
|
||||
number: 37
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "swapMaxUsage"
|
||||
}
|
||||
field {
|
||||
name: "psi"
|
||||
number: 38
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.PSIStats"
|
||||
json_name: "psi"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "MemoryEvents"
|
||||
field {
|
||||
name: "low"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "low"
|
||||
}
|
||||
field {
|
||||
name: "high"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "high"
|
||||
}
|
||||
field {
|
||||
name: "max"
|
||||
number: 3
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "max"
|
||||
}
|
||||
field {
|
||||
name: "oom"
|
||||
number: 4
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "oom"
|
||||
}
|
||||
field {
|
||||
name: "oom_kill"
|
||||
number: 5
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "oomKill"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "RdmaStat"
|
||||
field {
|
||||
name: "current"
|
||||
number: 1
|
||||
label: LABEL_REPEATED
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.RdmaEntry"
|
||||
json_name: "current"
|
||||
}
|
||||
field {
|
||||
name: "limit"
|
||||
number: 2
|
||||
label: LABEL_REPEATED
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.RdmaEntry"
|
||||
json_name: "limit"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "RdmaEntry"
|
||||
field {
|
||||
name: "device"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_STRING
|
||||
json_name: "device"
|
||||
}
|
||||
field {
|
||||
name: "hca_handles"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT32
|
||||
json_name: "hcaHandles"
|
||||
}
|
||||
field {
|
||||
name: "hca_objects"
|
||||
number: 3
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT32
|
||||
json_name: "hcaObjects"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "IOStat"
|
||||
field {
|
||||
name: "usage"
|
||||
number: 1
|
||||
label: LABEL_REPEATED
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.IOEntry"
|
||||
json_name: "usage"
|
||||
}
|
||||
field {
|
||||
name: "psi"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.PSIStats"
|
||||
json_name: "psi"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "IOEntry"
|
||||
field {
|
||||
name: "major"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "major"
|
||||
}
|
||||
field {
|
||||
name: "minor"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "minor"
|
||||
}
|
||||
field {
|
||||
name: "rbytes"
|
||||
number: 3
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "rbytes"
|
||||
}
|
||||
field {
|
||||
name: "wbytes"
|
||||
number: 4
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "wbytes"
|
||||
}
|
||||
field {
|
||||
name: "rios"
|
||||
number: 5
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "rios"
|
||||
}
|
||||
field {
|
||||
name: "wios"
|
||||
number: 6
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "wios"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "HugeTlbStat"
|
||||
field {
|
||||
name: "current"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "current"
|
||||
}
|
||||
field {
|
||||
name: "max"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "max"
|
||||
}
|
||||
field {
|
||||
name: "pagesize"
|
||||
number: 3
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_STRING
|
||||
json_name: "pagesize"
|
||||
}
|
||||
}
|
||||
options {
|
||||
go_package: "github.com/containerd/cgroups/cgroup2/stats"
|
||||
}
|
||||
syntax: "proto3"
|
||||
}
|
122
vendor/github.com/containerd/cgroups/v3/cgroup2/stats/metrics.proto
generated
vendored
Normal file
122
vendor/github.com/containerd/cgroups/v3/cgroup2/stats/metrics.proto
generated
vendored
Normal file
|
@ -0,0 +1,122 @@
|
|||
syntax = "proto3";
|
||||
|
||||
package io.containerd.cgroups.v2;
|
||||
|
||||
option go_package = "github.com/containerd/cgroups/cgroup2/stats";
|
||||
|
||||
message Metrics {
|
||||
PidsStat pids = 1;
|
||||
CPUStat cpu = 2;
|
||||
MemoryStat memory = 4;
|
||||
RdmaStat rdma = 5;
|
||||
IOStat io = 6;
|
||||
repeated HugeTlbStat hugetlb = 7;
|
||||
MemoryEvents memory_events = 8;
|
||||
}
|
||||
|
||||
message PSIData {
|
||||
double avg10 = 1;
|
||||
double avg60 = 2;
|
||||
double avg300 = 3;
|
||||
uint64 total = 4;
|
||||
}
|
||||
|
||||
message PSIStats {
|
||||
PSIData some = 1;
|
||||
PSIData full = 2;
|
||||
}
|
||||
|
||||
message PidsStat {
|
||||
uint64 current = 1;
|
||||
uint64 limit = 2;
|
||||
}
|
||||
|
||||
message CPUStat {
|
||||
uint64 usage_usec = 1;
|
||||
uint64 user_usec = 2;
|
||||
uint64 system_usec = 3;
|
||||
uint64 nr_periods = 4;
|
||||
uint64 nr_throttled = 5;
|
||||
uint64 throttled_usec = 6;
|
||||
PSIStats psi = 7;
|
||||
}
|
||||
|
||||
message MemoryStat {
|
||||
uint64 anon = 1;
|
||||
uint64 file = 2;
|
||||
uint64 kernel_stack = 3;
|
||||
uint64 slab = 4;
|
||||
uint64 sock = 5;
|
||||
uint64 shmem = 6;
|
||||
uint64 file_mapped = 7;
|
||||
uint64 file_dirty = 8;
|
||||
uint64 file_writeback = 9;
|
||||
uint64 anon_thp = 10;
|
||||
uint64 inactive_anon = 11;
|
||||
uint64 active_anon = 12;
|
||||
uint64 inactive_file = 13;
|
||||
uint64 active_file = 14;
|
||||
uint64 unevictable = 15;
|
||||
uint64 slab_reclaimable = 16;
|
||||
uint64 slab_unreclaimable = 17;
|
||||
uint64 pgfault = 18;
|
||||
uint64 pgmajfault = 19;
|
||||
uint64 workingset_refault = 20;
|
||||
uint64 workingset_activate = 21;
|
||||
uint64 workingset_nodereclaim = 22;
|
||||
uint64 pgrefill = 23;
|
||||
uint64 pgscan = 24;
|
||||
uint64 pgsteal = 25;
|
||||
uint64 pgactivate = 26;
|
||||
uint64 pgdeactivate = 27;
|
||||
uint64 pglazyfree = 28;
|
||||
uint64 pglazyfreed = 29;
|
||||
uint64 thp_fault_alloc = 30;
|
||||
uint64 thp_collapse_alloc = 31;
|
||||
uint64 usage = 32;
|
||||
uint64 usage_limit = 33;
|
||||
uint64 swap_usage = 34;
|
||||
uint64 swap_limit = 35;
|
||||
uint64 max_usage = 36;
|
||||
uint64 swap_max_usage = 37;
|
||||
PSIStats psi = 38;
|
||||
}
|
||||
|
||||
message MemoryEvents {
|
||||
uint64 low = 1;
|
||||
uint64 high = 2;
|
||||
uint64 max = 3;
|
||||
uint64 oom = 4;
|
||||
uint64 oom_kill = 5;
|
||||
}
|
||||
|
||||
message RdmaStat {
|
||||
repeated RdmaEntry current = 1;
|
||||
repeated RdmaEntry limit = 2;
|
||||
}
|
||||
|
||||
message RdmaEntry {
|
||||
string device = 1;
|
||||
uint32 hca_handles = 2;
|
||||
uint32 hca_objects = 3;
|
||||
}
|
||||
|
||||
message IOStat {
|
||||
repeated IOEntry usage = 1;
|
||||
PSIStats psi = 2;
|
||||
}
|
||||
|
||||
message IOEntry {
|
||||
uint64 major = 1;
|
||||
uint64 minor = 2;
|
||||
uint64 rbytes = 3;
|
||||
uint64 wbytes = 4;
|
||||
uint64 rios = 5;
|
||||
uint64 wios = 6;
|
||||
}
|
||||
|
||||
message HugeTlbStat {
|
||||
uint64 current = 1;
|
||||
uint64 max = 2;
|
||||
string pagesize = 3;
|
||||
}
|
|
@ -0,0 +1,561 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroup2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/containerd/cgroups/v3/cgroup2/stats"
|
||||
|
||||
"github.com/godbus/dbus/v5"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// File names, relative to a cgroup directory, that list the member
// processes and threads of the group.
const (
	cgroupProcs   = "cgroup.procs"
	cgroupThreads = "cgroup.threads"
)

// defaultDirPerm is the permission mode rwxr-xr-x; judging by its name it is
// the mode used for directories created by this package (the call sites are
// not in this file section).
const defaultDirPerm = 0o755
|
||||
|
||||
// defaultFilePerm is the file mode used when this package opens cgroup
// files. It is a variable rather than a constant so that the test code can
// override it: on a real cgroup filesystem files such as "cgroup.procs"
// already exist before they are written, whereas in the tests they do not
// exist beforehand. The test code sets this to a non-zero value.
var defaultFilePerm os.FileMode = 0
|
||||
|
||||
// remove deletes the cgroup path (recursively, via os.RemoveAll). The kernel
// may answer EAGAIN or EBUSY while a cgroup is still being torn down, so the
// removal is attempted up to five times; any failure triggers a retry. The
// pause between attempts starts at 10ms and doubles each time. When every
// attempt fails, the last error is returned wrapped with the path.
func remove(path string) error {
	const attempts = 5

	var (
		lastErr error
		wait    = 10 * time.Millisecond
	)
	for attempt := 0; attempt < attempts; attempt++ {
		// No pause before the very first attempt.
		if attempt > 0 {
			time.Sleep(wait)
			wait *= 2
		}
		lastErr = os.RemoveAll(path)
		if lastErr == nil {
			return nil
		}
	}
	return fmt.Errorf("cgroups: unable to remove path %q: %w", path, lastErr)
}
|
||||
|
||||
// parseCgroupTasksFile reads a file such as
// /sys/fs/cgroup/$GROUPPATH/cgroup.procs or
// /sys/fs/cgroup/$GROUPPATH/cgroup.threads and returns the IDs it lists, one
// per line. Empty lines are skipped; a line that is not an unsigned decimal
// number makes the whole call fail.
func parseCgroupTasksFile(path string) ([]uint64, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	var ids []uint64
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Text()
		if line == "" {
			continue
		}
		id, err := strconv.ParseUint(line, 10, 0)
		if err != nil {
			return nil, err
		}
		ids = append(ids, id)
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return ids, nil
}
|
||||
|
||||
func parseKV(raw string) (string, uint64, error) {
|
||||
parts := strings.Fields(raw)
|
||||
if len(parts) != 2 {
|
||||
return "", 0, ErrInvalidFormat
|
||||
}
|
||||
v, err := parseUint(parts[1], 10, 64)
|
||||
return parts[0], v, err
|
||||
}
|
||||
|
||||
// parseUint converts s to an unsigned integer like strconv.ParseUint, except
// that negative numbers are reported as 0 with a nil error instead of
// failing. This covers both negative values that fit in bitSize bits and
// negative values that are out of range (for which strconv.ParseInt returns
// the minimum value together with strconv.ErrRange). Every other failure
// returns 0 and the original strconv.ParseUint error.
func parseUint(s string, base, bitSize int) (uint64, error) {
	v, err := strconv.ParseUint(s, base, bitSize)
	if err == nil {
		return v, nil
	}

	// Re-parse as signed to find out whether the input was a negative number.
	// errors.Is replaces a bare type assertion on the error, so an unexpected
	// error type can never cause a panic here.
	intValue, intErr := strconv.ParseInt(s, base, bitSize)
	if intValue < 0 && (intErr == nil || errors.Is(intErr, strconv.ErrRange)) {
		return 0, nil
	}
	return 0, err
}
|
||||
|
||||
// parseCgroupFile parses /proc/PID/cgroup file and return string
|
||||
func parseCgroupFile(path string) (string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
return parseCgroupFromReader(f)
|
||||
}
|
||||
|
||||
// parseCgroupFromReader scans r line by line for an entry of the form
// "0::<path>", e.g. "0::/user.slice/user-1001.slice/session-1.scope", and
// returns <path>. Each line must contain at least two ':' separators;
// a line that does not is reported as an invalid entry. If no matching
// entry is found an error is returned.
func parseCgroupFromReader(r io.Reader) (string, error) {
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		line := scanner.Text()
		fields := strings.SplitN(line, ":", 3)
		if len(fields) < 3 {
			return "", fmt.Errorf("invalid cgroup entry: %q", line)
		}

		hierarchyID, controllers, cgPath := fields[0], fields[1], fields[2]
		if hierarchyID != "0" || controllers != "" {
			continue
		}
		return cgPath, nil
	}
	if err := scanner.Err(); err != nil {
		return "", err
	}
	return "", fmt.Errorf("cgroup path not found")
}
|
||||
|
||||
// ToResources converts the oci LinuxResources struct into a
|
||||
// v2 Resources type for use with this package.
|
||||
//
|
||||
// converting cgroups configuration from v1 to v2
|
||||
// ref: https://github.com/containers/crun/blob/master/crun.1.md#cgroup-v2
|
||||
func ToResources(spec *specs.LinuxResources) *Resources {
|
||||
var resources Resources
|
||||
if cpu := spec.CPU; cpu != nil {
|
||||
resources.CPU = &CPU{
|
||||
Cpus: cpu.Cpus,
|
||||
Mems: cpu.Mems,
|
||||
}
|
||||
if shares := cpu.Shares; shares != nil {
|
||||
convertedWeight := 1 + ((*shares-2)*9999)/262142
|
||||
resources.CPU.Weight = &convertedWeight
|
||||
}
|
||||
if period := cpu.Period; period != nil {
|
||||
resources.CPU.Max = NewCPUMax(cpu.Quota, period)
|
||||
}
|
||||
}
|
||||
if mem := spec.Memory; mem != nil {
|
||||
resources.Memory = &Memory{}
|
||||
if swap := mem.Swap; swap != nil {
|
||||
resources.Memory.Swap = swap
|
||||
if l := mem.Limit; l != nil {
|
||||
reduce := *swap - *l
|
||||
resources.Memory.Swap = &reduce
|
||||
}
|
||||
}
|
||||
if l := mem.Limit; l != nil {
|
||||
resources.Memory.Max = l
|
||||
}
|
||||
if l := mem.Reservation; l != nil {
|
||||
resources.Memory.Low = l
|
||||
}
|
||||
}
|
||||
if hugetlbs := spec.HugepageLimits; hugetlbs != nil {
|
||||
hugeTlbUsage := HugeTlb{}
|
||||
for _, hugetlb := range hugetlbs {
|
||||
hugeTlbUsage = append(hugeTlbUsage, HugeTlbEntry{
|
||||
HugePageSize: hugetlb.Pagesize,
|
||||
Limit: hugetlb.Limit,
|
||||
})
|
||||
}
|
||||
resources.HugeTlb = &hugeTlbUsage
|
||||
}
|
||||
if pids := spec.Pids; pids != nil {
|
||||
resources.Pids = &Pids{
|
||||
Max: pids.Limit,
|
||||
}
|
||||
}
|
||||
if i := spec.BlockIO; i != nil {
|
||||
resources.IO = &IO{}
|
||||
if i.Weight != nil {
|
||||
resources.IO.BFQ.Weight = 1 + (*i.Weight-10)*9999/990
|
||||
}
|
||||
for t, devices := range map[IOType][]specs.LinuxThrottleDevice{
|
||||
ReadBPS: i.ThrottleReadBpsDevice,
|
||||
WriteBPS: i.ThrottleWriteBpsDevice,
|
||||
ReadIOPS: i.ThrottleReadIOPSDevice,
|
||||
WriteIOPS: i.ThrottleWriteIOPSDevice,
|
||||
} {
|
||||
for _, d := range devices {
|
||||
resources.IO.Max = append(resources.IO.Max, Entry{
|
||||
Type: t,
|
||||
Major: d.Major,
|
||||
Minor: d.Minor,
|
||||
Rate: d.Rate,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
if i := spec.Rdma; i != nil {
|
||||
resources.RDMA = &RDMA{}
|
||||
for device, value := range spec.Rdma {
|
||||
if device != "" && (value.HcaHandles != nil && value.HcaObjects != nil) {
|
||||
resources.RDMA.Limit = append(resources.RDMA.Limit, RDMAEntry{
|
||||
Device: device,
|
||||
HcaHandles: *value.HcaHandles,
|
||||
HcaObjects: *value.HcaObjects,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return &resources
|
||||
}
|
||||
|
||||
// Gets uint64 parsed content of single value cgroup stat file
|
||||
func getStatFileContentUint64(filePath string) uint64 {
|
||||
f, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// We expect an unsigned 64 bit integer, or a "max" string
|
||||
// in some cases.
|
||||
buf := make([]byte, 32)
|
||||
n, err := f.Read(buf)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
trimmed := strings.TrimSpace(string(buf[:n]))
|
||||
if trimmed == "max" {
|
||||
return math.MaxUint64
|
||||
}
|
||||
|
||||
res, err := parseUint(trimmed, 10, 64)
|
||||
if err != nil {
|
||||
logrus.Errorf("unable to parse %q as a uint from Cgroup file %q", trimmed, filePath)
|
||||
return res
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
func readIoStats(path string) []*stats.IOEntry {
|
||||
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||
var usage []*stats.IOEntry
|
||||
fpath := filepath.Join(path, "io.stat")
|
||||
currentData, err := os.ReadFile(fpath)
|
||||
if err != nil {
|
||||
return usage
|
||||
}
|
||||
entries := strings.Split(string(currentData), "\n")
|
||||
|
||||
for _, entry := range entries {
|
||||
parts := strings.Split(entry, " ")
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
majmin := strings.Split(parts[0], ":")
|
||||
if len(majmin) != 2 {
|
||||
continue
|
||||
}
|
||||
major, err := strconv.ParseUint(majmin[0], 10, 0)
|
||||
if err != nil {
|
||||
return usage
|
||||
}
|
||||
minor, err := strconv.ParseUint(majmin[1], 10, 0)
|
||||
if err != nil {
|
||||
return usage
|
||||
}
|
||||
parts = parts[1:]
|
||||
ioEntry := stats.IOEntry{
|
||||
Major: major,
|
||||
Minor: minor,
|
||||
}
|
||||
for _, s := range parts {
|
||||
keyPairValue := strings.Split(s, "=")
|
||||
if len(keyPairValue) != 2 {
|
||||
continue
|
||||
}
|
||||
v, err := strconv.ParseUint(keyPairValue[1], 10, 0)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
switch keyPairValue[0] {
|
||||
case "rbytes":
|
||||
ioEntry.Rbytes = v
|
||||
case "wbytes":
|
||||
ioEntry.Wbytes = v
|
||||
case "rios":
|
||||
ioEntry.Rios = v
|
||||
case "wios":
|
||||
ioEntry.Wios = v
|
||||
}
|
||||
}
|
||||
usage = append(usage, &ioEntry)
|
||||
}
|
||||
return usage
|
||||
}
|
||||
|
||||
func rdmaStats(filepath string) []*stats.RdmaEntry {
|
||||
currentData, err := os.ReadFile(filepath)
|
||||
if err != nil {
|
||||
return []*stats.RdmaEntry{}
|
||||
}
|
||||
return toRdmaEntry(strings.Split(string(currentData), "\n"))
|
||||
}
|
||||
|
||||
func parseRdmaKV(raw string, entry *stats.RdmaEntry) {
|
||||
var value uint64
|
||||
var err error
|
||||
|
||||
parts := strings.Split(raw, "=")
|
||||
switch len(parts) {
|
||||
case 2:
|
||||
if parts[1] == "max" {
|
||||
value = math.MaxUint32
|
||||
} else {
|
||||
value, err = parseUint(parts[1], 10, 32)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
if parts[0] == "hca_handle" {
|
||||
entry.HcaHandles = uint32(value)
|
||||
} else if parts[0] == "hca_object" {
|
||||
entry.HcaObjects = uint32(value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func toRdmaEntry(strEntries []string) []*stats.RdmaEntry {
|
||||
var rdmaEntries []*stats.RdmaEntry
|
||||
for i := range strEntries {
|
||||
parts := strings.Fields(strEntries[i])
|
||||
switch len(parts) {
|
||||
case 3:
|
||||
entry := new(stats.RdmaEntry)
|
||||
entry.Device = parts[0]
|
||||
parseRdmaKV(parts[1], entry)
|
||||
parseRdmaKV(parts[2], entry)
|
||||
|
||||
rdmaEntries = append(rdmaEntries, entry)
|
||||
default:
|
||||
continue
|
||||
}
|
||||
}
|
||||
return rdmaEntries
|
||||
}
|
||||
|
||||
// isUnitExists returns true if the error is that a systemd unit already exists.
|
||||
func isUnitExists(err error) bool {
|
||||
if err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// systemdUnitFromPath returns the final element of path, i.e. everything
// after the last path separator. A path ending in a separator yields "".
func systemdUnitFromPath(path string) string {
	_, name := filepath.Split(path)
	return name
}
|
||||
|
||||
func readHugeTlbStats(path string) []*stats.HugeTlbStat {
|
||||
hpSizes := hugePageSizes()
|
||||
usage := make([]*stats.HugeTlbStat, len(hpSizes))
|
||||
for idx, pagesize := range hpSizes {
|
||||
usage[idx] = &stats.HugeTlbStat{
|
||||
Max: getStatFileContentUint64(filepath.Join(path, "hugetlb."+pagesize+".max")),
|
||||
Current: getStatFileContentUint64(filepath.Join(path, "hugetlb."+pagesize+".current")),
|
||||
Pagesize: pagesize,
|
||||
}
|
||||
}
|
||||
return usage
|
||||
}
|
||||
|
||||
// hPageSizes caches the huge page sizes computed by hugePageSizes.
var hPageSizes []string

// initHPSOnce ensures hPageSizes is populated at most once.
var initHPSOnce sync.Once
|
||||
|
||||
// The following idea and implementation is taken pretty much line for line from
|
||||
// runc. Because the hugetlb files are well known, and the only variable thrown in
|
||||
// the mix is what huge page sizes you have on your host, this lends itself well
|
||||
// to doing the work to find the files present once, and then re-using this. This
|
||||
// saves a os.Readdirnames(0) call to search for hugeltb files on every `manager.Stat`
|
||||
// call.
|
||||
// https://github.com/opencontainers/runc/blob/3a2c0c2565644d8a7e0f1dd594a060b21fa96cf1/libcontainer/cgroups/utils.go#L301
|
||||
func hugePageSizes() []string {
|
||||
initHPSOnce.Do(func() {
|
||||
dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
files, err := dir.Readdirnames(0)
|
||||
dir.Close()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
hPageSizes, err = getHugePageSizeFromFilenames(files)
|
||||
if err != nil {
|
||||
logrus.Warnf("hugePageSizes: %s", err)
|
||||
}
|
||||
})
|
||||
|
||||
return hPageSizes
|
||||
}
|
||||
|
||||
// getHugePageSizeFromFilenames turns directory entries such as
// "hugepages-1048576kB" into human-readable sizes such as "1GB".
//
// Names without the "hugepages-" prefix are silently ignored. Names whose
// remainder does not end in "kB", or whose numeric part is not an integer,
// are skipped as well, and the first such problem is returned as the error
// alongside the sizes that could be converted.
func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
	const (
		prefix = "hugepages-"
		suffix = "kB"
	)

	var firstProblem error
	sizes := make([]string, 0, len(fileNames))
	for _, name := range fileNames {
		// example: hugepages-1048576kB
		if !strings.HasPrefix(name, prefix) {
			// Unexpected file name: no prefix found, ignore it.
			continue
		}
		rest := name[len(prefix):]

		// In all known versions of Linux up to 6.3 the suffix is always
		// "kB". If we find something else, produce an error but keep going.
		if !strings.HasSuffix(rest, suffix) {
			// Highly unlikely.
			if firstProblem == nil {
				firstProblem = errors.New(name + `: invalid suffix (expected "kB")`)
			}
			continue
		}
		digits := rest[:len(rest)-len(suffix)]

		kb, err := strconv.Atoi(digits)
		if err != nil {
			// Highly unlikely.
			if firstProblem == nil {
				firstProblem = fmt.Errorf("%s: %w", name, err)
			}
			continue
		}

		// Model after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574
		// but in our case the size is in KB already.
		switch {
		case kb >= 1<<20:
			sizes = append(sizes, strconv.Itoa(kb>>20)+"GB")
		case kb >= 1<<10:
			sizes = append(sizes, strconv.Itoa(kb>>10)+"MB")
		default:
			sizes = append(sizes, digits+"KB")
		}
	}

	return sizes, firstProblem
}
|
||||
|
||||
func getStatPSIFromFile(path string) *stats.PSIStats {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
psistats := &stats.PSIStats{}
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
parts := strings.Fields(sc.Text())
|
||||
var pv *stats.PSIData
|
||||
switch parts[0] {
|
||||
case "some":
|
||||
psistats.Some = &stats.PSIData{}
|
||||
pv = psistats.Some
|
||||
case "full":
|
||||
psistats.Full = &stats.PSIData{}
|
||||
pv = psistats.Full
|
||||
}
|
||||
if pv != nil {
|
||||
err = parsePSIData(parts[1:], pv)
|
||||
if err != nil {
|
||||
logrus.Errorf("failed to read file %s: %v", path, err)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err := sc.Err(); err != nil {
|
||||
logrus.Errorf("unable to parse PSI data: %v", err)
|
||||
return nil
|
||||
}
|
||||
return psistats
|
||||
}
|
||||
|
||||
func parsePSIData(psi []string, data *stats.PSIData) error {
|
||||
for _, f := range psi {
|
||||
kv := strings.SplitN(f, "=", 2)
|
||||
if len(kv) != 2 {
|
||||
return fmt.Errorf("invalid PSI data: %q", f)
|
||||
}
|
||||
var pv *float64
|
||||
switch kv[0] {
|
||||
case "avg10":
|
||||
pv = &data.Avg10
|
||||
case "avg60":
|
||||
pv = &data.Avg60
|
||||
case "avg300":
|
||||
pv = &data.Avg300
|
||||
case "total":
|
||||
v, err := strconv.ParseUint(kv[1], 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid %s PSI value: %w", kv[0], err)
|
||||
}
|
||||
data.Total = v
|
||||
}
|
||||
if pv != nil {
|
||||
v, err := strconv.ParseFloat(kv[1], 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid %s PSI value: %w", kv[0], err)
|
||||
}
|
||||
*pv = v
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getSubreaper() (int, error) {
|
||||
var i uintptr
|
||||
if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return int(i), nil
|
||||
}
|
Loading…
Reference in New Issue