Compare commits

...

1 Commits

Author SHA1 Message Date
Tonghao Zhang 334c032be0 HUATUO: Initial Commit
Have a Good Journey, :)

Signed-off-by: Tonghao Zhang <tonghao@bamaicloud.com>
2025-06-06 00:06:01 -04:00
6604 changed files with 2285208 additions and 1 deletions

5
.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
*.o
bpf/include/vmlinux.h
_output/
cmd/**/bin/

128
.golangci.yaml Normal file
View File

@ -0,0 +1,128 @@
---
linters:
  disable-all: true
  enable:
    - goimports
    - gosimple
    - ineffassign # Detects when assignments to existing variables are not used
    - unconvert # Remove unnecessary type conversions
    - exportloopref # Checks for pointers to enclosing loop variables
    - tenv # Detects using os.Setenv instead of t.Setenv since Go 1.17
    - dupword # Checks for duplicate words in the source code
    - gofmt # Gofmt checks whether code was gofmt-ed
    - bodyclose # checks whether HTTP response body is closed successfully
    - misspell
    - staticcheck
    - typecheck
    - unused
    - loggercheck
    - nakedret
    - gofumpt
    - musttag
    - whitespace
    # NOTE(review): duplicate `dupword` and `unused` entries removed --
    # both linters were listed twice in this block.
    - gocritic
    - usestdlibvars
    - gosec
    - govet
    - nolintlint
    - errcheck
    - errname
    - errorlint
    - fatcontext
    - gocheckcompilerdirectives
    - inamedparam
    # Could be enabled later:
    # - gocyclo
    # - prealloc
    # - maligned
linters-settings:
  unused:
    # Mark all struct fields that have been written to as used.
    # Default: true
    field-writes-are-uses: false
    # Mark all local variables as used.
    # default: true
    local-variables-are-used: false
  misspell:
    # Correct spellings using locale preferences for US or UK.
    # Setting locale to US will correct the British spelling of 'colour' to 'color'.
    # Default is to use a neutral variety of English.
    locale: US
  gofumpt:
    # Choose whether to use the extra rules.
    # Default: false
    extra-rules: true
    # Module path which contains the source code being formatted.
    module-path: huatuo-bamai
  gocritic:
    enabled-tags:
      - diagnostic
      - style
      - performance
      - experimental
      - opinionated
    disabled-checks:
      - commentedOutCode
      - deferInLoop
      - evalOrder
      - exitAfterDefer
      - exposedSyncMutex
      - ifElseChain
      - importShadow
      - sloppyReassign
      - unnamedResult
      - whyNoLint
      - filepathJoin
  nolintlint:
    allow-unused: true
  gosec:
    # https://github.com/securego/gosec#available-rules
    #
    # The following issues surfaced when `gosec` linter
    # was enabled.
    # Disable G115:
    # "G115: integer overflow conversion int8 -> uint64 (gosec)"
    excludes:
      - G107
      - G115
      - G204
      - G401
      - G501
issues:
  # Directories skipped during analysis. NOTE(review): moved under
  # `issues:` -- `issues.exclude-dirs` is the valid location since
  # golangci-lint v1.57 (formerly `run.skip-dirs`).
  exclude-dirs:
    - pkg/tracing
    - vendor
  # List of regexps of issue texts to exclude.
  #
  # But independently of this option we use default exclude patterns,
  # it can be disabled by `exclude-use-default: false`.
  # To list all excluded by default patterns execute `golangci-lint run --help`
  #
  # Default: https://golangci-lint.run/usage/false-positives/#default-exclusions
  #
  # _xxx as used var.
  exclude:
    - "^(var|field) `_.*` is unused$"
  # NOTE(review): these rules target `revive`, which is not in the
  # enable list above -- presumably kept for when it is turned on.
  exclude-rules:
    - linters:
        - revive
      text: "if-return"
    - linters:
        - revive
      text: "empty-block"
    - linters:
        - revive
      text: "superfluous-else"
    - linters:
        - revive
      text: "unused-parameter"
    - linters:
        - revive
      text: "unreachable-code"
    - linters:
        - revive
      text: "redefines-builtin-id"

202
LICENSE Normal file
View File

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright {yyyy} Authors of Cilium
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

78
Makefile Normal file
View File

@ -0,0 +1,78 @@
GO ?= go
# the root directory
ROOT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
# bpf source code files
BPF_DIR := $(ROOT_DIR)/bpf
# used for go generate to compile eBPF
BPF_COMPILE := $(ROOT_DIR)/build/clang.sh
BPF_INCLUDE := "-I$(BPF_DIR)/include"

# Build metadata baked into binaries via -ldflags -X.
APP_COMMIT ?= $(shell git describe --dirty --long --always)
APP_BUILD_TIME=$(shell date "+%Y%m%d%H%M%S")
APP_VERSION="1.0"

# NOTE(review): GO_BUILD_STATIC deliberately leaves the -ldflags double
# quote OPEN. Every consumer -- GO_BUILD_STATIC_WITH_VERSION below, and
# presumably the sub-Makefiles under cmd/ that receive the exported
# variable -- must append its own flags and close the quote. Fragile;
# do not use $(GO_BUILD_STATIC) without terminating the quote.
GO_BUILD_STATIC := CGO_ENABLED=1 $(GO) build -tags "netgo osusergo $(GO_TAGS)" -gcflags=all="-N -l" \
	-ldflags "-extldflags -static
GO_BUILD_STATIC_WITH_VERSION := $(GO_BUILD_STATIC) \
	-X main.AppVersion=$(APP_VERSION) \
	-X main.AppGitCommit=$(APP_COMMIT) \
	-X main.AppBuildTime=$(APP_BUILD_TIME)"

# export for the tracer sub-makes (see `tracer` target)
export GO_BUILD_STATIC

all: gen-deps gen build tracer

gen-deps:
	# maybe need to install libbpf-devel

# Compile the eBPF objects via `go generate` (invokes build/clang.sh).
gen:
	@BPF_DIR=$(BPF_DIR) \
	BPF_COMPILE=$(BPF_COMPILE) \
	BPF_INCLUDE=$(BPF_INCLUDE) \
	$(GO) generate -x ./...

build:
	$(GO_BUILD_STATIC_WITH_VERSION) -o _output/bin/huatuo-bamai ./cmd/huatuo-bamai

# Standalone tracers: every cmd/* subdirectory except the main binary
# and dependency helpers; each one is built by its own Makefile.
TRACER_DIR := cmd
BIN_DIR := bin
SUBDIRS := $(shell find $(TRACER_DIR) -mindepth 1 -maxdepth 1 -type d -not -path "$(BIN_DIR)" | grep -v 'depend\|huatuo-bamai')
TARGETS := $(patsubst %,$(BIN_DIR)/%,$(notdir $(SUBDIRS)))
COMBINED := $(foreach dir,$(SUBDIRS),$(dir)/$(BIN_DIR)/*.bin)

tracer: $(TARGETS)

$(BIN_DIR)/%: $(TRACER_DIR)/%
	cd $< && make

check: imports fmt golangci-lint

imports:
	@echo "imports"
	@goimports -w -local huatuo-bamai $(shell find . -type f -name '*.go' -not -path "./vendor/*")

fmt: fmt-rewrite-rules
	@echo "gofumpt"
	gofumpt -l -w $(shell find . -type f -name '*.go' -not -path "./vendor/*")

fmt-rewrite-rules:
	@echo "fmt-rewrite-rules"
	gofmt -w -r 'interface{} -> any' $(shell find . -type f -name '*.go' -not -path "./vendor/*")

golangci-lint:
	@echo "golangci-lint"
	golangci-lint run --build-tags=$(GO_TAGS) -v ./... --timeout=5m --config .golangci.yaml

vendor:
	$(GO) mod tidy
	$(GO) mod verify
	$(GO) mod vendor

clean:
	rm -rf _output $(shell find . -type f -name "*.o") $(COMBINED)

# Fixed: removed nonexistent `golint` target from .PHONY; added the real
# `fmt-rewrite-rules` so a same-named file can never mask it.
.PHONY: all gen-deps gen build tracer check imports fmt fmt-rewrite-rules golangci-lint vendor clean

View File

@ -1,2 +1,91 @@
# huatuo-bamai
简体中文 | [English](./README_EN.md)
# 什么是 HUATUO
**HUATUO华佗**是由**滴滴**开源并依托 **CCF 开源发展委员会**孵化的云原生操作系统可观测性项目,专注于为复杂云原生环境提供操作系统内核级深度观测能力。该项目基于 [eBPF](https://docs.kernel.org/userspace-api/ebpf/syscall.html) 技术,通过整合 [kprobe](https://www.kernel.org/doc/html/latest/trace/kprobes.html)、 [tracepoint](https://www.kernel.org/doc/html/latest/trace/tracepoints.html)、 [ftrace](https://www.kernel.org/doc/html/latest/trace/ftrace.html) 等内核动态追踪技术,实现了多维度的内核观测能力:**1.** 更精细化的内核子系统埋点指标 Metric **2.** 异常事件驱动的内核运行时上下文捕获 Events **3.** 针对系统突发毛刺的自动追踪 AutoTracing、AutoProfiling。该项目逐步构建了完整的 Linux 内核深度可观测体系架构。目前HUATUO 已在滴滴生产环境中实现规模化部署,在诸多故障场景中发挥关键作用,有效保障了云原生操作系统的高可用性和性能优化。通过持续的技术演进,希望 HUATUO 能够推动 eBPF 技术在云原生可观测领域向更细粒度、更低开销、更高时效性的方向发展。
# 核心特性
- **低损耗内核全景观测**:基于 BPF 技术保持性能损耗小于1%的基准水位实现对内存管理、CPU 调度、网络及块 IO 子系统等核心模块的精细化、全维度、全景观测与性能剖析。通过自适应采样机制,实现系统资源损耗与观测精度的动态平衡。
- **异常事件驱动诊断**:构建基于异常事件驱动的运行时上下文捕获机制,聚焦内核异常与慢速路径的精准埋点。当发生缺页异常、调度延迟、锁竞争等关键事件时,自动触发调用链追踪,生成包含寄存器状态、堆栈轨迹及资源占用的图谱诊断信息。
- **全自动化追踪 AutoTracing**AutoTracing 模块采用启发式追踪算法,解决云原生复杂场景下的典型性能毛刺故障。针对 CPU idle 掉底、CPU sys 突增、IO 突增、loadavg 突增等棘手问题,实现自动化快照留存机制和根因分析。
- **持续性能剖析 Profiling**:持续对操作系统内核,应用程序进行全方位性能剖析,涉及系统 CPU、内存、I/O、 锁、以及各种解释性编程语言,力助业务持续的优化迭代更新。该功能在哨兵压测,防火演练,重要节假日护堤等场景发挥关键作用。
- **分布式链路追踪 Tracing**:以网络为中心的面向服务请求的分布式链路追踪,能够清晰的划分系统调用层级关系,节点关联关系,耗时记账等,支持在大规模分布式系统中的跨节点追踪,提供微服务调用的全景视图,保障系统在复杂场景下的稳定性。
- **开源技术生态融合**:无缝对接主流开源可观测技术栈,如 Prometheus、Grafana、Pyroscope、Elasticsearch等。支持独立物理机和云原生部署自动感知 K8S 容器资源/标签/注解,自动关联操作系统内核事件指标,消除数据孤岛。通过零侵扰、内核可编程方式兼容主流硬件平台和内核版本,确保其适应性、应用性。
# 快速上手
为用户开发者快速体验 HUATUO 我们提供容器编译镜像的便捷方式,一键运行 docker compose 即可启动。该命令会启动 elasticsearch, prometheus, grafana 以及编译的 huatuo-bamai 组件。上述命令执行成功后,打开浏览器访问 http://localhost:3000 即可浏览监控大盘。
```bash
$ docker compose --project-directory ./build/docker up
```
# 软件架构
![](./docs/huatuo-arch.svg)
# 功能列表
## Autotracing
| 追踪名称 | 核心功能 | 场景 |
| ---------------| --------------------- |-------------------------------------- |
| cpu sys | 宿主 sys 增高检测 | 由于系统负载异常导致业务毛刺问题 |
| cpu idle | 容器 cpu idle 掉底检测,提供调用栈,火焰图,进程上下文信息等 | 容器 cpu 使用异常,帮助业务判断进程热点是否异常 |
| dload | 跟踪 D 状态进程提供容器运行情况、D 状态进程调用栈信息等 | 由于系统 D 或 R 状态进程数量突增导致负载升高的问题。系统 D 状态突增通常和资源不可用或者锁被长期持有相关R 状态进程数量突增往往是业务代码设计不合理导致 |
| waitrate | 容器 cpu 外部争抢检测,提供发生争抢时的容器信息等 | 容器 cpu 争抢可能会引起业务毛刺,已存在争抢指标缺乏具体争抢容器信息,通过 waitrate 追踪可以获取参与争抢的容器信息,给混部资源隔离提供参考 |
| memburst | 记录突发内存分配上下文 | 宿主机短时间内大量分配内存时,检测宿主机上短时间内大量分配内存的事件,突发性内存分配可能引发直接回收或者 oom 等 |
| iotracer | 检测宿主磁盘满、IO 延迟异常时,输出异常时 IO 访问的文件名和路径、磁盘设备、inode 号、容器等上下文信息 | 频繁出现磁盘 IO 带宽打满、磁盘访问突增,进而导致应用请求延迟或者系统性能抖动 |
## Events
| 事件名称 | 核心功能 | 场景 |
| ---------------| --------------------- |----------------------------------------|
| softirq | 宿主软中断延迟响应或长期关闭,输出长时间关闭软中断的调用栈,进程信息等 | 该类问题会严重影响网络收发,进而导致业务毛刺或者超时等其他问题 |
| dropwatch | TCP 数据包丢包检测,输出发生丢包时主机、网络上下文信息等 | 该类问题主要会引起业务毛刺和延迟 |
| netrecvlat | 在网络收方向获取数据包从驱动、协议栈、到用户主动收过程的延迟事件 | 网络延迟问题中有一类是数据传输阶段收方向存在延迟但不清楚是延迟位置netrecvlat case 根据 skb 入网卡时间戳依次在驱动、协议栈和用户 copy 数据的路径计算延迟,通过预先设定的阈值过滤超时的数据包,以定位延迟位置 |
| oom | 检测宿主或容器内 oom 事件 | 当宿主机层面或者容器维度发生 oom 事件时,能够获取触发 oom 的进程信息、被 kill 的进程信息以及容器信息,便于定位进程内存泄漏、异常退出等问题 |
| softlockup | 当系统上发生 softlockup 时,收集目标进程信息以及 cpu 信息,同时获取各个 cpu 上的内核栈信息 | 系统发生 softlockup |
| hungtask | 提供系统内所有 D 状态进程数量、内核栈信息 | 用于定位瞬时出现 D 进程的场景,能及时保留现场便于后期问题跟踪 |
| memreclaim | 进程进入直接回收的耗时,超过时间阈值,记录进程信息 | 内存压力过大时,如果此时进程申请内存,有可能进入直接回收,此时处于同步回收阶段,可能会造成业务进程的卡顿,此时记录进程进入直接回收的时间,有助于我们判断此进程被直接回收影响的剧烈程度 |
## Metrics
metrics 采集包括各子系统的众多指标,包括 cpu, memory, io, network 等metrics 主要来源 procfs, eBPF, 计算聚合等,以下为部分 Metrics 的简介。[详细参考](docs/metrics.md)
| 子系统 | Metric | 描述 | 维度 |
| ----------- | --------------- | ----------------------------------- | ------------------ |
| cpu | sys, usr, util | cpu 占用百分比 | 宿主、容器 |
| cpu | burst, throttled | cpu burst 时长, throttled/limited 的次数 | 容器 |
| cpu | inner, exter_wait_rate | 容器内外部争抢指数 | 容器 |
| cpu | nr_running, nr_uninterruptible | 对应状态的任务数 | 容器 |
| cpu | load 1, 5, 15 | 宿主的 1、5、15 分钟平滑负载值 | 宿主 |
| cpu | softirq_latency | NET_RX/NET_TX 中断延迟在指定区间内的次数 | 宿主 |
| cpu | runqlat_nlat | 调度延迟在指定时间段内的出现的次数 | 宿主、容器 |
| cpu | reschedipi_oversell_probability | VM 宿主机可能发生 cpu 超卖 | 宿主 |
| memory | direct_reclaim | 内存直接回收相关指标 | 容器 |
| memory | asyncreclaim | 内存异步回收相关指标 | 容器 |
| memory | vmstat, memory_stat | 其他内存状态指标 | 宿主、容器 |
| memory | hungtask, oom, softlockup | 事件计数统计 | 宿主、容器 |
| IO | d2c | 统计 IO 的延迟,只包括驱动和磁盘硬件处理部分 | 宿主、容器 |
| IO | q2c | 统计 IO 的延迟,包括整个 IO 生命周期 | 宿主、容器 |
| IO | disk_freeze | 统计磁盘 freeze 的事件次数 | 宿主 |
| IO | disk_flush | 统计 RAID 设备的 flush 操作延迟 | 宿主、容器 |
| network | arp | ARP 缓存数量 | 系统、宿主、容器 |
| network | tcp, udp mem | Socket 使用、socket 内存使用等 | 系统、宿主、容器 |
| network | qdisc | 网络出向队列状态统计 | 宿主 |
| network | netdev | 设备指标统计 | 宿主、容器 |
| network | netstat | 网络指标统计 | 宿主、容器 |
| network | sockstat | Socket 指标统计 | 宿主、容器 |
# 前端展示
## 机房内核事件总览
![](./docs/huatuo-cluster00.png)
![](./docs/huatuo-cluster01.png)
## AutoProfiling
![](./docs/huatuo-profiling.png)
## Dropwatch
![](./docs/huatuo-dropwatch.png)
## net_rx_latency
![](./docs/huatuo-netlatency.png)
# 联系我们
@[hao022](https://github.com/hao022)
@[nashuiliang](https://github.com/nashuiliang)

98
README_EN.md Normal file
View File

@ -0,0 +1,98 @@
[简体中文](./README_CN.md) | English
# Abstract
**HuaTuo (华佗)** aims to provide in-depth observability for the OS Linux kernel in complex **cloud-native** scenarios. The project is based on [eBPF](https://docs.kernel.org/userspace-api/ebpf/syscall.html) technology and has built a set of deep observation service components for the Linux kernel. By leveraging kernel dynamic tracing technologies such as [kprobe](https://www.kernel.org/doc/html/latest/trace/kprobes.html), [tracepoint](https://www.kernel.org/doc/html/latest/trace/tracepoints.html), and [ftrace](https://www.kernel.org/doc/html/latest/trace/ftrace.html), HuaTuo provides more observation perspectives for the Linux kernel, including kernel runtime context capture driven by anomalous events and more granular, accurate kernel per subsystem metrics.
HuaTuo also integrates core technologies such as automated tracing, profiling, and distributed tracing for system performance spikes. HuaTuo has been successfully applied on a large scale within Didi (DiDi Global Inc.), solidly guaranteeing the stability and performance optimization of cloud-native operating systems and showcasing the distinct advantages of eBPF technology in cloud-native scenarios.
# Key Features
- **Continuous** Kernel Observability: Achieves in-depth, low-overhead (less than 1% performance impact) instrumentation of various kernel subsystems, providing comprehensive metrics on memory, CPU scheduling, network stack, and disk I/O.
- Kernel **Anomaly-Driven** Observability: Instruments the kernel's exception paths and slow paths to capture rich runtime context triggered by anomalous events, enabling more insightful observability data.
- **Automated** Tracing (AutoTracing): Implements automated tracing capabilities to address system resource spikes and performance jitters (e.g., CPU idle drop, raising CPU sys utilization, I/O bursts, and Loadavg raising).
- **Smooth Transition** to Popular Observability Stacks: Provides standard data sources for Prometheus and Pyroscope, integrates with Kubernetes container resources, and automatically correlates Kubernetes labels/annotations with kernel event metrics, eliminating data silos, ensuring seamless integration and analysis across various data sources for comprehensive system monitoring.
# Getting Started
## run
HuaTuo provides a convenient way for quick getting started, all in one command as below:
```bash
$ docker compose --project-directory ./build/docker up
```
Run it in the project root directory, then open [http://localhost:3000](http://localhost:3000) to view the panels on your browser.
The upper command starts three dependencies containers: [elasticsearch](https://www.elastic.co), [prometheus](https://prometheus.io), [grafana](https://grafana.com), then compiles and starts huatuo-bamai.
- Data related to event-driven operations, such as Autotracing and Events, are stored in elasticsearch
- Metrics-related data is actively collected and stored by prometheus
- elasticsearch data reporting port: 9200
- prometheus data source port: 9090
- grafana port: 3000
## User-Defined Collection
The built-in modules cover most monitoring needs. Additionally, HuaTuo supports custom data collection with easy integration. [How to Add Custom Collection](./docs/CUSTOM.md)
# Architectures
![](./docs/huatuo-arch.svg)
# Observability Overview
## Exception Totals
![](./docs/huatuo-cluster00.png)
![](./docs/huatuo-cluster01.png)
## Profiling
![](./docs/huatuo-profiling.png)
## SKB dropwatch
![](./docs/huatuo-dropwatch.png)
## Net Latency
![](./docs/huatuo-netlatency.png)
# Functionality Overview
## Autotracing
| Tracing Name | Core Functionality | Scenarios |
| ------------ | ----------------------- | ------------------------------------ |
| cpu sys | Detects rising host cpu.sys utilization | Issues caused by abnormal cpu.sys load leading to jitters |
| cpu idle | Detects low CPU idle in containers, provides call stack, flame graphs, process context info, etc. | Abnormal container CPU usage, helps identify process hotspots |
| dload | Tracks processes in the D (uninterruptible) state, provides container runtime info, D-state process call stack, etc. | Issues caused by a sudden increase in the number of system D or R (runnable) state processes, leading to higher load. A spike in D-state processes is often related to unavailable resources or long-held locks, while R-state process spikes may indicate unreasonable user logic design |
| waitrate | Detects CPU contention in containers, provides information about the contending containers | CPU contention in containers can cause jitters, and the existing contention metrics lack specific container info. Waitrate tracking can provide the info about the containers involved in the contention, which can be used as a reference for resource isolation in hybrid deployment scenarios |
| memburst | Records burst memory allocation context | Detects events where the host allocates a large amount of memory in a short time, which can lead to direct reclaim or OOM |
| iotracer | When the host disk is full or I/O latency is abnormal, provides the file name, path, device, inode, and container context info for the abnormal I/O access | Frequent disk I/O bandwidth saturation or sudden I/O spikes can lead to application request latency or system performance jitters |
## Events
| Event Name | Core Functionality | Scenarios |
| -------------- | --------------------- | ------------------------------------ |
| softirq | When the kernel delayed response in soft interrupts or prolonged shutdown, supports the call stack and process information of the soft interrupts that have been shut down for an extended period of time. | This type of issue can severely impact network receive/transmit, leading to jitters or latency |
| dropwatch | Detects TCP packet drops, provides host and network context info when drops occur | This type of issue can cause jitters and latency |
| netrecvlat | Captures latency events along the data packet receive path from the driver, TCP/IP stack, to user-level | For network latency issues, there is a class where the receive side exhibits latency, but the location is unclear. The netrecvlat case calculates latency by timestamping the skb at the interface, driver, TCP/IP stack, and user-level copy, and filters timed-out packets to pinpoint the latency location |
| oom | Detects OOM events in the host or containers | When OOM events occur at the host or container level, it can obtain information about the triggering process, the killed process, and container details, which is helpful for diagnosing process memory leaks, abnormal exits, etc. |
| softlockup | When the system encounters a softlockup, it collects information about the target process, CPU, and kernel stack for per CPU | Used for investigating system softlockup incidents |
| hungtask | Provides the number of processes in the D (uninterruptible) state and their kernel stack info | Used to identify and save the context of processes that suddenly enter the D state, for later investigation |
| memreclaim | Records the latency when a process enters direct reclaim, if it exceeds a time threshold | When under memory pressure, if a process requests memory, it may enter direct reclaim, a synchronous reclaim phase that can cause process jitters. This records the time a process spends in direct reclaim, helping assess the impact on the affected process |
## Metrics
Metrics collection involves various indicators from per subsystem, including CPU, memory, IO, network, etc. The primary sources of these metrics are procfs, eBPF, and computational aggregation, as follows is a summary. [for details](docs/metrics.md)
| Subsystem | Metric | Description | Dimension |
| ------------| --------------- |------------------------------------ | ----------------------- |
| cpu | sys, usr, util | Percentage | host, container |
| cpu | burst, throttled | Number of periods burst occurs, times the group has been throttled/limited | container |
| cpu | inner, exter_wait_rate | Wait rate caused by processes inside/outside the container | container |
| cpu | nr_running, nr_uninterruptible | The number of running/uninterruptible tasks in the container | container |
| cpu | load 1, 5, 15 | System load avg over the last x minute | container |
| cpu | softirq_latency | The number of NET_RX/NET_TX irq latency happened | host |
| cpu | runqlat_nlat | The number of times when schedule latency of processes in host/container is within x~xms | host, container |
| cpu | reschedipi_oversell_probability | The possibility of cpu overselling exists on the host where the vm is located | host |
| memory | direct_reclaim | Time speed in page allocation in memory cgroup | container |
| memory | asyncreclaim | Memory cgroup's direct reclaim time in cgroup async memory reclaim | container |
| memory | vmstat, memory_stat | Memory statistics | host, container |
| memory | hungtask, oom, softlockup | Count of event happened | host, container |
| IO | d2c | Statistics of io latency when accessing the disk, including the time consumed by the driver and hardware components | host, container |
| IO | q2c | Statistics of io latency for the entire io lifecycle when accessing the disk | host, container |
| IO | disk_freeze | Statistics of disk freeze events | host |
| IO | disk_flush | Statistics of delay for flush operations on disk raid device | host, container |
| network | arp | ARP entries | system, host, container |
| network | tcp, udp mem | Socket memory | system |
| network | qdisc | Qdisc statistics | host |
| network | netdev | Network device metrics | host, container |
| network | netstat | Network statistics | host, container |
| network | sockstat | Socket statistics | host, container |
# Contact Us
You can report bugs, provide suggestions, or engage in discussions via GitHub Issues and GitHub Discussions. Alternatively, you can contact us in the following ways:

66
bpf/cgroup_css_events.c Normal file
View File

@ -0,0 +1,66 @@
#include "vmlinux.h"
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_common.h"
/* Longest kernfs node (cgroup directory) name we report. */
#define CGROUP_KNODE_NAME_MAXLEN 64

/* Event record streamed to userspace for each cgroup mkdir/rmdir. */
struct cgroup_perf_event_t {
	u64 cgroup;     /* kernel address of the struct cgroup */
	u64 ops_type;   /* 0 on mkdir path, 1 on rmdir path (see progs below) */
	s32 cgroup_root;    /* hierarchy_id of the cgroup's root */
	s32 cgroup_level;   /* depth of the cgroup in the tree */
	u64 css[CGROUP_SUBSYS_COUNT];   /* per-subsystem css pointers */
	/* kernfs node name; +2 so a read of a name longer than
	 * CGROUP_KNODE_NAME_MAXLEN is distinguishable by its length. */
	char knode_name[CGROUP_KNODE_NAME_MAXLEN + 2];
};

/* Per-CPU perf event array used to push records to userspace. */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(u32));
} cgroup_perf_events SEC(".maps");

char __license[] SEC("license") = "GPL";
/* TP_PROTO(struct cgroup *cgrp, const char *path) */
/*
 * Common body for the cgroup_mkdir/cgroup_rmdir raw tracepoints:
 * snapshot the cgroup's identity and css pointers and push the record
 * to userspace. @type distinguishes the caller (0 = mkdir, 1 = rmdir).
 * Always returns 0 (tracepoint return value is ignored by the kernel).
 */
static int
bpf_cgroup_event_class_prog(struct bpf_raw_tracepoint_args *ctx, u64 type)
{
	struct cgroup *cgrp = (void *)ctx->args[0];
	struct cgroup_perf_event_t data = {};
	int knode_len;

	/*
	 * Copy the kernfs node name. The *_str helper returns the copied
	 * length including the trailing NUL, so only names of exactly
	 * CGROUP_KNODE_NAME_MAXLEN characters pass the filter below --
	 * presumably to match 64-hex-char container IDs; TODO confirm
	 * with the userspace consumer.
	 *
	 * NOTE: switched from the deprecated bpf_probe_read_str()/
	 * bpf_probe_read() to the explicit _kernel variants (Linux 5.5+,
	 * which the CO-RE/BTF usage here already requires).
	 */
	knode_len = bpf_probe_read_kernel_str(&data.knode_name,
					      sizeof(data.knode_name),
					      BPF_CORE_READ(cgrp, kn, name));
	if (knode_len != CGROUP_KNODE_NAME_MAXLEN + 1)
		return 0;

	data.ops_type = type;
	data.cgroup = (u64)cgrp;
	data.cgroup_root = BPF_CORE_READ(cgrp, root, hierarchy_id);
	data.cgroup_level = BPF_CORE_READ(cgrp, level);

	/* Snapshot the per-subsystem css pointer array. */
	bpf_probe_read_kernel(&data.css, sizeof(u64) * CGROUP_SUBSYS_COUNT,
			      BPF_CORE_READ(cgrp, subsys));

	bpf_perf_event_output(ctx, &cgroup_perf_events, BPF_F_CURRENT_CPU,
			      &data, sizeof(data));
	return 0;
}
/* cgroup creation: emit an event with ops_type 0. */
SEC("raw_tracepoint/cgroup_mkdir")
int bpf_cgroup_mkdir_prog(struct bpf_raw_tracepoint_args *ctx)
{
    return bpf_cgroup_event_class_prog(ctx, 0);
}
/* cgroup removal: emit an event with ops_type 1. */
SEC("raw_tracepoint/cgroup_rmdir")
int bpf_cgroup_rmdir_prog(struct bpf_raw_tracepoint_args *ctx)
{
    return bpf_cgroup_event_class_prog(ctx, 1);
}

56
bpf/cgroup_css_gather.c Normal file
View File

@ -0,0 +1,56 @@
#include "vmlinux.h"
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_common.h"
/* Max cgroup kernfs-node name length captured (same layout as the event
 * in cgroup_css_events.c so user space can share the decoder). */
#define CGROUP_KNODE_NAME_MAXLEN 64

struct cgroup_perf_event_t {
    u64 cgroup;      /* kernel address of the struct cgroup */
    u64 ops_type;    /* always 0 for the gather pass */
    s32 cgroup_root; /* hierarchy id of the cgroup's root */
    s32 cgroup_level; /* depth in the hierarchy */
    u64 css[CGROUP_SUBSYS_COUNT]; /* css pointer per subsystem */
    char knode_name[CGROUP_KNODE_NAME_MAXLEN + 2]; /* NUL-terminated name */
};

struct {
    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
    __uint(key_size, sizeof(int));
    __uint(value_size, sizeof(u32));
} cgroup_perf_events SEC(".maps");

char __license[] SEC("license") = "GPL";
/* Gather pass: hooked on cgroup_clone_children_read so user space can
 * trigger it (by reading cgroup.clone_children) to enumerate already
 * existing cgroups; emits the same event format as the mkdir/rmdir hooks. */
SEC("kprobe/cgroup_clone_children_read")
int bpf_cgroup_clone_children_read_prog(struct pt_regs *ctx)
{
    struct cgroup_subsys_state *css = (void *)PT_REGS_PARM1(ctx);
    struct cgroup *cgrp = BPF_CORE_READ(css, cgroup);
    struct cgroup_perf_event_t data = {};
    int knode_len;

    /* knode name: only names of exactly CGROUP_KNODE_NAME_MAXLEN chars
     * pass (probe_read_str's return includes the NUL) — presumably
     * full-length container IDs; TODO confirm. */
    knode_len =
        bpf_probe_read_str(&data.knode_name, sizeof(data.knode_name),
                           BPF_CORE_READ(cgrp, kn, name));
    if (knode_len != CGROUP_KNODE_NAME_MAXLEN + 1)
        return 0;

    data.cgroup = (u64)cgrp;
    data.ops_type = 0;
    data.cgroup_root = BPF_CORE_READ(cgrp, root, hierarchy_id);
    data.cgroup_level = BPF_CORE_READ(cgrp, level);
    /* css */
    bpf_probe_read(&data.css, sizeof(u64) * CGROUP_SUBSYS_COUNT,
                   BPF_CORE_READ(cgrp, subsys));
    /* output */
    bpf_perf_event_output(ctx, &cgroup_perf_events, BPF_F_CURRENT_CPU,
                          &data, sizeof(data));
    return 0;
}

287
bpf/dropwatch.c Normal file
View File

@ -0,0 +1,287 @@
#include "vmlinux.h"
#include "vmlinux_net.h"
#include "bpf_common.h"
#include "bpf_ratelimit.h"
/* Drop classification carried in perf_event_t.type. */
#define TYPE_TCP_COMMON_DROP 1
#define TYPE_TCP_SYN_FLOOD 2
#define TYPE_TCP_LISTEN_OVERFLOW_HANDSHAKE1 3
#define TYPE_TCP_LISTEN_OVERFLOW_HANDSHAKE3 4

/* Bitfield layout of sock.__sk_flags_offset on kernels <= 4.18
 * (little-endian variant); see sk_get_type_and_protocol(). */
#define SK_FL_PROTO_SHIFT 8
#define SK_FL_PROTO_MASK 0x0000ff00
#define SK_FL_TYPE_SHIFT 16
#define SK_FL_TYPE_MASK 0xffff0000

/* One dropped-packet record pushed to user space. */
struct perf_event_t {
    u64 tgid_pid;  /* bpf_get_current_pid_tgid() at drop time */
    u32 saddr;     /* IPv4 addresses/ports in network byte order */
    u32 daddr;
    u16 sport;
    u16 dport;
    u32 seq;
    u32 ack_seq;
    u32 queue_mapping;
    u64 pkt_len;
    s64 stack_size; /* bytes written by bpf_get_stack(), or <0 on error */
    u64 stack[PERF_MAX_STACK_DEPTH];
    u32 sk_max_ack_backlog;
    u8 state;      /* TCP state of the owning socket */
    u8 type;       /* TYPE_TCP_* classification */
    char comm[TASK_COMM_LEN];
};

/* format: /sys/kernel/debug/tracing/events/skb/kfree_skb/format */
struct kfree_skb_args {
    unsigned long long pad;
    void *skbaddr;
    void *location;
    u16 protocol;
};

struct {
    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
    __uint(key_size, sizeof(int));
    __uint(value_size, sizeof(u32));
} perf_events SEC(".maps");

/* Per-CPU scratch slot: perf_event_t is too large for the BPF stack. */
struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __uint(max_entries, 1);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(struct perf_event_t));
} dropwatch_stackmap SEC(".maps");

char __license[] SEC("license") = "Dual MIT/GPL";
static const struct perf_event_t zero_data = {};
static const u32 stackmap_key = 0;
BPF_RATELIMIT(rate, 1, 100); // 100/s

/* Minimal shadow of struct sock for kernels >= 5.10, where sk_type and
 * sk_protocol became plain u16 fields; CO-RE relocates by field name. */
struct sock___5_10 {
    u16 sk_type;
    u16 sk_protocol;
}__attribute__((preserve_access_index));
/* Extract the socket type (SOCK_*) and protocol (IPPROTO_*) from @sk in a
 * kernel-version-portable way: old kernels pack them into a bitfield that
 * must be read via __sk_flags_offset, new kernels expose plain fields. */
static void sk_get_type_and_protocol(struct sock *sk, u16 *protocol, u16 *type)
{
    // kernel version <= 4.18
    //
    // struct sock {
    //     unsigned int __sk_flags_offset[0];
    // #ifdef __BIG_ENDIAN_BITFIELD
    // #define SK_FL_PROTO_SHIFT 16
    // #define SK_FL_PROTO_MASK  0x00ff0000
    // #
    // #define SK_FL_TYPE_SHIFT 0
    // #define SK_FL_TYPE_MASK  0x0000ffff
    // #else
    // #define SK_FL_PROTO_SHIFT 8
    // #define SK_FL_PROTO_MASK  0x0000ff00
    // #
    // #define SK_FL_TYPE_SHIFT 16
    // #define SK_FL_TYPE_MASK   0xffff0000
    // #endif
    //
    //     unsigned int sk_padding : 1,
    //                  sk_kern_sock : 1,
    //                  sk_no_check_tx : 1,
    //                  sk_no_check_rx : 1,
    //                  sk_userlocks : 4,
    //                  sk_protocol : 8,
    //                  sk_type : 16;
    // }
    if (bpf_core_field_exists(sk->__sk_flags_offset)) {
        u32 sk_flags;
        bpf_probe_read(&sk_flags, sizeof(sk_flags), &sk->__sk_flags_offset);
        /* callers compare only the low bits, so no masking is done here */
        *protocol = sk_flags >> SK_FL_PROTO_SHIFT;
        *type = sk_flags >> SK_FL_TYPE_SHIFT;
        return;
    }

    // kernel version >= 5.10
    //
    // struct sock {
    //     u16 sk_type;
    //     u16 sk_protocol;
    // }
    struct sock___5_10 *sk_new = (struct sock___5_10 *)sk;
    *protocol = BPF_CORE_READ(sk_new, sk_protocol);
    *type = BPF_CORE_READ(sk_new, sk_type);
    return;
}
/* Generic dropwatch: on every kfree_skb, report drops of established (not
 * CLOSE) IPv4 TCP-stream sockets, with headers and a kernel stack trace. */
SEC("tracepoint/skb/kfree_skb")
int bpf_kfree_skb_prog(struct kfree_skb_args *ctx)
{
    struct sk_buff *skb = ctx->skbaddr;
    struct perf_event_t *data = NULL;
    struct sock_common *sk_common;
    struct tcphdr tcphdr;
    struct iphdr iphdr;
    struct sock *sk;
    u16 protocol = 0;
    u16 type = 0;
    u8 state = 0;

    /* only for IP && TCP */
    if (ctx->protocol != ETH_P_IP)
        return 0;
    bpf_probe_read(&iphdr, sizeof(iphdr), skb_network_header(skb));
    if (iphdr.protocol != IPPROTO_TCP)
        return 0;

    sk = BPF_CORE_READ(skb, sk);
    if (!sk)
        return 0;
    sk_common = (struct sock_common *)sk;

    // filter the sock by AF_INET, SOCK_STREAM, IPPROTO_TCP
    if (BPF_CORE_READ(sk_common, skc_family) != AF_INET)
        return 0;
    sk_get_type_and_protocol(sk, &protocol, &type);
    if ((u8)protocol != IPPROTO_TCP || type != SOCK_STREAM)
        return 0;

    // filter not CLOSE
    state = BPF_CORE_READ(sk_common, skc_state);
    if (state == TCP_CLOSE || state == 0)
        return 0;

    // ratelimit
    if (bpf_ratelimited(&rate))
        return 0;

    /* per-CPU scratch buffer (event too big for the BPF stack) */
    data = bpf_map_lookup_elem(&dropwatch_stackmap, &stackmap_key);
    if (!data) {
        return 0;
    }
    bpf_probe_read(&tcphdr, sizeof(tcphdr), skb_transport_header(skb));

    /* event */
    data->tgid_pid = bpf_get_current_pid_tgid();
    bpf_get_current_comm(&data->comm, sizeof(data->comm));
    data->type = TYPE_TCP_COMMON_DROP;
    data->state = state;
    data->saddr = iphdr.saddr;
    data->daddr = iphdr.daddr;
    data->sport = tcphdr.source;
    data->dport = tcphdr.dest;
    data->seq = tcphdr.seq;
    data->ack_seq = tcphdr.ack_seq;
    data->pkt_len = BPF_CORE_READ(skb, len);
    data->queue_mapping = BPF_CORE_READ(skb, queue_mapping);
    data->stack_size = bpf_get_stack(ctx, data->stack, sizeof(data->stack), 0);
    data->sk_max_ack_backlog = 0; // ignore sk_max_ack_backlog in dropwatch case.
    // output
    bpf_perf_event_output(ctx, &perf_events, BPF_F_CURRENT_CPU, data, sizeof(*data));
    // clean: reset the scratch slot so stale fields never leak into the
    // next event on this CPU
    bpf_map_update_elem(&dropwatch_stackmap, &stackmap_key, &zero_data, BPF_EXIST);
    return 0;
}
// The current kernel does not support kprobe+offset very well, waiting for kpatch to come online.
// NOTE(review): this disabled region calls bpf_ratelimited(ctx, rate) with
// two arguments, but the live bpf_ratelimited() takes a single pointer —
// it will not compile if re-enabled without fixing those call sites.
#if 0
/* Shared body for the overflow kprobes: fill and emit one perf_event_t. */
static int fill_overflow_event(void *ctx, u8 type, struct sock *sk, struct sk_buff *skb)
{
    struct perf_event_t *data = NULL;
    struct iphdr iphdr;
    struct tcphdr tcphdr;

    data = bpf_map_lookup_elem(&dropwatch_stackmap, &stackmap_key);
    if (!data) {
        return 0;
    }
    bpf_probe_read(&iphdr, sizeof(iphdr), skb_network_header(skb));
    bpf_probe_read(&tcphdr, sizeof(tcphdr), skb_transport_header(skb));

    /* event */
    data->tgid_pid = bpf_get_current_pid_tgid();
    bpf_get_current_comm(&data->comm, sizeof(data->comm));
    data->type = type;
    data->state = 0;
    data->saddr = iphdr.saddr;
    data->daddr = iphdr.daddr;
    data->sport = tcphdr.source;
    data->dport = tcphdr.dest;
    data->seq = tcphdr.seq;
    data->ack_seq = tcphdr.ack_seq;
    data->pkt_len = BPF_CORE_READ(skb, len);
    data->queue_mapping = BPF_CORE_READ(skb, queue_mapping);
    data->stack_size = 0; // ignore stack in not-overflow.
    data->sk_max_ack_backlog = BPF_CORE_READ(sk, sk_max_ack_backlog);
    // output
    bpf_perf_event_output(ctx, &perf_events, BPF_F_CURRENT_CPU, data, sizeof(*data));
    // clean
    bpf_map_update_elem(&dropwatch_stackmap, &stackmap_key, &zero_data, BPF_EXIST);
    return 0;
}

// the dropwatch case: syn_flood.
// NOTE(review): the register-to-argument mapping below is specific to one
// compiled kernel image; the +offset must be re-derived per kernel build.
SEC("kprobe/tcp_conn_request+1290")
int bpf_tcp_syn_flood_action_prog(struct pt_regs *ctx)
{
    // the function of `tcp_syn_flood_action` arguments:
    // %r15: struct sock *sk
    // %r13: struct sk_buff *skb
    struct sock *sk = (void *)ctx->r15;
    struct sk_buff *skb= (void *)ctx->r13;
    // ratelimit
    if (bpf_ratelimited(ctx, rate))
        return 0;
    // fill
    return fill_overflow_event(ctx, TYPE_TCP_SYN_FLOOD, sk, skb);
}

// the dropwatch case: listen-overflow in the TCP_CLOSE state(client: TCP_SYN_SENT).
SEC("kprobe/tcp_conn_request+167")
int bpf_tcp_listen_overflow_handshake1_prog(struct pt_regs *ctx)
{
    // this position has registers as follows:
    // %r15: struct sock *sk
    // %r13: struct sk_buff *skb
    struct sock *sk = (void *)ctx->r15;
    struct sk_buff *skb= (void *)ctx->r13;
    // ratelimit
    if (bpf_ratelimited(ctx, rate))
        return 0;
    // fill
    return fill_overflow_event(ctx, TYPE_TCP_LISTEN_OVERFLOW_HANDSHAKE1, sk, skb);
}

// the dropwatch case: listen-overflow in the TCP_NEW_SYN_RECV state(client: TCP_ESTABLISHED).
SEC("kprobe/tcp_v4_syn_recv_sock+700")
int bpf_tcp_listen_overflow_handshake3_prog(struct pt_regs *ctx)
{
    // this position has registers as follows:
    // %rdi: struct sock *sk
    // %rsi: struct sk_buff *skb
    // %r15: struct request_sock *req
    struct sock *sk = (void *)ctx->di;
    struct sk_buff *skb= (void *)ctx->si;
    // ratelimit
    if (bpf_ratelimited(ctx, rate))
        return 0;
    // fill
    return fill_overflow_event(ctx, TYPE_TCP_LISTEN_OVERFLOW_HANDSHAKE3, sk, skb);
}
#endif

43
bpf/hungtask.c Normal file
View File

@ -0,0 +1,43 @@
#include "vmlinux.h"
#include "bpf_common.h"
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include "bpf_ratelimit.h"
char __license[] SEC("license") = "Dual MIT/GPL";

/* Assumed upper bound on online CPUs, used only to size the rate limit;
 * TODO confirm hosts never exceed 128 CPUs. */
#define CPU_NUM 128
/* 1s window, CPU_NUM * 10000 events/window, no max_burst cap. */
BPF_RATELIMIT_IN_MAP(rate, 1, CPU_NUM * 10000, 0);

struct {
    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
    __uint(key_size, sizeof(int));
    __uint(value_size, sizeof(u32));
} hungtask_perf_events SEC(".maps");

/* Record sent to user space for each hung task. */
struct hungtask_info {
    int32_t pid;
    char comm[TASK_COMM_LEN];
};

/* Layout of the sched_process_hang tracepoint buffer
 * (/sys/kernel/debug/tracing/events/sched/sched_process_hang/format). */
struct tracepoint_args {
    unsigned long pad;
    char comm[TASK_COMM_LEN];
    int pid;
};
/* Forward each sched_process_hang event (pid + comm) to user space,
 * subject to the rate limit above. */
SEC("tracepoint/sched/sched_process_hang")
int tracepoint_sched_process_hang(struct tracepoint_args *ctx)
{
    struct hungtask_info info = {};

    if (bpf_ratelimited_in_map(ctx, rate))
        return 0;

    info.pid = ctx->pid;
    // custom defined struct can't use BPF_CORE_READ_STR_INTO()
    bpf_probe_read_str(&info.comm, TASK_COMM_LEN, ctx->comm);
    bpf_perf_event_output(ctx, &hungtask_perf_events, BPF_F_CURRENT_CPU, &info, sizeof(info));
    return 0;
}

42
bpf/include/bpf_common.h Normal file
View File

@ -0,0 +1,42 @@
#ifndef __BPF_COMMON_H__
#define __BPF_COMMON_H__

/* Shared constants for BPF programs in this tree; values mirror the
 * kernel UAPI so programs do not depend on uapi headers directly. */

#ifndef NULL
#define NULL ((void *)0)
#endif

/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
 * BPF_FUNC_perf_event_read_value flags.
 */
enum {
    BPF_F_INDEX_MASK = 0xffffffffULL,
    BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
    /* BPF_FUNC_perf_event_output for sk_buff input context. */
    BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};

/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
enum {
    BPF_F_SKIP_FIELD_MASK = 0xffULL,
    BPF_F_USER_STACK = (1ULL << 8),
    /* flags used by BPF_FUNC_get_stackid only. */
    BPF_F_FAST_STACK_CMP = (1ULL << 9),
    BPF_F_REUSE_STACKID = (1ULL << 10),
    /* flags used by BPF_FUNC_get_stack only. */
    BPF_F_USER_BUILD_ID = (1ULL << 11),
};

#define TASK_COMM_LEN 16
#define PATH_MAX 4096 /* # chars in a path name including nul */

/* include/uapi/linux/perf_event.h */
#define PERF_MAX_STACK_DEPTH 127
#define PERF_MIN_STACK_DEPTH 16

/* flags for BPF_MAP_UPDATE_ELEM command */
#define BPF_ANY 0 /* create new element or update existing */
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
#define BPF_EXIST 2 /* update existing element */
#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */

#endif /* __BPF_COMMON_H__ */

View File

@ -0,0 +1,48 @@
#ifndef __BPF_FUNC_TRACE_H__
#define __BPF_FUNC_TRACE_H__
#include <bpf/bpf_helpers.h>
/* One in-flight latency measurement, keyed by id (typically pid_tgid). */
struct trace_entry_ctx {
    u64 id;
    u64 start_ns; /* bpf_ktime_get_ns() at func_trace_begain() */
    u64 delta_ns; /* filled by func_trace_end() */
};

struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __type(key, u64);
    __type(value, struct trace_entry_ctx);
    __uint(max_entries, 10240);
} func_trace_map SEC(".maps");
/* Open a latency measurement for @id (typically bpf_get_current_pid_tgid()),
 * recording the current monotonic timestamp. */
static __always_inline void func_trace_begain(u64 id)
{
    struct trace_entry_ctx opened = {};

    opened.id = id;
    opened.start_ns = bpf_ktime_get_ns();
    bpf_map_update_elem(&func_trace_map, &id, &opened, BPF_ANY);
}
/* Close the measurement for @id: compute the elapsed nanoseconds in place
 * and return the entry, or NULL when no matching begin was recorded. */
static __always_inline struct trace_entry_ctx *func_trace_end(u64 id)
{
    struct trace_entry_ctx *found;

    found = bpf_map_lookup_elem(&func_trace_map, &id);
    if (found)
        found->delta_ns = bpf_ktime_get_ns() - found->start_ns;
    return found;
}
/* Drop the entry for @id after its latency has been consumed. */
static __always_inline void func_trace_destroy(u64 id)
{
    bpf_map_delete_elem(&func_trace_map, &id);
}
#endif

113
bpf/include/bpf_ratelimit.h Normal file
View File

@ -0,0 +1,113 @@
#ifndef __BPF_RATELIMIT_H__
#define __BPF_RATELIMIT_H__
#include <bpf/bpf_helpers.h>
/* Token-bucket-style rate limiter state, shared by all CPUs. */
struct bpf_ratelimit {
    uint64_t interval; // unit: second
    uint64_t begin;    // start of the current window (seconds)
    uint64_t burst; // max events/interval
    uint64_t max_burst; // max burst
    uint64_t events; // current events/interval
    uint64_t nmissed; // missed events/interval
    uint64_t total_events; // total events
    uint64_t total_nmissed; // total missed events
    uint64_t total_interval; // total interval
};

/* Define a global (bss) rate limiter: @interval seconds per window,
 * at most @burst events per window. */
#define BPF_RATELIMIT(name, interval, burst) \
    struct bpf_ratelimit name = {interval, 0, burst, 0, 0, 0, 0, 0, 0}
// bpf_ratelimited: whether the threshold is exceeded
//
// @rate: struct bpf_ratelimit *
// @return:
//   true: the threshold is exceeded
//   false: the threshold is not exceeded
//
// NOTE(review): the window reset (begin/events/nmissed) is not atomic, so
// concurrent CPUs crossing a window boundary may race; counts are treated
// as best-effort — confirm that is acceptable for all users.
static __always_inline bool bpf_ratelimited(struct bpf_ratelimit *rate)
{
    // validate
    if (rate == NULL || rate->interval == 0)
        return false;

    /* coarse 1-second resolution clock */
    u64 curr = bpf_ktime_get_ns() / 1000000000;
    if (rate->begin == 0)
        rate->begin = curr;

    /* window expired: account its length and start a fresh one */
    if (curr > rate->begin + rate->interval) {
        __sync_fetch_and_add(&rate->total_interval, curr - rate->begin);
        rate->begin = curr;
        rate->events = rate->nmissed = 0;
    }

    if (rate->burst && rate->burst > rate->events) {
        __sync_fetch_and_add(&rate->events, 1);
        __sync_fetch_and_add(&rate->total_events, 1);
        return false;
    }

    __sync_fetch_and_add(&rate->nmissed, 1);
    __sync_fetch_and_add(&rate->total_nmissed, 1);
    return true;
}
#define BPF_RATELIMIT_IN_MAP(name, interval, burst, max_burst) \
struct { \
__uint(type, BPF_MAP_TYPE_ARRAY); \
__uint(key_size, sizeof(u32)); \
__uint(value_size, sizeof(struct bpf_ratelimit)); \
__uint(max_entries, 1); \
} bpf_rlimit_##name SEC(".maps"); \
struct { \
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); \
__uint(key_size, sizeof(int)); \
__uint(value_size, sizeof(u32)); \
} event_bpf_rlimit_##name SEC(".maps"); \
volatile const struct bpf_ratelimit ___bpf_rlimit_cfg_##name = { \
interval, 0, burst, max_burst, 0, 0, 0, 0, 0}
// bpf_ratelimited_in_map: whether the threshold is exceeded
//
// @rate: struct bpf_ratelimit *
// @return:
// true: the threshold is exceeded
// false: the threshold is not exceeded
#define bpf_ratelimited_in_map(ctx, rate) \
bpf_ratelimited_core_in_map(ctx, &bpf_rlimit_##rate, \
&event_bpf_rlimit_##rate, \
&___bpf_rlimit_cfg_##rate)
/* Map-backed rate limit check: lazily seed the map entry from @cfg, defer
 * to bpf_ratelimited(), and emit one perf notification per window when
 * throttling begins (or when max_burst is exceeded). */
static __always_inline bool
bpf_ratelimited_core_in_map(void *ctx, void *map, void *perf_map,
                            const volatile struct bpf_ratelimit *cfg)
{
    u32 key = 0;
    struct bpf_ratelimit *rate = NULL;

    rate = bpf_map_lookup_elem(map, &key);
    if (rate == NULL)
        return false;

    // init from cfg (interval == 0 means the entry is still zeroed)
    if (rate->interval == 0) {
        rate->interval = cfg->interval;
        rate->burst = cfg->burst;
        rate->max_burst = cfg->max_burst;
    }

    // the threshold is not exceeded, return false
    u64 old_nmissed = rate->nmissed;
    if (!bpf_ratelimited(rate))
        return false;

    // the threshold/max_burst is exceeded, notify once in a cycle
    if (old_nmissed == 0 || (rate->max_burst > 0 &&
        rate->nmissed > rate->max_burst - rate->burst))
        bpf_perf_event_output(ctx, perf_map, BPF_F_CURRENT_CPU, rate,
                              sizeof(struct bpf_ratelimit));
    return true;
}
#endif

26
bpf/include/vmlinux_net.h Normal file
View File

@ -0,0 +1,26 @@
#ifndef __VMLINUX_NET_H__
#define __VMLINUX_NET_H__
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
/* Networking constants copied from the kernel UAPI headers. */
#define IFNAMSIZ 16
#define ETH_P_IP 0x0800 /* Internet Protocol packet */
#define AF_INET 2 /* Internet IP Protocol */
#define IP_MF 0x2000 /* Flag: "More Fragments" */
#define IP_OFFSET 0x1FFF /* "Fragment Offset" part */
// skb_network_header - kernel address of the L3 (network) header of @skb,
// derived from skb->head plus the stored header offset.
static inline unsigned char *skb_network_header(struct sk_buff *skb)
{
    unsigned char *head = BPF_CORE_READ(skb, head);

    return head + BPF_CORE_READ(skb, network_header);
}
// skb_transport_header - kernel address of the L4 (transport) header of
// @skb, derived from skb->head plus the stored header offset.
static inline unsigned char *skb_transport_header(struct sk_buff *skb)
{
    unsigned char *head = BPF_CORE_READ(skb, head);

    return head + BPF_CORE_READ(skb, transport_header);
}
#endif

View File

@ -0,0 +1,7 @@
#ifndef __VMLINUX_SCHED_H__
#define __VMLINUX_SCHED_H__

/* copy from include/linux/sched.h */
#define PF_KSWAPD 0x00020000 /* I am kswapd */

#endif

25
bpf/lacp.c Normal file
View File

@ -0,0 +1,25 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include "bpf_common.h"
/* Perf channel used purely as a notification: payload is a dummy u64. */
struct {
    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
    __uint(key_size, sizeof(int));
    __uint(value_size, sizeof(u32));
} ad_event_map SEC(".maps");
/* Fires when the bonding driver disables an 802.3ad aggregator port.
 * The bonding code lives in a kernel module, and older kernels cannot
 * CO-RE-relocate into module types, so no data is extracted here: the
 * event itself is the signal, and user space does the rest. */
SEC("kprobe/ad_disable_collecting_distributing")
int ad_disable(struct pt_regs *ctx)
{
    u64 placeholder = 0;

    bpf_perf_event_output(ctx, &ad_event_map, BPF_F_CURRENT_CPU,
                          &placeholder, sizeof(placeholder));
    return 0;
}
char __license[] SEC("license") = "Dual MIT/GPL";

55
bpf/memory_cgroup.c Normal file
View File

@ -0,0 +1,55 @@
#include "bpf_common.h"
#include "vmlinux.h"
#include "vmlinux_sched.h"
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char __license[] SEC("license") = "Dual MIT/GPL";
/* Per-memcg counters, keyed by the memory css kernel address. */
struct mem_cgroup_metric {
    /* cg: direct reclaim count caused by try_charge */
    unsigned long directstall_count;
};

struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __type(key, unsigned long);
    __type(value, struct mem_cgroup_metric);
    __uint(max_entries, 10240);
} mem_cgroup_map SEC(".maps");
/* Count direct-reclaim entries per memcg (kswapd is excluded so only
 * allocation-path stalls are counted), keyed by the memory css address. */
SEC("tracepoint/vmscan/mm_vmscan_memcg_reclaim_begin")
int tracepoint_vmscan_mm_vmscan_memcg_reclaim_begin(struct pt_regs *ctx)
{
    struct cgroup_subsys_state *mm_subsys;
    struct mem_cgroup_metric *valp;
    struct task_struct *task;

    task = (struct task_struct *)bpf_get_current_task();
    /* background reclaim by kswapd is not a stall for this task */
    if (BPF_CORE_READ(task, flags) & PF_KSWAPD)
        return 0;

    mm_subsys = BPF_CORE_READ(task, cgroups, subsys[memory_cgrp_id]);
    valp = bpf_map_lookup_elem(&mem_cgroup_map, &mm_subsys);
    if (!valp) {
        struct mem_cgroup_metric new_metrics = {
            .directstall_count = 1,
        };
        bpf_map_update_elem(&mem_cgroup_map, &mm_subsys, &new_metrics,
                            BPF_ANY);
        return 0;
    }
    __sync_fetch_and_add(&valp->directstall_count, 1);
    return 0;
}
/* When a memcg css is released, drop its counter entry so the map does
 * not accumulate stale cgroups. */
SEC("kprobe/mem_cgroup_css_released")
int kprobe_mem_cgroup_css_released(struct pt_regs *ctx)
{
    u64 released_css = PT_REGS_PARM1(ctx);

    bpf_map_delete_elem(&mem_cgroup_map, &released_css);
    return 0;
}

90
bpf/memory_free_compact.c Normal file
View File

@ -0,0 +1,90 @@
#include "vmlinux.h"
#include "bpf_common.h"
#include "bpf_func_trace.h"
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
/* Host-wide accumulated latencies (ns), single entry at key 0. */
struct mm_free_compact_entry {
    /* host: compaction latency */
    unsigned long compaction_stat;
    /* host: page alloc latency in direct reclaim */
    unsigned long allocstall_stat;
};

struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __type(key, int);
    __type(value, struct mm_free_compact_entry);
    __uint(max_entries, 10240);
} mm_free_compact_map SEC(".maps");
char __license[] SEC("license") = "Dual MIT/GPL";
/* Fold a direct-reclaim latency and/or a compaction latency (ns) into the
 * single host-wide entry at key 0, creating it on first use. */
static __always_inline void
update_metric_map(u64 free_delta_ns, u64 compact_delta_ns)
{
    int slot = 0;
    struct mm_free_compact_entry *ent;

    ent = bpf_map_lookup_elem(&mm_free_compact_map, &slot);
    if (ent) {
        if (free_delta_ns)
            __sync_fetch_and_add(&ent->allocstall_stat, free_delta_ns);
        if (compact_delta_ns)
            __sync_fetch_and_add(&ent->compaction_stat, compact_delta_ns);
        return;
    }

    /* first sample: seed the entry directly with the deltas */
    struct mm_free_compact_entry seed = {
        .allocstall_stat = free_delta_ns,
        .compaction_stat = compact_delta_ns,
    };
    bpf_map_update_elem(&mm_free_compact_map, &slot, &seed, BPF_ANY);
}
/* Finish the current task's trace and fold its latency into the metric
 * map; @free_pages selects the direct-reclaim counter, otherwise the
 * compaction counter receives the delta. */
static __always_inline void func_trace_end_and_update_metric(bool free_pages)
{
    struct trace_entry_ctx *tr;

    tr = func_trace_end(bpf_get_current_pid_tgid());
    if (!tr)
        return;

    update_metric_map(free_pages ? tr->delta_ns : 0,
                      free_pages ? 0 : tr->delta_ns);
    func_trace_destroy(tr->id);
}
/* Direct reclaim: time from reclaim begin to end per task. */
SEC("tracepoint/vmscan/mm_vmscan_direct_reclaim_begin")
int tracepoint_try_to_free_pages_begin(struct pt_regs *ctx)
{
    func_trace_begain(bpf_get_current_pid_tgid());
    return 0;
}

SEC("tracepoint/vmscan/mm_vmscan_direct_reclaim_end")
int tracepoint_try_to_free_pages_end(struct pt_regs *ctx)
{
    /* true: credit the delta to the direct-reclaim (allocstall) counter */
    func_trace_end_and_update_metric(true);
    return 0;
}

/* Compaction: time spent inside try_to_compact_pages per task. */
SEC("kprobe/try_to_compact_pages")
int kprobe_try_to_compact_pages_host(struct pt_regs *ctx)
{
    func_trace_begain(bpf_get_current_pid_tgid());
    return 0;
}

SEC("kretprobe/try_to_compact_pages")
int kretprobe_try_to_compact_pages_host(struct pt_regs *ctx)
{
    /* false: credit the delta to the compaction counter */
    func_trace_end_and_update_metric(false);
    return 0;
}

64
bpf/memory_reclaim.c Normal file
View File

@ -0,0 +1,64 @@
#include "vmlinux.h"
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_common.h"
#include "bpf_func_trace.h"
#include "bpf_ratelimit.h"
char __license[] SEC("license") = "Dual MIT/GPL";
/* Reporting threshold in ns; set from user space before load (rodata). */
volatile const unsigned long deltath = 0;

struct {
    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
    __uint(key_size, sizeof(int));
    __uint(value_size, sizeof(u32));
} reclaim_perf_events SEC(".maps");

/* One slow-reclaim event sent to user space. */
struct reclaim_entry {
    char comm[TASK_COMM_LEN];
    u64 delta_time; /* reclaim duration in ns */
    u64 css;        /* css kernel address identifying the cgroup */
    u64 pid;        /* actually pid_tgid (see kretprobe handler) */
};
/* Stamp the entry time of direct reclaim for the current task. */
SEC("kprobe/try_to_free_pages")
int kprobe_try_to_free_pages(struct pt_regs *ctx)
{
    func_trace_begain(bpf_get_current_pid_tgid());
    return 0;
}
/* On reclaim exit, report reclaims slower than deltath to user space. */
SEC("kretprobe/try_to_free_pages")
int kretprobe_try_to_free_pages(struct pt_regs *ctx)
{
    struct trace_entry_ctx *entry;
    struct task_struct *task;

    entry = func_trace_end(bpf_get_current_pid_tgid());
    if (!entry)
        return 0;

    if (entry->delta_ns > deltath) {
        task = (struct task_struct *)bpf_get_current_task();
        struct reclaim_entry data = {
            .pid = entry->id, /* NOTE: this is the full pid_tgid */
            /* NOTE(review): a memory-reclaim event records the *cpu*
             * cgroup css here, not memory_cgrp_id — confirm whether
             * user space keys containers off the cpu css on purpose. */
            .css = (u64)BPF_CORE_READ(task, cgroups,
                                      subsys[cpu_cgrp_id]),
            .delta_time = entry->delta_ns,
        };
        bpf_get_current_comm(data.comm, sizeof(data.comm));
        bpf_perf_event_output(ctx, &reclaim_perf_events,
                              BPF_F_CURRENT_CPU, &data,
                              sizeof(struct reclaim_entry));
    }
    func_trace_destroy(entry->id);
    return 0;
}

101
bpf/monsoftirq_tracing.c Normal file
View File

@ -0,0 +1,101 @@
#include "vmlinux.h"
#include "bpf_common.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#define NSEC_PER_MSEC 1000000UL
#define NSEC_PER_USEC 1000UL
/* Power-of-two upper bound on softirq vectors so array indexes can be
 * masked for the verifier (NR_SOFTIRQS is 10 on current kernels). */
#define NR_SOFTIRQS_MAX 16 // must be 2^order

/* Latency histogram buckets (raise -> entry delay). */
enum lat_zone {
    LAT_ZONE0=0, // 0 ~ 10us
    LAT_ZONE1, // 10us ~ 100us
    LAT_ZONE2, // 100us ~ 1ms
    LAT_ZONE3, // 1ms ~ inf
    LAT_ZONE_MAX,
};

/* Layout of the softirq_raise/softirq_entry tracepoint buffer. */
struct tp_softirq {
    unsigned long long pad;
    unsigned int vec;
};

// Because bpf access array is strictly checked,
// the size of the array must be aligned in order
// of 2, so we should not use NR_SOFTIRQS, but
// use NR_SOFTIRQS_MAX as the size of the array
struct softirq_lat {
    u64 silat[NR_SOFTIRQS_MAX][LAT_ZONE_MAX];
};

struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    //key -> NR_SOFTIRQS
    __type(key, u32);
    // value -> ts, record softirq_raise start time
    __type(value, u64);
    __uint(max_entries, NR_SOFTIRQS);
} silat_map SEC(".maps");//softirq latency map

/* Host-wide histogram, single entry at key 0, read by user space. */
struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(struct softirq_lat));
    __uint(max_entries, 1);
} softirq_lats SEC(".maps");
/* Record when a softirq vector is raised; softirq_entry computes the
 * raise->entry latency from this timestamp.
 * NOTE(review): tracepoint programs conventionally return int; the void
 * return relies on the loader not checking it — confirm it is accepted
 * on all target kernels. */
SEC("tracepoint/irq/softirq_raise")
void probe_softirq_raise(struct tp_softirq *ctx)
{
    u32 nr;
    u64 now;

    nr = ctx->vec;
    now = bpf_ktime_get_ns();
    bpf_map_update_elem(&silat_map, &nr, &now, BPF_ANY);
}
/* Bucket the raise->entry latency of vector @nr into the histogram.
 * The index is masked with NR_SOFTIRQS_MAX-1 only to satisfy the
 * verifier's bounds check; real vectors are always in range. */
static void
calc_softirq_latency(struct softirq_lat *lat_mc, u32 nr, u64 now)
{
    u64 lat, *ts;

    ts = bpf_map_lookup_elem(&silat_map, &nr);
    if (!ts)
        return;
    lat = now - *ts;

    //update to metrics
    if (lat < 10 * NSEC_PER_USEC) { //10us
        __sync_fetch_and_add(&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE0], 1);
    } else if (lat < 100 * NSEC_PER_USEC) {//100us
        __sync_fetch_and_add(&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE1], 1);
    } else if (lat < 1 * NSEC_PER_MSEC) {//1ms
        __sync_fetch_and_add(&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE2], 1);
    } else {//1ms+
        __sync_fetch_and_add(&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE3], 1);
    }
}
/* On softirq execution, compute raise->entry latency and bucket it. */
SEC("tracepoint/irq/softirq_entry")
void probe_softirq_entry(struct tp_softirq *ctx)
{
    u32 key = 0, nr;
    u64 now;
    struct softirq_lat *lat_mc;

    lat_mc = bpf_map_lookup_elem(&softirq_lats, &key);
    if (!lat_mc)
        return;

    nr = ctx->vec;
    now = bpf_ktime_get_ns();
    // update softirq lat to lat metric
    calc_softirq_latency(lat_mc, nr, now);
}
char __license[] SEC("license") = "Dual MIT/GPL";

175
bpf/netrecvlat.c Normal file
View File

@ -0,0 +1,175 @@
//go:build ignore
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
#include "bpf_common.h"
#include "vmlinux_net.h"
#include "bpf_ratelimit.h"
/* All three are rodata set by user space before load. mono_wall_offset
 * converts CLOCK_MONOTONIC to the wall clock used by skb->tstamp; the
 * to_* values are per-stage latency reporting thresholds in ns. */
volatile const long long mono_wall_offset = 0;
volatile const long long to_netif = 5 * 1000 * 1000; // 5ms
volatile const long long to_tcpv4 = 10 * 1000 * 1000; // 10ms
volatile const long long to_user_copy = 115 * 1000 * 1000; // 115ms

#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

BPF_RATELIMIT(rate, 1, 100);

/* Tracepoint buffer layouts (both carry just the skb pointer). */
struct netif_receive_skb_args {
    struct trace_entry entry;
    struct sk_buff *skb;
};

struct skb_copy_datagram_iovec_args {
    struct trace_entry entry;
    struct sk_buff *skb;
};

/* One slow-delivery event sent to user space. */
struct perf_event_t {
    char comm[TASK_COMM_LEN]; /* valid only for TO_USER_COPY */
    u64 latency;  /* ns since skb->tstamp */
    u64 tgid_pid; /* valid only for TO_USER_COPY */
    u64 pkt_len;
    u16 sport;
    u16 dport;
    u32 saddr;
    u32 daddr;
    u32 seq;
    u32 ack_seq;
    u8 state; /* TCP state, 0 when unknown */
    u8 where; /* enum skb_rcv_where stage */
};

/* Measurement points along the receive path. */
enum skb_rcv_where {
    TO_NETIF_RCV,
    TO_TCPV4_RCV,
    TO_USER_COPY,
};

struct {
    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
    __uint(key_size, sizeof(int));
    __uint(value_size, sizeof(u32));
} net_recv_lat_event_map SEC(".maps");

/* Bundle of per-call values passed to fill_and_output_event(). */
struct mix {
    struct iphdr *ip_hdr;
    u64 lat;
    u8 state;
    u8 where;
};
static inline u64 delta_now_skb_tstamp(struct sk_buff *skb)
{
u64 tstamp = BPF_CORE_READ(skb, tstamp);
// although the skb->tstamp record is opened in user space by SOF_TIMESTAMPING_RX_SOFTWARE,
// it is still 0 in the following cases:
// unix recv, netlink recv, few virtual dev(e.g. tun dev, napi dsabled)
if (!tstamp)
return 0;
return bpf_ktime_get_ns() + mono_wall_offset - tstamp;
}
/* TCP state of the socket owning @skb (reads skb->sk->skc_state). */
static inline u8 get_state(struct sk_buff *skb)
{
    return BPF_CORE_READ(skb, sk, __sk_common.skc_state);
}
/* Build a perf_event_t from the skb's TCP/IP headers plus the values in
 * @_mix and emit it, subject to the global rate limit. */
static inline void fill_and_output_event(void *ctx, struct sk_buff *skb, struct mix *_mix)
{
    struct perf_event_t event = {};
    struct tcphdr tcp_hdr;

    // ratelimit
    if (bpf_ratelimited(&rate))
        return;

    /* process identity is only meaningful at the user-copy stage; the
     * earlier stages run in softirq context */
    if (likely(_mix->where == TO_USER_COPY)) {
        event.tgid_pid = bpf_get_current_pid_tgid();
        bpf_get_current_comm(&event.comm, sizeof(event.comm));
    }

    bpf_probe_read(&tcp_hdr, sizeof(tcp_hdr), skb_transport_header(skb));
    event.latency = _mix->lat;
    event.saddr = _mix->ip_hdr->saddr;
    event.daddr = _mix->ip_hdr->daddr;
    event.sport = tcp_hdr.source;
    event.dport = tcp_hdr.dest;
    event.seq = tcp_hdr.seq;
    event.ack_seq = tcp_hdr.ack_seq;
    event.pkt_len = BPF_CORE_READ(skb, len);
    event.state = _mix->state;
    event.where = _mix->where;
    bpf_perf_event_output(ctx, &net_recv_lat_event_map, BPF_F_CURRENT_CPU, &event, sizeof(struct perf_event_t));
}
/* Stage 1: driver -> netif_receive_skb. Report IPv4 TCP packets that took
 * longer than to_netif since their RX timestamp. */
SEC("tracepoint/net/netif_receive_skb")
int netif_receive_skb_prog(struct netif_receive_skb_args *args)
{
    struct sk_buff *skb = args->skb;
    struct iphdr ip_hdr;
    u64 delta;

    if (unlikely(BPF_CORE_READ(skb, protocol) != bpf_ntohs(ETH_P_IP))) // IPv4
        return 0;
    bpf_probe_read(&ip_hdr, sizeof(ip_hdr), skb_network_header(skb));
    if (ip_hdr.protocol != IPPROTO_TCP)
        return 0;

    delta = delta_now_skb_tstamp(skb);
    if (delta < to_netif)
        return 0;

    /* state 0: no socket is attached to the skb this early */
    fill_and_output_event(args, skb, &(struct mix){&ip_hdr, delta, 0, TO_NETIF_RCV});
    return 0;
}
/* Stage 2: entry to tcp_v4_rcv. No IPv4/TCP filter is needed: this
 * function only receives IPv4 TCP segments. */
SEC("kprobe/tcp_v4_rcv")
int tcp_v4_rcv_prog(struct pt_regs *ctx)
{
    struct sk_buff *skb = (struct sk_buff*)PT_REGS_PARM1_CORE(ctx);
    struct iphdr ip_hdr;
    u64 delta;

    delta = delta_now_skb_tstamp(skb);
    if (delta < to_tcpv4)
        return 0;

    bpf_probe_read(&ip_hdr, sizeof(ip_hdr), skb_network_header(skb));
    fill_and_output_event(ctx, skb, &(struct mix){&ip_hdr, delta, get_state(skb), TO_TCPV4_RCV});
    return 0;
}
/* Stage 3: data copied to user space. Report IPv4 TCP skbs whose total
 * in-kernel time exceeded to_user_copy. */
SEC("tracepoint/skb/skb_copy_datagram_iovec")
int skb_copy_datagram_iovec_prog(struct skb_copy_datagram_iovec_args *args)
{
    struct sk_buff *skb = args->skb;
    struct iphdr ip_hdr;
    u64 delta;

    if (unlikely(BPF_CORE_READ(skb, protocol) != bpf_ntohs(ETH_P_IP))) // IPv4
        return 0;
    bpf_probe_read(&ip_hdr, sizeof(ip_hdr), skb_network_header(skb));
    if (ip_hdr.protocol != IPPROTO_TCP)
        return 0;

    delta = delta_now_skb_tstamp(skb);
    if (delta < to_user_copy)
        return 0;

    fill_and_output_event(args, skb, &(struct mix){&ip_hdr, delta, get_state(skb), TO_USER_COPY});
    return 0;
}
char __license[] SEC("license") = "Dual MIT/GPL";

54
bpf/oom.c Normal file
View File

@ -0,0 +1,54 @@
#include "vmlinux.h"
#include "bpf_common.h"
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include "bpf_ratelimit.h"
char __license[] SEC("license") = "Dual MIT/GPL";
/* Assumed upper bound on online CPUs, used only to size the rate limit. */
#define CPU_NUM 128
/* 1s window, CPU_NUM * 10000 events/window, no max_burst cap. */
BPF_RATELIMIT_IN_MAP(rate, 1, CPU_NUM * 10000, 0);

struct {
    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
    __uint(key_size, sizeof(int));
    __uint(value_size, sizeof(u32));
} oom_perf_events SEC(".maps");

/* Record of one OOM kill: the task that triggered the OOM and the task
 * chosen as the victim, with their memcg css addresses. */
struct oom_info {
    char trigger_comm[TASK_COMM_LEN];
    char victim_comm[TASK_COMM_LEN];
    u32 trigger_pid;
    u32 victim_pid;
    u64 trigger_memcg_css;
    u64 victim_memcg_css;
};
/* Report each OOM kill to user space: the triggering task (current) and
 * the victim chosen in the oom_control, with their memcg css addresses. */
SEC("kprobe/oom_kill_process")
int kprobe_oom_kill_process(struct pt_regs *ctx)
{
    struct oom_control *oc;
    struct oom_info info = {};
    struct task_struct *trigger_task, *victim_task;

    if (bpf_ratelimited_in_map(ctx, rate))
        return 0;

    /* first argument: struct oom_control *.  PT_REGS_PARM1() is
     * arch-neutral, unlike reading ctx->di (x86-64 only). */
    oc = (void *)PT_REGS_PARM1(ctx);
    if (!oc)
        return 0;

    trigger_task = (struct task_struct *)bpf_get_current_task();
    victim_task = BPF_CORE_READ(oc, chosen);
    info.trigger_pid = BPF_CORE_READ(trigger_task, pid);
    info.victim_pid = BPF_CORE_READ(victim_task, pid);
    BPF_CORE_READ_STR_INTO(&info.trigger_comm, trigger_task, comm);
    BPF_CORE_READ_STR_INTO(&info.victim_comm, victim_task, comm);
    /* use memory_cgrp_id from vmlinux.h (as memory_cgroup.c does) rather
     * than the magic index 4, which depends on the kernel's subsys enum
     * order and silently breaks when config changes it. */
    info.victim_memcg_css =
        (u64)BPF_CORE_READ(victim_task, cgroups, subsys[memory_cgrp_id]);
    info.trigger_memcg_css =
        (u64)BPF_CORE_READ(trigger_task, cgroups, subsys[memory_cgrp_id]);
    bpf_perf_event_output(ctx, &oom_perf_events, BPF_F_CURRENT_CPU, &info, sizeof(info));
    return 0;
}

311
bpf/runqlat_tracing.c Normal file
View File

@ -0,0 +1,311 @@
#include "vmlinux.h"
#include "bpf_common.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
// By default, use the task_group address as the per-cgroup map key;
// undefine TG_ADDR_KEY to key by pid-namespace inode instead.
#define TG_ADDR_KEY

#define TASK_RUNNING 0
#define TASK_ON_RQ_QUEUED 1

/* Read a kernel field by address (pre-CO-RE style probe read). */
#define _(P)                                            \
    ({                                                  \
        typeof(P) val = 0;                              \
        bpf_probe_read(&val, sizeof(val), &(P));        \
        val;                                            \
    })

char __license[] SEC("license") = "Dual MIT/GPL";

struct stat_t {
    unsigned long nvcsw; // task_group counts of voluntary context switch
    unsigned long nivcsw; // task_group counts of involuntary context switch
    unsigned long nlat_01; // task_group counts of sched latency range [0, 10)ms
    unsigned long nlat_02; // task_group counts of sched latency range [10, 20)ms
    unsigned long nlat_03; // task_group counts of sched latency range [20, 50)ms
    unsigned long nlat_04; // task_group counts of sched latency range [50, inf)ms
};

struct g_stat_t {
    unsigned long g_nvcsw; // global counts of voluntary context switch
    unsigned long g_nivcsw; // global counts of involuntary context switch
    unsigned long g_nlat_01; // global counts of sched latency range [0, 10)ms
    unsigned long g_nlat_02; // global counts of sched latency range [10, 20)ms
    unsigned long g_nlat_03; // global counts of sched latency range [20, 50)ms
    unsigned long g_nlat_04; // global counts of sched latency range [50, inf)ms
};

/* pid -> enqueue timestamp (ns), written on wakeup, consumed on switch. */
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __type(key, u32);
    __type(value, u64);
    // FIXME: is 10000 enough or too large?
    __uint(max_entries, 10000);
} latency SEC(".maps");

struct stat_t;
/* per-task_group (or per-pidns) scheduler stats */
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
#ifdef TG_ADDR_KEY
    __type(key, u64);
#else
    __type(key, u32);
#endif
    __type(value, struct stat_t);
    __uint(max_entries, 10000);
} cpu_tg_metric SEC(".maps");

struct g_stat_t;
struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __type(key, u32);
    __type(value, struct g_stat_t);
    // all global counts are integrated in one g_stat_t struct
    __uint(max_entries, 1);
} cpu_host_metric SEC(".maps");
// trace_enqueue stamps the moment @pid became runnable, so that the
// sched_switch handler can later measure how long it waited on the run
// queue. The idle task (pid 0) is never tracked.
static int trace_enqueue(u32 pid)
{
	u64 enqueue_ts;

	if (!pid)
		return 0;

	enqueue_ts = bpf_ktime_get_ns();
	bpf_map_update_elem(&latency, &pid, &enqueue_ts, BPF_ANY);
	return 0;
}
// Tracepoint format for sched:sched_wakeup_new; the leading pad covers the
// common tracepoint header.
struct sched_wakeup_new_args {
unsigned long long pad;
char comm[16];
int pid;
int prio;
int success;
int target_cpu;
};
// A newly created task is woken for the first time: record its enqueue
// timestamp so sched_switch can later compute its run-queue latency.
SEC("tracepoint/sched/sched_wakeup_new")
int sched_wakeup_new_entry(struct sched_wakeup_new_args *ctx)
{
return trace_enqueue(ctx->pid);
}
// Tracepoint format for sched:sched_wakeup (same layout as
// sched_wakeup_new); the leading pad covers the common tracepoint header.
struct sched_wakeup_args {
	unsigned long long pad;
	char comm[16];
	int pid;
	int prio;
	int success;
	int target_cpu;
};

// An existing task is woken: record its enqueue timestamp so sched_switch
// can later compute its run-queue latency.
//
// Fix: the handler previously declared its ctx as
// `struct sched_wakeup_new_args *` (copy-paste), leaving sched_wakeup_args
// unused; use the matching sched_wakeup format struct.
SEC("tracepoint/sched/sched_wakeup")
int sched_wakeup_entry(struct sched_wakeup_args *ctx)
{
	return trace_enqueue(ctx->pid);
}
// Nanoseconds per millisecond, used for the latency buckets below.
#define NSEC_PER_MSEC 1000000L
// sched_switch handler. For the task being switched out it accounts a
// voluntary/involuntary context switch for its group and the host; for the
// task being switched in it computes run-queue latency from the timestamp
// stored at wakeup/preemption and buckets it per group and host-wide.
SEC("raw_tracepoint/sched_switch")
int sched_switch_entry(struct bpf_raw_tracepoint_args *ctx)
{
u32 prev_pid, next_pid, g_key = 0;
u64 now, *tsp, delta;
bool is_voluntary;
struct stat_t *entry;
struct g_stat_t *g_entry;
// TP_PROTO(bool preempt, struct task_struct *prev, struct task_struct *next)
struct task_struct *prev = (struct task_struct *)ctx->args[1];
struct task_struct *next = (struct task_struct *)ctx->args[2];
#ifdef TG_ADDR_KEY
// get task_group addr: task_struct->sched_task_group
u64 key = (u64)_(prev->sched_task_group);
#else
// get pid ns id: task_struct->nsproxy->pid_ns_for_children->ns.inum
u32 key = BPF_CORE_READ(prev, nsproxy, pid_ns_for_children, ns.inum);
#endif
long state;
// to avoid compilation warning, use raw interface instead of macro _()
bpf_probe_read(&state, sizeof(long), (void *)&(prev->state));
// ivcsw: treat like an enqueue event and store timestamp
prev_pid = _(prev->pid);
// prev still TASK_RUNNING means it was preempted (involuntary switch):
// it goes straight back on the run queue, so stamp it like an enqueue.
if (state == TASK_RUNNING) {
if (prev_pid != 0) {
now = bpf_ktime_get_ns();
bpf_map_update_elem(&latency, &prev_pid, &now, BPF_ANY);
}
is_voluntary = 0;
} else {
is_voluntary = 1;
}
// Fetch (or lazily create) the single host-wide aggregate entry.
g_entry = bpf_map_lookup_elem(&cpu_host_metric, &g_key);
if (!g_entry) {
// init global counts map
struct g_stat_t g_new_stat = {
.g_nvcsw = 0,
.g_nivcsw = 0,
.g_nlat_01 = 0,
.g_nlat_02 = 0,
.g_nlat_03 = 0,
.g_nlat_04 = 0,
};
bpf_map_update_elem(&cpu_host_metric, &g_key, &g_new_stat, BPF_NOEXIST);
g_entry = bpf_map_lookup_elem(&cpu_host_metric, &g_key);
if (!g_entry)
return 0;
}
// When use pid namespace id as key, sometimes we would encounter
// null id because task->nsproxy is freed, usually means that this
// task is almost dead (zombie), so ignore it.
if (key && prev_pid) {
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
if (!entry) {
// First event for this group: create a zeroed record.
struct stat_t new_stat = {
.nvcsw = 0,
.nivcsw = 0,
.nlat_01 = 0,
.nlat_02 = 0,
.nlat_03 = 0,
.nlat_04 = 0,
};
bpf_map_update_elem(&cpu_tg_metric, &key, &new_stat, BPF_NOEXIST);
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
if (!entry)
return 0;
}
// Atomic increments: this program runs concurrently on every CPU.
if (is_voluntary) {
__sync_fetch_and_add(&entry->nvcsw, 1);
__sync_fetch_and_add(&g_entry->g_nvcsw, 1);
} else {
__sync_fetch_and_add(&entry->nivcsw, 1);
__sync_fetch_and_add(&g_entry->g_nivcsw, 1);
}
}
//trace_sched_switch is called under prev != next, no need to check again.
next_pid = _(next->pid);
// ignore idle
if (next_pid == 0)
return 0;
// fetch timestamp and calculate delta
tsp = bpf_map_lookup_elem(&latency, &next_pid);
if (tsp == 0 || *tsp == 0) {
return 0; // missed enqueue
}
now = bpf_ktime_get_ns();
delta = now - *tsp;
bpf_map_delete_elem(&latency, &next_pid);
// Re-key for the *incoming* task's group before bucketing its latency.
#ifdef TG_ADDR_KEY
key = (u64)_(next->sched_task_group);
#else
key = BPF_CORE_READ(next, nsproxy, pid_ns_for_children, ns.inum);
#endif
if (key) {
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
if (!entry) {
struct stat_t new_stat = {
.nvcsw = 0,
.nivcsw = 0,
.nlat_01 = 0,
.nlat_02 = 0,
.nlat_03 = 0,
.nlat_04 = 0,
};
bpf_map_update_elem(&cpu_tg_metric, &key, &new_stat, BPF_NOEXIST);
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
if (!entry)
return 0;
}
// Bucket the run-queue latency: [0,10) [10,20) [20,50) [50,inf) ms.
if (delta < 10 * NSEC_PER_MSEC) {
__sync_fetch_and_add(&entry->nlat_01, 1);
__sync_fetch_and_add(&g_entry->g_nlat_01, 1);
} else if (delta < 20 * NSEC_PER_MSEC) {
__sync_fetch_and_add(&entry->nlat_02, 1);
__sync_fetch_and_add(&g_entry->g_nlat_02, 1);
} else if (delta < 50 * NSEC_PER_MSEC) {
__sync_fetch_and_add(&entry->nlat_03, 1);
__sync_fetch_and_add(&g_entry->g_nlat_03, 1);
} else {
__sync_fetch_and_add(&entry->nlat_04, 1);
__sync_fetch_and_add(&g_entry->g_nlat_04, 1);
}
}
return 0;
}
// Task exit: drop any stale enqueue timestamp for the exiting task.
SEC("raw_tracepoint/sched_process_exit")
int sched_process_exit_entry(struct bpf_raw_tracepoint_args *ctx)
{
	u32 pid;
	// TP_PROTO(struct task_struct *tsk)
	struct task_struct *p = (struct task_struct *)ctx->args[0];

	pid = _(p->pid);

	/*
	 * If a task was woken but exits before ever being picked as "next" in
	 * sched_switch, its latency entry would leak and eventually fill the
	 * map; delete it here. bpf_map_delete_elem is a harmless no-op when
	 * the key is absent, so the previous lookup-before-delete round trip
	 * was unnecessary.
	 */
	bpf_map_delete_elem(&latency, &pid);
	return 0;
}
#ifdef TG_ADDR_KEY
// When a cgroup (task_group) is freed its per-group record must be removed,
// otherwise cpu_tg_metric accumulates dead keys until it fills up.
SEC("kprobe/sched_free_group")
int sched_free_group_entry(struct pt_regs *ctx)
{
	// The map key is the task_group address itself (TG_ADDR_KEY mode), so
	// make the u64 key explicit rather than passing the address of a
	// struct pointer variable.
	u64 key = (u64)PT_REGS_PARM1(ctx);

	// Deleting an absent key is a harmless no-op; no lookup needed first.
	bpf_map_delete_elem(&cpu_tg_metric, &key);
	return 0;
}
#else
// When a pid namespace is destroyed its record must be removed from
// cpu_tg_metric, otherwise dead keys accumulate until the map fills up.
SEC("kprobe/destroy_pid_namespace")
int destroy_pid_namespace_entry(struct pt_regs *ctx)
{
	struct pid_namespace *ns = (void *) PT_REGS_PARM1(ctx);
	// Map key is the namespace inode number: ns->ns.inum.
	u32 pidns = BPF_CORE_READ(ns, ns.inum);

	// Deleting an absent key is a harmless no-op; no lookup needed first.
	bpf_map_delete_elem(&cpu_tg_metric, &pidns);
	return 0;
}
#endif

158
bpf/softirq.c Normal file
View File

@ -0,0 +1,158 @@
#include "vmlinux.h"
#include "bpf_common.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_ratelimit.h"
char __license[] SEC("license") = "Dual MIT/GPL";
#define NR_STACK_TRACE_MAX 0x4000
// NOTE(review): the name is inverted — 1,000,000 is nanoseconds per
// millisecond (NSEC_PER_MSEC), not the reverse. It is unused in this file;
// rename carefully if other files reference it.
#define MSEC_PER_NSEC 1000000UL
#define TICK_DEP_MASK_NONE 0
// Default stall-report threshold in nanoseconds (5ms); the runtime value is
// the overridable constant below.
#define SOFTIRQ_THRESH 5000000UL
volatile const u64 softirq_thresh = SOFTIRQ_THRESH;
#define CPU_NUM 128
#define TICK 1000
// Rate limiter shared by the probes below (see bpf_ratelimit.h).
BPF_RATELIMIT(rate, 1, CPU_NUM * TICK * 1000);
// Per-CPU tick-tracing state: whether tracing is armed, whether the tick
// just restarted after nohz, and the timestamp of the last observed tick.
struct timer_softirq_run_ts {
u32 start_trace;
u32 restarting_tick;
u64 soft_ts;
};
// Payload sent to userspace when a stall over the threshold is detected.
struct report_event {
u64 stack[PERF_MAX_STACK_DEPTH];
s64 stack_size;
u64 now;
u64 stall_time;
char comm[TASK_COMM_LEN];
u32 pid;
u32 cpu;
};
// the map for recording irq/softirq timer ts
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(struct timer_softirq_run_ts));
__uint(max_entries, 1);
} timerts_map SEC(".maps");
// the map use for storing struct report_event memory
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(u32)); // key = 0
__uint(value_size, sizeof(struct report_event));
__uint(max_entries, 1);
} report_map SEC(".maps");
// the event map use for report userspace
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(int));
__uint(value_size, sizeof(u32));
} irqoff_event_map SEC(".maps");
// Fires on every scheduler tick. Measures the gap since the previous tick on
// this CPU; a gap over softirq_thresh means ticks (and thus softirqs) were
// stalled, and the event is reported to userspace with a stack trace.
SEC("kprobe/scheduler_tick")
void probe_scheduler_tick(struct pt_regs *ctx)
{
	int key = 0;
	struct timer_softirq_run_ts *ts;
	struct report_event *event;
	u64 now;
	u64 delta;

	// verify bpf-ratelimit
	if (bpf_ratelimited(&rate))
		return;

	ts = bpf_map_lookup_elem(&timerts_map, &key);
	if (!ts)
		return;
	if (!ts->start_trace)
		return;

	// first tick after tracing was armed: just seed the timestamp
	if (!ts->soft_ts) {
		ts->soft_ts = bpf_ktime_get_ns();
		return;
	}

	// tick just restarted after nohz: reseed, the gap is not a stall
	if (ts->restarting_tick) {
		ts->restarting_tick = 0;
		ts->soft_ts = bpf_ktime_get_ns();
		return;
	}

	now = bpf_ktime_get_ns();
	delta = now - ts->soft_ts;
	// if delta over threshold, dump important info to user
	if (delta >= softirq_thresh) {
		// Fix: the report buffer was previously looked up before the
		// restarting_tick check, wasting a lookup on that path (and a
		// lookup failure there skipped the timestamp reseed). Look it
		// up only when a report is actually emitted.
		event = bpf_map_lookup_elem(&report_map, &key);
		if (!event)
			return;
		event->now = now;
		event->stall_time = delta;
		__builtin_memset(event->comm, 0, sizeof(event->comm));
		bpf_get_current_comm(&event->comm, sizeof(event->comm));
		event->pid = (u32)bpf_get_current_pid_tgid();
		event->cpu = bpf_get_smp_processor_id();
		event->stack_size = bpf_get_stack(ctx, event->stack, sizeof(event->stack), 0);
		bpf_perf_event_output(ctx, &irqoff_event_map, BPF_F_CURRENT_CPU,
				      event, sizeof(struct report_event));
	}
	// update soft_ts, use for next trace
	ts->soft_ts = now;
}
// Tracepoint format for timer:tick_stop; the pad covers the common header.
struct tp_tick_stop {
	unsigned long pad;
	int success;
	int dependency;
};

// The CPU goes fully tickless: with the periodic tick stopped,
// scheduler_tick no longer fires and tick-gap deltas would be meaningless,
// so disarm tracing until the tick restarts.
SEC("tracepoint/timer/tick_stop")
void probe_tick_stop(struct tp_tick_stop *ctx)
{
	int key = 0;
	struct timer_softirq_run_ts *ts = bpf_map_lookup_elem(&timerts_map, &key);

	if (!ts)
		return;

	// only a successful stop with no remaining tick dependency counts
	if (ctx->success != 1 || ctx->dependency != TICK_DEP_MASK_NONE)
		return;

	ts->start_trace = 0;
}
// The periodic tick restarts after a nohz period: re-arm tracing and flag
// the restart so the next scheduler_tick only reseeds its timestamp instead
// of misreporting the nohz gap as a stall.
SEC("kprobe/tick_nohz_restart_sched_tick")
void probe_tick_nohz_restart_sched_tick(struct pt_regs *ctx)
{
	int key = 0;
	struct timer_softirq_run_ts *ts = bpf_map_lookup_elem(&timerts_map, &key);

	if (!ts)
		return;

	ts->soft_ts = bpf_ktime_get_ns();
	ts->start_trace = 1;
	ts->restarting_tick = 1;
}

40
bpf/softlockup.c Normal file
View File

@ -0,0 +1,40 @@
#include "vmlinux.h"
#include "bpf_common.h"
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include "bpf_ratelimit.h"
char __license[] SEC("license") = "Dual MIT/GPL";
#define CPU_NUM 128
// Map-backed rate limiter shared with userspace (see bpf_ratelimit.h).
BPF_RATELIMIT_IN_MAP(rate, 1, CPU_NUM * 10000, 0);
// Perf buffer used to push softlockup reports to userspace.
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(int));
__uint(value_size, sizeof(u32));
} softlockup_perf_events SEC(".maps");
// Payload reported for each softlockup detection.
struct softlockup_info {
u32 cpu;
u32 pid;
char comm[TASK_COMM_LEN];
};
// NOTE(review): "+442" is a hard-coded byte offset into watchdog_timer_fn
// (presumably the point where the kernel reports a softlockup). This is
// specific to one kernel build and must be regenerated per kernel — confirm.
SEC("kprobe/watchdog_timer_fn+442")
int kprobe_watchdog_timer_fn(struct pt_regs *ctx)
{
struct softlockup_info info = {};
struct task_struct *task;
// drop events beyond the configured rate limit
if (bpf_ratelimited_in_map(ctx, rate))
return 0;
info.cpu = bpf_get_smp_processor_id();
task = (struct task_struct *)bpf_get_current_task();
// low 32 bits of pid_tgid = thread id
info.pid = bpf_get_current_pid_tgid() & 0xffffffffUL;
BPF_CORE_READ_STR_INTO(&info.comm, task, comm);
bpf_perf_event_output(ctx, &softlockup_perf_events, BPF_F_CURRENT_CPU, &info, sizeof(info));
return 0;
}

57
build/clang.sh Executable file
View File

@ -0,0 +1,57 @@
#!/bin/sh
# Compile a BPF C source into an object file with clang. Include paths (-I)
# and compile options (-C) can override the defaults below.
usage() {
echo "OVERVIEW: HuaTuo BPF compiler tool (clang LLVM)
USAGE: clang.sh -s <source.c> -o <output.o> -I [includes] -C '[compile_options]'
EXAMPLE:
clang.sh -s example.bpf.c -o example.o # run preprocess, compile, and assemble steps (-C '-c')
clang.sh -s example.bpf.c -o example.o -I include -I include/4.18.0-193.6.3.el8_2.v1.3.x86_64 # specify the headers, (-C '-c')
clang.sh -s example.bpf.c -o example.o -C '-E' # only run the preprocessor
clang.sh -s example.bpf.c -o example.o -C '-S' # only run preprocess and compilation steps"
}
# Parsed options; the DEFAULT_* values apply when the flag is omitted.
SRC=
OBJ=
INCLUDES=
DEFAULT_INCLUDES="-I include -I include/4.18.0-193.6.3.el8_2.v1.2.x86_64"
COMPILE_OPTIONS=
DEFAULT_COMPILE_OPTIONS="-Wall -O2 -g -target bpf -D__TARGET_ARCH_x86 -mcpu=v1 -c"
while getopts 'hs:o:C:I:' opt
do
case ${opt} in
s)
[ -n "${SRC}" ] && echo "-s(source) required 1 file (bpf.c)" && exit 1
SRC=${OPTARG}
;;
o)
[ -n "${OBJ}" ] && echo "-o(output) required 1 file (output.o)" && exit 1
OBJ=${OPTARG}
;;
C)
COMPILE_OPTIONS=${OPTARG}
;;
I)
# -I may be repeated; each occurrence is appended.
INCLUDES="${INCLUDES} -I ${OPTARG}"
;;
h)
usage
exit
;;
?)
usage
exit 1
;;
esac
done
# NOTE(review): 'echo -e' is not portable under /bin/sh (dash prints the
# literal '-e'); confirm the target shell is bash-like or switch to printf.
[ -z "${SRC}" ] && echo -e "-s must be specified, such as -c example.bpf.c \n\n $(usage)" && exit 1
[ -z "${OBJ}" ] && echo -e "-o must be specified, such as -o example.o \n\n $(usage)" && exit 1
# Note: parameter ${DEFAULT_COMPILE_OPTIONS} will be overwritten by ${COMPILE_OPTIONS} in ${OPTIONS}
OPTIONS="${DEFAULT_COMPILE_OPTIONS} ${COMPILE_OPTIONS}"
[ -z "${INCLUDES}" ] && INCLUDES="${DEFAULT_INCLUDES}"
# Word splitting of ${OPTIONS}/${INCLUDES} is intentional here.
clang ${OPTIONS} ${SRC} -o ${OBJ} ${INCLUDES}

15
build/docker/.env Normal file
View File

@ -0,0 +1,15 @@
# elasticsearch
ELASTIC_VERSION=8.15.5
# https://www.elastic.co/guide/en/elasticsearch/reference/current/built-in-users.html
ELASTIC_PASSWORD='huatuo-bamai' # user 'elastic' (built-in)
KIBANA_SYSTEM_PASSWORD='huatuo-bamai' # user 'kibana_system' (built-in)
# setup to init user
ELASTICSEARCH_HOST='localhost'
# prometheus
PROMETHEUS_VERSION=v2.53.3 # LTS v2.53
# Grafana
GRAFANA_VERSION=11.0.0

34
build/docker/Dockerfile Normal file
View File

@ -0,0 +1,34 @@
ARG BUILD_PATH=/go/huatuo-bamai
ARG RUN_PATH=/home/huatuo-bamai
# https://hub.docker.com/_/golang/tags?name=1.22.4
FROM golang:1.22.4-alpine AS base
# Install dependencies for build
RUN apk add --no-cache \
make \
clang15 \
libbpf-dev \
bpftool \
curl && \
bpftool btf dump file /sys/kernel/btf/vmlinux format c > bpf/include/vmlinux.h
ENV PATH=$PATH:/usr/lib/llvm15/bin
# Build huatuo
FROM base AS build
ARG BUILD_PATH
WORKDIR ${BUILD_PATH}
COPY . .
RUN make
# Release huatuo image
FROM base AS run
ARG BUILD_PATH
ARG RUN_PATH
WORKDIR ${RUN_PATH}
COPY --from=build \
${BUILD_PATH}/_output ./_output
COPY --from=build \
${BUILD_PATH}/huatuo-bamai.conf .
CMD ["/run.sh"]

View File

@ -0,0 +1,48 @@
services:
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTIC_VERSION:-8.15.5}
container_name: es
network_mode: host
environment:
discovery.type: single-node
ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-}
KIBANA_SYSTEM_PASSWORD: ${KIBANA_SYSTEM_PASSWORD:-}
volumes:
- ./elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml:ro
prometheus:
image: prom/prometheus:${PROMETHEUS_VERSION:-v2.53.3}
container_name: prometheus
network_mode: host
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
grafana:
image: grafana/grafana-oss:${GRAFANA_VERSION:-11.0.0}
container_name: grafana
network_mode: host
volumes:
- ./grafana/datasources/elasticsearch.yaml:/etc/grafana/provisioning/datasources/elasticsearch.yaml:ro
- ./grafana/datasources/prometheus.yaml:/etc/grafana/provisioning/datasources/prometheus.yaml:ro
- ./grafana/dashboards:/etc/grafana/provisioning/dashboards:ro
depends_on:
- prometheus
- elasticsearch
huatuo-bamai:
build:
context: ./../../ # compile required in Dockerfile
dockerfile: ./build/docker/Dockerfile
container_name: huatuo-bamai
network_mode: host
privileged: true
environment:
ELASTICSEARCH_HOST: ${ELASTICSEARCH_HOST:-}
ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-}
volumes:
- ./run.sh:/run.sh:ro
- /sys/kernel:/sys/kernel
depends_on:
- elasticsearch
- prometheus
- grafana

View File

@ -0,0 +1,4 @@
cluster.name: "docker-cluster"
network.host: 0.0.0.0
http.port: 9200
xpack.security.enabled: true

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,24 @@
apiVersion: 1
providers:
# <string> an unique provider name. Required
- name: 'huatuo-bamai'
# <int> Org id. Default to 1
orgId: 1
# <string> name of the dashboard folder.
folder: ''
# <string> folder UID. will be automatically generated if not specified
folderUid: ''
# <string> provider type. Default to 'file'
type: file
# <bool> disable dashboard deletion
disableDeletion: false
# <int> how often Grafana will scan for changed dashboards
updateIntervalSeconds: 10
# <bool> allow updating provisioned dashboards from the UI
allowUiUpdates: false
options:
# <string, required> path to dashboard files on disk. Required when using the 'file' type
path: /etc/grafana/provisioning/dashboards
# <bool> use folder names from filesystem to create folders in Grafana
foldersFromFilesStructure: true

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,63 @@
# https://grafana.com/docs/grafana/latest/datasources/elasticsearch/
apiVersion: 1
# List of data sources to delete from the database.
deleteDatasources:
- name: huatuo-bamai-es
# Mark provisioned data sources for deletion if they are no longer in a provisioning file.
# It takes no effect if data sources are already listed in the deleteDatasources section.
prune: true
# List of data sources to insert/update depending on what's
# available in the database.
datasources:
# <string, required> Sets the name you use to refer to
# the data source in panels and queries.
- name: huatuo-bamai-es
# <string, required> Sets the data source type.
type: elasticsearch
# <string, required> Sets the access mode, either
# proxy or direct (Server or Browser in the UI).
# Some data sources are incompatible with any setting
# but proxy (Server).
access: proxy
# <int> Sets the organization id. Defaults to orgId 1.
orgId: 1
# <string> Sets a custom UID to reference this
# data source in other parts of the configuration.
# If not specified, Grafana generates one.
uid: huatuo-bamai-es
# <string> Sets the data source's URL, including the
# port.
url: http://localhost:9200
# <string> Sets the database user, if necessary.
user: elastic
# <string> Sets the database name, if necessary.
database:
# <bool> Enables credential headers.
withCredentials:
# <bool> Toggles whether the data source is pre-selected
# for new panels. You can set only one default
# data source per organization.
isDefault:
# <map> Fields to convert to JSON and store in jsonData.
jsonData:
index: 'huatuo_bamai*'
timeField: 'uploaded_time'
# <map> Fields to encrypt before storing in jsonData.
secureJsonData:
# <string> Defines the CA cert, client cert, and
# client key for encrypted authentication.
tlsCACert: '...'
tlsClientCert: '...'
tlsClientKey: '...'
# <string> Sets the database password, if necessary.
password: huatuo-bamai
# <int> Sets the version. Used to compare versions when
# updating. Ignored when creating a new data source.
version: 1
# <bool> Allows users to edit data sources from the
# Grafana UI.
editable: false

View File

@ -0,0 +1,29 @@
# https://grafana.com/docs/grafana/latest/datasources/prometheus/
apiVersion: 1
# List of data sources to delete from the database.
deleteDatasources:
- name: huatuo-bamai-prom
# Mark provisioned data sources for deletion if they are no longer in a provisioning file.
# It takes no effect if data sources are already listed in the deleteDatasources section.
prune: true
datasources:
- name: huatuo-bamai-prom
type: prometheus
access: proxy
# <int> Sets the organization id. Defaults to orgId 1.
orgId: 1
# <string> Sets a custom UID to reference this
# data source in other parts of the configuration.
# If not specified, Grafana generates one.
uid: huatuo-bamai-prom
url: http://localhost:9090
jsonData:
httpMethod: POST
prometheusType: Prometheus
cacheLevel: 'High'
disableRecordingRules: false
incrementalQueryOverlapWindow: 10m

View File

@ -0,0 +1,33 @@
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
# - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: "prometheus"
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs:
- targets: ["localhost:9090"]
# add huatuo
- job_name: "huatuo"
static_configs:
- targets: ["localhost:19704"]

71
build/docker/run.sh Executable file
View File

@ -0,0 +1,71 @@
#!/bin/sh
# Container entrypoint: wait until Elasticsearch answers with HTTP 200, then
# start the huatuo-bamai binary.
ELASTICSEARCH_HOST=${ELASTICSEARCH_HOST:-localhost}
ELASTIC_PASSWORD=${ELASTIC_PASSWORD:-huatuo-bamai}
# Wait for Elasticsearch to be ready
# ref: https://github.com/deviantony/docker-elk/blob/main/setup/entrypoint.sh
wait_for_elasticsearch() {
args="-s -D- -m15 -w '%{http_code}' http://${ELASTICSEARCH_HOST}:9200/"
if [ -n "${ELASTIC_PASSWORD}" ]; then
args="$args -u elastic:${ELASTIC_PASSWORD}"
fi
result=1
output=""
# retry for up to 180 seconds
for sec in $(seq 1 180); do
exit_code=0
output=$(eval "curl $args") || exit_code=$?
# echo "exec curl $args, exit code: $exit_code, output: $output"
if [ $exit_code -ne 0 ]; then
result=$exit_code
fi
# Extract the last three characters of the output to check the HTTP status code
# NOTE(review): if curl produced no output, http_code is non-numeric and
# the -eq test below errors; confirm this can't happen or guard it.
http_code=$(echo "$output" | tail -c 4)
if [ "$http_code" -eq 200 ]; then
result=0
break
fi
echo "Waiting for Elasticsearch ready... ${sec}s"
sleep 1
done
# On failure, print the response body (everything but the status code).
# NOTE(review): 'head -c -3' (negative count) is GNU-only; confirm the
# container's head supports it.
if [ $result -ne 0 ] && [ "$http_code" -ne 000 ]; then
echo "$output" | head -c -3
fi
return $result
}
exit_code=0
wait_for_elasticsearch || exit_code=$?
# Map common curl exit codes to human-readable diagnostics.
if [ $exit_code -ne 0 ]; then
case $exit_code in
6)
echo 'Could not resolve host. Is Elasticsearch running?'
;;
7)
echo 'Failed to connect to host. Is Elasticsearch healthy?'
;;
28)
echo 'Timeout connecting to host. Is Elasticsearch healthy?'
;;
*)
echo "Connection to Elasticsearch failed. Exit code: ${exit_code}"
;;
esac
exit $exit_code
fi
# Waiting for initialization of Elasticsearch built-in users
sleep 5
echo "Elasticsearch is ready."
# Run huatuo-bamai
exec _output/bin/huatuo-bamai --region example --config huatuo-bamai.conf

226
cmd/huatuo-bamai/main.go Normal file
View File

@ -0,0 +1,226 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"os"
"os/signal"
"runtime"
"strings"
"syscall"
_ "huatuo-bamai/core/autotracing"
_ "huatuo-bamai/core/events"
_ "huatuo-bamai/core/metrics"
"huatuo-bamai/internal/bpf"
"huatuo-bamai/internal/conf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/pod"
"huatuo-bamai/internal/services"
"huatuo-bamai/internal/storage"
"huatuo-bamai/internal/utils/cgrouputil"
"huatuo-bamai/internal/utils/pidutil"
"huatuo-bamai/pkg/tracing"
"github.com/urfave/cli/v2"
)
// mainAction is the CLI action: it locks the pid file, sets up the runtime
// cgroup, storage clients, BPF manager, pod metadata, tracing and metrics,
// starts the API server, and then blocks until an exit signal arrives.
func mainAction(ctx *cli.Context) error {
if ctx.NArg() > 0 {
return fmt.Errorf("invalid param %v", ctx.Args())
}
// single-instance guard via pid file
if err := pidutil.LockPidFile(ctx.App.Name); err != nil {
return fmt.Errorf("failed to lock pid file: %w", err)
}
defer pidutil.RemovePidFile(ctx.App.Name)
// init cpu quota
host, err := cgrouputil.NewRuntimeCgroup(ctx.App.Name,
conf.Get().RuntimeCgroup.LimitInitCPU,
conf.Get().RuntimeCgroup.LimitMem)
if err != nil {
return fmt.Errorf("new cgroup: %w", err)
}
defer host.Delete()
// initialize the storage clients.
storageInitCtx := storage.InitContext{
EsAddresses: conf.Get().Storage.ES.Address,
EsUsername: conf.Get().Storage.ES.Username,
EsPassword: conf.Get().Storage.ES.Password,
EsIndex: conf.Get().Storage.ES.Index,
LocalPath: conf.Get().Storage.LocalFile.Path,
LocalMaxRotation: conf.Get().Storage.LocalFile.MaxRotation,
LocalRotationSize: conf.Get().Storage.LocalFile.RotationSize,
Region: conf.Region,
}
if err := storage.InitDefaultClients(&storageInitCtx); err != nil {
return fmt.Errorf("storage.InitDefaultClients: %w", err)
}
// init the bpf manager.
if err := bpf.InitBpfManager(); err != nil {
return fmt.Errorf("failed to init bpf manager: %w", err)
}
// cache container cgroup metadata before tracing starts
if err := pod.ContainerCgroupCssInit(); err != nil {
return fmt.Errorf("init pod cgroup metadata: %w", err)
}
// start all tracing events except those black-listed in the config
blackListed := conf.Get().Tracing.BlackList
mgr, err := tracing.NewMgrTracingEvent(blackListed)
if err != nil {
return err
}
if err := mgr.MgrTracingEventStartAll(); err != nil {
return err
}
prom, err := InitMetricsCollector(blackListed)
if err != nil {
return fmt.Errorf("InitMetricsCollector: %w", err)
}
log.Infof("Initialize the Metrics collector: %v", prom)
services.Start(conf.Get().APIServer.TCPAddr, mgr, prom)
// update cpu quota
if err := host.UpdateCPU(conf.Get().RuntimeCgroup.LimitCPU); err != nil {
return fmt.Errorf("cg update cpu: %w", err)
}
// block until an exit signal; SIGUSR1 exits without closing the BPF
// manager, the others shut it down first.
waitExit := make(chan os.Signal, 1)
signal.Notify(waitExit, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGUSR1, syscall.SIGINT, syscall.SIGTERM)
for {
s := <-waitExit
switch s {
case syscall.SIGQUIT, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM:
log.Infof("huatuo-bamai exit by signal %d", s)
bpf.CloseBpfManager()
return nil
case syscall.SIGUSR1:
return nil
default:
return nil
}
}
}
var (
// AppGitCommit will be the hash that the binary was built from
// and will be populated by the Makefile
AppGitCommit string
// AppBuildTime will be populated by the Makefile
AppBuildTime string
// AppVersion will be populated by the Makefile, read from
// VERSION file of the source code.
AppVersion string
// AppUsage is the one-line description shown in the CLI help output.
AppUsage = "An In-depth Observation of Linux Kernel Application"
)
// main wires up the CLI app: version string, flags, pre-run configuration
// loading (config file, region, log level/output, tracing black list), and
// the mainAction entry point.
func main() {
app := cli.NewApp()
app.Usage = AppUsage
// AppVersion is injected by the Makefile; a missing value means a broken
// build, so fail loudly.
if AppVersion == "" {
panic("the value of AppVersion must be specified")
}
v := []string{
"",
fmt.Sprintf(" app_version: %s", AppVersion),
fmt.Sprintf(" go_version: %s", runtime.Version()),
fmt.Sprintf(" git_commit: %s", AppGitCommit),
fmt.Sprintf(" build_time: %s", AppBuildTime),
}
app.Version = strings.Join(v, "\n")
app.Flags = []cli.Flag{
&cli.StringFlag{
Name: "config",
Value: "huatuo-bamai.conf",
Usage: "huatuo-bamai config file",
},
&cli.StringFlag{
Name: "region",
Required: true,
Usage: "the host and containers are in this region",
},
&cli.StringSliceFlag{
Name: "disable-tracing",
Usage: "disable tracing. This is related to TracerConfig.BlackList in config, and complement each other",
},
&cli.BoolFlag{
Name: "log-debug",
Usage: "enable debug output for logging",
},
}
// Before runs ahead of the action: load config and apply CLI overrides.
app.Before = func(ctx *cli.Context) error {
if err := conf.LoadConfig(ctx.String("config")); err != nil {
return fmt.Errorf("failed to load config: %w", err)
}
// set Region
conf.Region = ctx.String("region")
// log level
if conf.Get().LogLevel != "" {
log.SetLevel(conf.Get().LogLevel)
log.Infof("log level [%s] configured in file, use it", log.GetLevel())
}
// log destination: fall back to stdout if the file cannot be opened
logFile := conf.Get().LogFile
if logFile != "" {
file, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o666)
if err == nil {
log.SetOutput(file)
} else {
log.SetOutput(os.Stdout)
log.Infof("Failed to log to file, using default stdout")
}
}
// tracer
// merge --disable-tracing entries into the config black list
disabledTracing := ctx.StringSlice("disable-tracing")
if len(disabledTracing) > 0 {
definedTracers := conf.Get().Tracing.BlackList
definedTracers = append(definedTracers, disabledTracing...)
conf.Set("TracerConfig.BlackList", definedTracers)
log.Infof("The tracer black list by cli: %v", conf.Get().Tracing.BlackList)
}
// --log-debug overrides the file-configured log level
if ctx.Bool("log-debug") {
log.SetLevel("Debug")
}
return nil
}
// core
app.Action = mainAction
// run
if err := app.Run(os.Args); err != nil {
log.Errorf("Error: %v", err)
os.Exit(1)
}
}

View File

@ -0,0 +1,43 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"huatuo-bamai/pkg/metric"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
)
var promNamespace = "huatuo_bamai"
// InitMetricsCollector builds the Prometheus registry used by the service:
// the huatuo collector manager (honoring the black list) plus the standard
// Go runtime and process collectors.
func InitMetricsCollector(blackListed []string) (*prometheus.Registry, error) {
	collectorMgr, err := metric.NewCollectorManager(blackListed)
	if err != nil {
		return nil, fmt.Errorf("create collector: %w", err)
	}

	registry := prometheus.NewRegistry()
	registry.MustRegister(
		collectorMgr,
		collectors.NewGoCollector(),
		collectors.NewProcessCollector(
			collectors.ProcessCollectorOpts{Namespace: promNamespace}),
	)

	return registry, nil
}

349
core/autotracing/cpuidle.go Normal file
View File

@ -0,0 +1,349 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package autotracing
import (
"context"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"time"
"huatuo-bamai/internal/conf"
"huatuo-bamai/internal/flamegraph"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/pod"
"huatuo-bamai/internal/storage"
"huatuo-bamai/internal/utils/cgrouputil"
"huatuo-bamai/pkg/tracing"
"huatuo-bamai/pkg/types"
)
// Register the "cpuidle" autotracing event at package load time.
func init() {
tracing.RegisterEventTracing("cpuidle", newCPUIdle)
}
// newCPUIdle returns the tracing attributes for the cpuidle autotracing
// task. Internal is 20 — presumably an interval in seconds; confirm against
// the tracing framework.
func newCPUIdle() (*tracing.EventTracingAttr, error) {
return &tracing.EventTracingAttr{
TracingData: &cpuIdleTracing{},
Internal: 20,
Flag: tracing.FlagTracing,
}, nil
}
// GetCPUCoresInCgroup returns the number of CPU cores available to the
// cgroup at cgroupPath, derived from the CFS cpu.cfs_quota_us /
// cpu.cfs_period_us files. A quota of -1 means "unlimited", in which case
// the host CPU count is returned. Note the result is truncated integer
// division (e.g. quota 150000 / period 100000 -> 1 core).
func GetCPUCoresInCgroup(cgroupPath string) (uint64, error) {
	period, err := readIntFromFile(cgroupPath + "/cpu.cfs_period_us")
	if err != nil {
		return 0, err
	}

	quota, err := readIntFromFile(cgroupPath + "/cpu.cfs_quota_us")
	if err != nil {
		return 0, err
	}

	// -1 means no quota: the cgroup may use every host CPU.
	if quota == -1 {
		return uint64(runtime.NumCPU()), nil
	}

	// Guard the division below. Fix: the previous message read
	// "period not zero" when the period actually WAS zero.
	if period == 0 {
		return 0, fmt.Errorf("cpu.cfs_period_us is zero in %s", cgroupPath)
	}

	return uint64(quota / period), nil
}
func readIntFromFile(filePath string) (int, error) {
data, err := os.ReadFile(filePath)
if err != nil {
return 0, err
}
str := strings.TrimSpace(string(data))
value, err := strconv.Atoi(str)
if err != nil {
return 0, err
}
return value, nil
}
func readCPUUsage(path string) (map[string]uint64, error) {
cpuacctPath := path + "/cpuacct.stat"
output, err := os.ReadFile(cpuacctPath)
if err != nil {
return nil, err
}
cpuUsage := make(map[string]uint64)
lines := strings.Split(string(output), "\n")
for _, line := range lines {
line = strings.TrimSpace(line)
if line == "" {
continue
}
parts := strings.Fields(line)
if len(parts) != 2 {
continue
}
key := parts[0]
valueStr := parts[1]
value, err := strconv.ParseUint(valueStr, 10, 64)
if err != nil {
return nil, err
}
cpuUsage[key] = value
}
cpuUsage["total"] = uint64(time.Now().UnixNano())
return cpuUsage, nil
}
// UserHZtons because kernel USER_HZ = 100, the default value set to 10,000,000
// (i.e. nanoseconds per USER_HZ tick: 1e9 / USERHZ).
const (
UserHZtons = 10000000
USERHZ = 100
)
// calculateCPUUsage computes the container's user/system CPU usage from the
// delta between currUsage and the previous sample, normalized by the
// cgroup's core count, and stores the results in info.nowUsageP
// ("cpuUser"/"cpuSys").
func calculateCPUUsage(info *containerCPUInfo, currUsage map[string]uint64) error {
	deltaTotal := currUsage["total"] - info.prevUsage["total"]
	deltaUser := currUsage["user"] - info.prevUsage["user"]
	deltaSys := currUsage["system"] - info.prevUsage["system"]

	cpuCores, err := GetCPUCoresInCgroup(info.path)
	if err != nil {
		// Fix: keep the underlying cause instead of discarding it.
		return fmt.Errorf("get cgroup cpu cores of %s: %w", info.path, err)
	}
	// both appear as divisors below
	if cpuCores == 0 || deltaTotal == 0 {
		return fmt.Errorf("division by zero error")
	}

	log.Debugf("cpuidle calculate core %v currUsage %v prevUsage %v", cpuCores, currUsage, info.prevUsage)

	// user/system are USER_HZ ticks, deltaTotal is ns; UserHZtons*USERHZ
	// converts ticks to a per-core percentage-like ratio.
	info.nowUsageP["cpuUser"] = deltaUser * UserHZtons * USERHZ / deltaTotal / cpuCores
	info.nowUsageP["cpuSys"] = deltaSys * UserHZtons * USERHZ / deltaTotal / cpuCores
	return nil
}
// containerCPUInfo tracks one container's CPU accounting state between
// detection rounds.
type containerCPUInfo struct {
prevUsage map[string]uint64 // last raw sample: "user"/"system" ticks, "total" ns timestamp
prevUsageP map[string]uint64 // previously computed usage: "cpuUser"/"cpuSys"
nowUsageP map[string]uint64 // latest computed usage: "cpuUser"/"cpuSys"
deltaUser int64 // change in computed user usage vs the previous round
deltaSys int64 // change in computed system usage vs the previous round
timestamp int64
path string // container's cgroup cpu path
alive bool // seen in the most recent container listing
}
// cpuIdleIDMap is the container information
// keyed by container ID.
type cpuIdleIDMap map[string]*containerCPUInfo
// updateCPUIdleIDMap reconciles m with the currently running
// containers: known containers get their cgroup path refreshed and are
// marked alive, new containers are inserted with zeroed counters.
func updateCPUIdleIDMap(m cpuIdleIDMap) error {
	containers, err := pod.GetNormalContainers()
	if err != nil {
		return fmt.Errorf("GetNormalContainers: %w", err)
	}
	for _, c := range containers {
		if info, ok := m[c.ID]; ok {
			// Existing entry: the cgroup path may change across
			// container restarts, so refresh it every round.
			info.path = filepath.Join(cgrouputil.V1CpuPath(), c.CgroupSuffix)
			info.alive = true
			continue
		}
		m[c.ID] = &containerCPUInfo{
			prevUsage: map[string]uint64{
				"user": 0,
				"system": 0,
				"total": 0,
			},
			prevUsageP: map[string]uint64{
				"cpuUser": 0,
				"cpuSys": 0,
			},
			nowUsageP: map[string]uint64{
				"cpuUser": 0,
				"cpuSys": 0,
			},
			path: filepath.Join(cgrouputil.V1CpuPath(), c.CgroupSuffix),
			alive: true,
		}
	}
	return nil
}
// cpuIdleIdMap holds the per-container sampling state across detection rounds.
var cpuIdleIdMap = make(cpuIdleIDMap)
// cpuIdleDetect polls every container's cpuacct.stat once per
// configured step and returns the ID of the first container whose CPU
// usage crosses the configured thresholds (absolute value AND growth
// delta for user, system, or combined usage), respecting a per-container
// grace period between two events. It blocks until a hit or ctx
// cancellation.
func cpuIdleDetect(ctx context.Context) (string, error) {
	// get config info
	userth := conf.Get().Tracing.Cpuidle.CgUserth
	deltauserth := conf.Get().Tracing.Cpuidle.CgDeltaUserth
	systh := conf.Get().Tracing.Cpuidle.CgSysth
	deltasysth := conf.Get().Tracing.Cpuidle.CgDeltaSysth
	usageth := conf.Get().Tracing.Cpuidle.CgUsageth
	deltausageth := conf.Get().Tracing.Cpuidle.CgDeltaUsageth
	step := conf.Get().Tracing.Cpuidle.CgStep
	graceth := conf.Get().Tracing.Cpuidle.CgGrace
	for {
		select {
		case <-ctx.Done():
			return "", types.ErrExitByCancelCtx
		case <-time.After(time.Duration(step) * time.Second):
			if err := updateCPUIdleIDMap(cpuIdleIdMap); err != nil {
				return "", err
			}
			for containerID, v := range cpuIdleIdMap {
				if !v.alive {
					// Not seen in the last refresh: container is gone.
					delete(cpuIdleIdMap, containerID)
				} else {
					// Cleared here, set again by the next refresh.
					v.alive = false
					currUsage, err := readCPUUsage(v.path)
					if err != nil {
						log.Debugf("cpuidle failed to read %s CPU usage: %s", v.path, err)
						continue
					}
					// First sample for this container: just seed the
					// baseline, nothing to compare against yet.
					if v.prevUsage["user"] == 0 && v.prevUsage["system"] == 0 && v.prevUsage["total"] == 0 {
						v.prevUsage = currUsage
						continue
					}
					err = calculateCPUUsage(v, currUsage)
					if err != nil {
						log.Debugf("cpuidle calculate err %s", err)
						continue
					}
					v.deltaUser = int64(v.nowUsageP["cpuUser"] - v.prevUsageP["cpuUser"])
					v.deltaSys = int64(v.nowUsageP["cpuSys"] - v.prevUsageP["cpuSys"])
					v.prevUsageP["cpuUser"] = v.nowUsageP["cpuUser"]
					v.prevUsageP["cpuSys"] = v.nowUsageP["cpuSys"]
					v.prevUsage = currUsage
					nowtime := time.Now().Unix()
					gracetime := nowtime - v.timestamp
					nowUsage := v.nowUsageP["cpuUser"] + v.nowUsageP["cpuSys"]
					nowDeltaUsage := v.deltaUser + v.deltaSys
					log.Debugf("cpuidle ctID %v user %v deltauser %v sys %v deltasys %v usage %v deltausage %v grace %v graceth %v",
						containerID, v.nowUsageP["cpuUser"], v.deltaUser, v.nowUsageP["cpuSys"], v.deltaSys, nowUsage, nowDeltaUsage, gracetime, graceth)
					// Only fire if the grace period since this
					// container's last event has elapsed.
					if gracetime > graceth {
						if (v.nowUsageP["cpuUser"] > userth && v.deltaUser > deltauserth) ||
							(v.nowUsageP["cpuSys"] > systh && v.deltaSys > deltasysth) ||
							(nowUsage > usageth && nowDeltaUsage > deltausageth) {
							v.timestamp = nowtime
							// Zero the baseline so the next round
							// re-seeds instead of comparing across
							// the perf-capture window.
							for key := range v.prevUsage {
								v.prevUsage[key] = 0
							}
							return containerID, nil
						}
					}
				}
			}
		}
	}
}
// cpuIdleTracing implements the tracing task for container CPU-usage bursts.
type cpuIdleTracing struct{}
// Cpuidle is an instance of cpuIdleTracer
var (
	tracerTime time.Time // time at which the current event was detected
)
// CPUIdleTracingData is the record persisted to storage for one event:
// the usage values and thresholds at detection time plus the captured
// flame-graph frames.
type CPUIdleTracingData struct {
	NowUser uint64 `json:"nowuser"`
	UserThreshold uint64 `json:"userthreshold"`
	DeltaUser int64 `json:"deltauser"`
	DeltaUserTH int64 `json:"deltauserth"`
	NowSys uint64 `json:"nowsys"`
	SysThreshold uint64 `json:"systhreshold"`
	DeltaSys int64 `json:"deltasys"`
	DeltaSysTH int64 `json:"deltasysth"`
	NowUsage uint64 `json:"nowusage"`
	UsageThreshold uint64 `json:"usagethreshold"`
	DeltaUsage int64 `json:"deltausage"`
	DeltaUsageTH int64 `json:"deltausageth"`
	FlameData []flamegraph.FrameData `json:"flamedata"`
}
// Start detect work, load bpf and wait data form perfevent.
// It blocks in cpuIdleDetect until a container trips the thresholds,
// then runs the external perf tool against that container for the
// configured duration, parses its JSON flame-graph output, and saves
// the event (usage values, thresholds and frames) to storage.
func (c *cpuIdleTracing) Start(ctx context.Context) error {
	// TODO: Verify the conditions for startup.
	containerID, err := cpuIdleDetect(ctx)
	if err != nil {
		return err
	}
	tracerTime = time.Now()
	dur := conf.Get().Tracing.Cpuidle.CgUsageToolduration
	durstr := strconv.FormatInt(dur, 10)
	// exec tracerperf; allow 30s of slack beyond the sampling duration
	// before the context kills the child process.
	cmdctx, cancel := context.WithTimeout(ctx, time.Duration(dur+30)*time.Second)
	defer cancel()
	log.Infof("cpuidle exec tracerperf ctid %v dur %v", containerID, durstr)
	cmd := exec.CommandContext(cmdctx, "./tracer/perf.bin", "--casename", "cpuidle.o", "--container-id", containerID, "--dur", durstr)
	output, err := cmd.CombinedOutput()
	if err != nil {
		log.Errorf("cpuidle cmd output %v", strings.TrimSuffix(string(output), "\n"))
		return fmt.Errorf("cpuidle tracerperf exec err: %w", err)
	}
	// parse json: the tool prints the frame array on stdout
	log.Infof("cpuidle parse json")
	tracerData := CPUIdleTracingData{}
	err = json.Unmarshal(output, &tracerData.FlameData)
	if err != nil {
		return fmt.Errorf("parse JSON err: %w", err)
	}
	// save: snapshot the detection-time counters from the shared map
	// alongside the configured thresholds
	log.Infof("cpuidle upload ES")
	log.Debugf("cpuidle FlameData %v", tracerData.FlameData)
	tracerData.NowUser = cpuIdleIdMap[containerID].nowUsageP["cpuUser"]
	tracerData.UserThreshold = conf.Get().Tracing.Cpuidle.CgUserth
	tracerData.DeltaUser = cpuIdleIdMap[containerID].deltaUser
	tracerData.DeltaUserTH = conf.Get().Tracing.Cpuidle.CgDeltaUserth
	tracerData.NowSys = cpuIdleIdMap[containerID].nowUsageP["cpuSys"]
	tracerData.SysThreshold = conf.Get().Tracing.Cpuidle.CgSysth
	tracerData.DeltaSys = cpuIdleIdMap[containerID].deltaSys
	tracerData.DeltaSysTH = conf.Get().Tracing.Cpuidle.CgDeltaSysth
	tracerData.NowUsage = cpuIdleIdMap[containerID].nowUsageP["cpuSys"] + cpuIdleIdMap[containerID].nowUsageP["cpuUser"]
	tracerData.UsageThreshold = conf.Get().Tracing.Cpuidle.CgUsageth
	tracerData.DeltaUsage = cpuIdleIdMap[containerID].deltaUser + cpuIdleIdMap[containerID].deltaSys
	tracerData.DeltaUsageTH = conf.Get().Tracing.Cpuidle.CgDeltaUsageth
	storage.Save("cpuidle", containerID, tracerTime, &tracerData)
	log.Infof("cpuidle upload ES end")
	return err
}

182
core/autotracing/cpusys.go Normal file
View File

@ -0,0 +1,182 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package autotracing
import (
"context"
"encoding/json"
"fmt"
"os"
"os/exec"
"strconv"
"strings"
"time"
"huatuo-bamai/internal/conf"
"huatuo-bamai/internal/flamegraph"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/storage"
"huatuo-bamai/pkg/tracing"
"huatuo-bamai/pkg/types"
)
// Register the "cpusys" autotracing task at package load time.
func init() {
	tracing.RegisterEventTracing("cpusys", newCpuSys)
}
// newCpuSys builds the tracing attributes for the system-CPU burst
// detector (20s restart interval, tracing flag).
func newCpuSys() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &cpuSysTracing{},
		Internal: 20,
		Flag: tracing.FlagTracing,
	}, nil
}
// CPUStats structure that records cpu usage
type CPUStats struct {
	system uint64 // system time in USER_HZ ticks (3rd value of the "cpu" line)
	total uint64 // sum of all time fields of the "cpu" line
}
// CpuSysDetect samples host-wide CPU statistics from /proc/stat every
// configured step and returns the system-time percentage and its
// round-over-round delta as soon as either crosses its configured
// threshold. It blocks until a hit or ctx cancellation.
func CpuSysDetect(ctx context.Context) (uint64, int64, error) {
	var (
		percpuStats CPUStats
		pervSys uint64
		deltaSys int64
		err error
	)
	sysdelta := conf.Get().Tracing.Cpusys.CPUSysDelta
	sysstep := conf.Get().Tracing.Cpusys.CPUSysStep
	systh := conf.Get().Tracing.Cpusys.CPUSysth
	for {
		select {
		case <-ctx.Done():
			return 0, 0, types.ErrExitByCancelCtx
		case <-time.After(time.Duration(sysstep) * time.Second):
			// First iteration: seed the baseline sample, then wait a
			// second so the next diff covers a real interval.
			if percpuStats.total == 0 {
				percpuStats, err = getCPUStats()
				if err != nil {
					return 0, 0, fmt.Errorf("get cpuStats err %w", err)
				}
				time.Sleep(1 * time.Second)
				continue
			}
			cpuStats, err := getCPUStats()
			if err != nil {
				return 0, 0, err
			}
			systotal := cpuStats.total - percpuStats.total
			if systotal == 0 {
				return 0, 0, fmt.Errorf("systotal is ZERO")
			}
			// System time as a percentage of all CPU time this interval.
			sys := (cpuStats.system - percpuStats.system) * 100 / systotal
			if pervSys != 0 {
				deltaSys = int64(sys - pervSys)
			}
			log.Debugf("cpusys alarm sys %v pervsys %v deltasys %v", sys, pervSys, deltaSys)
			pervSys = sys
			percpuStats = cpuStats
			if sys > systh || deltaSys > sysdelta {
				return sys, deltaSys, nil
			}
		}
	}
}
// getCPUStats parses the aggregate "cpu" line of /proc/stat and returns
// the sum of all its time fields plus the system time (third value),
// both in USER_HZ ticks.
func getCPUStats() (CPUStats, error) {
	raw, err := os.ReadFile("/proc/stat")
	if err != nil {
		return CPUStats{}, err
	}
	for _, line := range strings.Split(string(raw), "\n") {
		fields := strings.Fields(line)
		// Only the aggregate "cpu" line (not "cpu0", "cpu1", ...).
		if len(fields) < 5 || fields[0] != "cpu" {
			continue
		}
		var stats CPUStats
		for i, f := range fields[1:] {
			v, err := strconv.ParseUint(f, 10, 64)
			if err != nil {
				return CPUStats{}, err
			}
			stats.total += v
			// Values are user, nice, system, idle, ...; index 2 of the
			// value list is system time.
			if i == 2 {
				stats.system = v
			}
		}
		return stats, nil
	}
	return CPUStats{}, fmt.Errorf("failed to parse /proc/stat")
}
// cpuSysTracing implements the tracing task for host system-CPU bursts.
type cpuSysTracing struct{}
// CpuSysTracingData is the record persisted to storage for one event:
// the detected values and thresholds plus the captured flame graph.
type CpuSysTracingData struct {
	NowSys string `json:"now_sys"`
	SysThreshold string `json:"sys_threshold"`
	DeltaSys string `json:"delta_sys"`
	DeltaSysTh string `json:"delta_sys_th"`
	FlameData []flamegraph.FrameData `json:"flamedata"`
}
// Start runs system-CPU burst detection: it blocks in CpuSysDetect
// until the sys-time thresholds trip, then captures a host-wide flame
// graph via the external perf tool and saves the event to storage.
// (The previous comment mentioned tcpconnlat; that was a copy-paste
// leftover from another tracer.)
func (c *cpuSysTracing) Start(ctx context.Context) error {
	// TODO: Verify the conditions for startup.
	cpuSys, delta, err := CpuSysDetect(ctx)
	if err != nil {
		return err
	}
	tracerTime := time.Now()
	dur := conf.Get().Tracing.Cpusys.CPUSysToolduration
	durstr := strconv.FormatInt(dur, 10)
	// exec tracerperf; 30s of slack beyond the sampling duration before
	// the context kills the child process.
	cmdctx, cancel := context.WithTimeout(ctx, time.Duration(dur+30)*time.Second)
	defer cancel()
	log.Infof("cpusys exec tracerperf dur %v", durstr)
	cmd := exec.CommandContext(cmdctx, "./tracer/perf.bin", "--casename", "cpusys.o", "--dur", durstr)
	output, err := cmd.CombinedOutput()
	if err != nil {
		log.Errorf("cpusys cmd output %v", strings.TrimSuffix(string(output), "\n"))
		return fmt.Errorf("cpusys tracerperf exec err: %w", err)
	}
	// parse json: the tool prints the frame array on stdout
	log.Infof("cpusys parse json")
	tracerData := CpuSysTracingData{}
	err = json.Unmarshal(output, &tracerData.FlameData)
	if err != nil {
		return fmt.Errorf("parse JSON err: %w", err)
	}
	// save
	log.Infof("cpusys upload ES")
	tracerData.NowSys = fmt.Sprintf("%d", cpuSys)
	tracerData.SysThreshold = fmt.Sprintf("%d", conf.Get().Tracing.Cpusys.CPUSysth)
	tracerData.DeltaSys = fmt.Sprintf("%d", delta)
	tracerData.DeltaSysTh = fmt.Sprintf("%d", conf.Get().Tracing.Cpusys.CPUSysDelta)
	storage.Save("cpusys", "", tracerTime, &tracerData)
	log.Infof("cpusys upload ES end")
	return err
}

427
core/autotracing/dload.go Normal file
View File

@ -0,0 +1,427 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package autotracing
import (
"bufio"
"bytes"
"context"
"fmt"
"io"
"os"
"strconv"
"strings"
"time"
"huatuo-bamai/internal/conf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/pod"
"huatuo-bamai/internal/storage"
"huatuo-bamai/internal/utils/cgrouputil"
"huatuo-bamai/pkg/tracing"
"huatuo-bamai/pkg/types"
"github.com/google/cadvisor/utils/cpuload/netlink"
"github.com/prometheus/procfs"
"github.com/shirou/gopsutil/process"
)
// Register the "dload" (D-state load) autotracing task at package load time.
func init() {
	tracing.RegisterEventTracing("dload", newDload)
}
// newDload builds the tracing attributes for the container D-state load
// detector (30s restart interval, tracing flag).
func newDload() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &dloadTracing{},
		Internal: 30,
		Flag: tracing.FlagTracing,
	}, nil
}
// containerDloadInfo tracks the per-container load-average state
// carried between detection rounds.
type containerDloadInfo struct {
	path string // cgroup cpu path of the container
	name string // cgroup suffix, used as the netlink stats key
	container *pod.Container
	avgnrun [2]uint64 // fixed-point 1/5-min averages of running+uninterruptible tasks
	load [2]float64 // avgnrun converted to float (like /proc/loadavg)
	avgnuni [2]uint64 // fixed-point 1/5-min averages of uninterruptible tasks only
	loaduni [2]float64 // avgnuni converted to float
	alive bool // refreshed every round; stale entries are evicted
}
// DloadTracingData is the record persisted to storage for one D-state
// load event: the task-state counters, the computed averages, any known
// issue match and the collected kernel stacks.
type DloadTracingData struct {
	Avg float64 `json:"avg"`
	Threshold float64 `json:"threshold"`
	NrSleeping uint64 `json:"nr_sleeping"`
	NrRunning uint64 `json:"nr_running"`
	NrStopped uint64 `json:"nr_stopped"`
	NrUninterruptible uint64 `json:"nr_uninterruptible"`
	NrIoWait uint64 `json:"nr_iowait"`
	LoadAvg float64 `json:"load_avg"`
	DLoadAvg float64 `json:"dload_avg"`
	KnowIssue string `json:"known_issue"`
	InKnownList uint64 `json:"in_known_list"`
	Stack string `json:"stack"`
}
func getStack(targetPid int32) (string, error) {
procStack := "/proc/" + strconv.Itoa(int(targetPid)) + "/stack"
content, err := os.ReadFile(procStack)
if err != nil {
log.Infof("%v", err)
return "", err
}
return string(content), nil
}
// Scope selectors for task-list collection and stack dumping.
const (
	isHost = 1 // scan every process on the host
	isCgrp = 2 // scan only the tasks of one container cgroup
)
// getUnTaskList returns the PIDs to inspect for D-state tasks: the
// members of cgrpPath's tasks file when infoType is isCgrp, otherwise
// every process on the host (via procfs).
//
// Read errors while iterating the tasks file are logged and the partial
// list is returned without error (best effort), matching the previous
// behavior.
func getUnTaskList(cgrpPath string, infoType int) ([]int32, error) {
	var pidList []int32
	if infoType == isCgrp {
		taskPath := cgrpPath + "/tasks"
		tskfi, err := os.Open(taskPath)
		if err != nil {
			log.Infof("%v", err)
			return nil, err
		}
		// Fix: the file was previously never closed, leaking one fd per call.
		defer tskfi.Close()
		r := bufio.NewReader(tskfi)
		for {
			lineBytes, err := r.ReadBytes('\n')
			line := strings.TrimSpace(string(lineBytes))
			if err != nil && err != io.EOF {
				log.Infof("fail to read tasklist: %v", err)
				break
			}
			if err == io.EOF {
				break
			}
			pid, _ := strconv.ParseInt(line, 10, 32)
			pidList = append(pidList, int32(pid))
		}
		return pidList, nil
	}
	procs, err := procfs.AllProcs()
	if err != nil {
		log.Infof("%v", err)
		return nil, err
	}
	for _, p := range procs {
		pidList = append(pidList, int32(p.PID))
	}
	return pidList, nil
}
// dumpUnTaskStack walks tskList, and for every process currently in the
// uninterruptible ("D"/"U") state collects its comm, pid and kernel
// stack. It returns a titled, concatenated report, or "" when no task
// yielded a usable stack. Per-task failures are logged and skipped.
func dumpUnTaskStack(tskList []int32, dumpType int) (string, error) {
	var infoTitle string
	var getValidStackinfo bool = false
	var strResult string = ""
	stackInfo := new(bytes.Buffer)
	switch dumpType {
	case isHost:
		infoTitle = "\nbacktrace of D process in Host:\n"
	case isCgrp:
		infoTitle = "\nbacktrace of D process in Cgroup:\n"
	}
	for _, pid := range tskList {
		proc, err := process.NewProcess(pid)
		if err != nil {
			// Process may have exited between listing and inspection.
			log.Debugf("fail to get process %d: %v", pid, err)
			continue
		}
		status, err := proc.Status()
		if err != nil {
			log.Debugf("fail to get status %d: %v", pid, err)
			continue
		}
		if status == "D" || status == "U" {
			comm, err := proc.Name()
			if err != nil {
				log.Infof("%v", err)
				continue
			}
			stack, err := getStack(pid)
			if err != nil {
				log.Infof("%v", err)
				continue
			}
			if stack == "" {
				continue
			}
			fmt.Fprintf(stackInfo, "Comm: %s\tPid: %d\n%s\n", comm, pid, stack)
			getValidStackinfo = true
		}
	}
	// Only emit the title when at least one stack was captured.
	if getValidStackinfo {
		strResult = fmt.Sprintf("%s%s", infoTitle, stackInfo)
	}
	return strResult, nil
}
// dloadIDMap is the container information
type dloadIDMap map[string]*containerDloadInfo
// dloadIdMap holds the per-container load state across detection rounds.
var dloadIdMap = make(dloadIDMap)
// updateIDMap reconciles m with the currently running containers: known
// entries get their name/path/handle refreshed and are marked alive,
// new containers are inserted with zeroed averages.
func updateIDMap(m dloadIDMap) error {
	containers, err := pod.GetAllContainers()
	if err != nil {
		return fmt.Errorf("GetAllContainers: %w", err)
	}
	for _, container := range containers {
		if _, ok := m[container.ID]; ok {
			m[container.ID].name = container.CgroupSuffix
			m[container.ID].path = cgrouputil.NewCPU().Path(container.CgroupSuffix)
			m[container.ID].container = container
			m[container.ID].alive = true
			continue
		}
		m[container.ID] = &containerDloadInfo{
			path: cgrouputil.NewCPU().Path(container.CgroupSuffix),
			name: container.CgroupSuffix,
			container: container,
			alive: true,
		}
	}
	return nil
}
const (
fShift = 11
fixed1 = 1 << fShift
exp1 = 1884
exp5 = 2014
exp15 = 2037
)
func calcLoad(load, exp, active uint64) uint64 {
var newload uint64
newload = load*exp + active*(fixed1-exp)
newload += 1 << (fShift - 1)
return newload / fixed1
}
func calcLoadavg(avgnrun [2]uint64, active uint64) (avgnresult [2]uint64) {
if active > 0 {
active *= fixed1
} else {
active = 0
}
avgnresult[0] = calcLoad(avgnrun[0], exp1, active)
avgnresult[1] = calcLoad(avgnrun[1], exp5, active)
return avgnresult
}
func loadInt(x uint64) (r uint64) {
r = x >> fShift
return r
}
func loadFrac(x uint64) (r uint64) {
r = loadInt((x & (fixed1 - 1)) * 100)
return r
}
func getAvenrun(avgnrun [2]uint64, offset uint64, shift int) (loadavgNew [2]float64) {
var loads [2]uint64
loads[0] = (avgnrun[0] + offset) << shift
loads[1] = (avgnrun[1] + offset) << shift
loadavgNew[0] = float64(loadInt(loads[0])) +
float64(loadFrac(loads[0]))/float64(100)
loadavgNew[1] = float64(loadInt(loads[1])) +
float64(loadFrac(loads[1]))/float64(100)
return loadavgNew
}
// updateLoad advances both of the container's load averages by one
// tick: the classic load (running + uninterruptible tasks) and the
// D-load (uninterruptible only). The fixed1/200 offset matches the
// kernel's rounding when converting avenrun for display.
func updateLoad(info *containerDloadInfo, nrRunning, nrUninterruptible uint64) {
	info.avgnrun = calcLoadavg(info.avgnrun, nrRunning+nrUninterruptible)
	info.load = getAvenrun(info.avgnrun, fixed1/200, 0)
	info.avgnuni = calcLoadavg(info.avgnuni, nrUninterruptible)
	info.loaduni = getAvenrun(info.avgnuni, fixed1/200, 0)
}
// detect polls every container's task-state counters (via the cAdvisor
// netlink helper) every 5 seconds, maintains per-container D-load
// averages, and returns the first container whose 1-minute D-load
// exceeds the configured threshold, together with a log line and the
// partially filled event record. Blocks until a hit or ctx cancellation.
func detect(ctx context.Context) (*containerDloadInfo, string, *DloadTracingData, error) {
	var caseData DloadTracingData
	n, err := netlink.New()
	if err != nil {
		log.Infof("Failed to create cpu load util: %s", err)
		return nil, "", nil, err
	}
	defer n.Stop()
	dloadThresh := conf.Get().Tracing.Dload.ThresholdLoad
	monitorGap := conf.Get().Tracing.Dload.MonitorGap
	for {
		select {
		case <-ctx.Done():
			return nil, "", nil, types.ErrExitByCancelCtx
		default:
			if err := updateIDMap(dloadIdMap); err != nil {
				return nil, "", nil, err
			}
			for k, v := range dloadIdMap {
				if !v.alive {
					// Not seen in the last refresh: container is gone.
					delete(dloadIdMap, k)
				} else {
					// Cleared here, set again by the next refresh.
					v.alive = false
					// Skip freshly started containers for the
					// configured warm-up window.
					timeStartMonitor := v.container.StartedAt.Add(time.Second * time.Duration(monitorGap))
					if time.Now().Before(timeStartMonitor) {
						log.Debugf("%s were just started, we'll start monitoring it later.", v.container.Hostname)
						continue
					}
					stats, err := n.GetCpuLoad(v.name, v.path)
					if err != nil {
						log.Debugf("failed to get %s load, probably the container has been deleted: %s", v.container.Hostname, err)
						continue
					}
					updateLoad(v, stats.NrRunning, stats.NrUninterruptible)
					if v.loaduni[0] > dloadThresh {
						logTitle := fmt.Sprintf("Avg=%0.2f Threshold=%0.2f %+v ", v.loaduni[0], dloadThresh, stats)
						logBody := fmt.Sprintf("LoadAvg=%0.2f, DLoadAvg=%0.2f", v.load[0], v.loaduni[0])
						logLoad := fmt.Sprintf("%s%s", logTitle, logBody)
						log.Infof("dload event %s", logLoad)
						caseData.Avg = v.loaduni[0]
						caseData.Threshold = dloadThresh
						caseData.NrSleeping = stats.NrSleeping
						caseData.NrRunning = stats.NrRunning
						caseData.NrStopped = stats.NrStopped
						caseData.NrUninterruptible = stats.NrUninterruptible
						caseData.NrIoWait = stats.NrIoWait
						caseData.LoadAvg = v.load[0]
						caseData.DLoadAvg = v.loaduni[0]
						return v, logLoad, &caseData, err
					}
				}
			}
			time.Sleep(5 * time.Second)
		}
	}
}
// dumpInfo completes a D-load event: it collects kernel stacks of
// uninterruptible tasks inside the container's cgroup and on the whole
// host, matches them against the known-issue list, and persists the
// record. Events with no usable cgroup stack are silently dropped.
func dumpInfo(info *containerDloadInfo, logLoad string, caseData *DloadTracingData) error {
	var tskList []int32
	var err error
	var stackCgrp string
	var stackHost string
	var containerHostNamespace string
	cgrpPath := info.path
	containerID := info.container.ID
	containerHostNamespace = info.container.LabelHostNamespace()
	tskList, err = getUnTaskList(cgrpPath, isCgrp)
	if err != nil {
		return fmt.Errorf("failed to get cgroup task list: %w", err)
	}
	stackCgrp, err = dumpUnTaskStack(tskList, isCgrp)
	if err != nil {
		return fmt.Errorf("failed to dump cgroup task backtrace: %w", err)
	}
	tskList, err = getUnTaskList("", isHost)
	if err != nil {
		return fmt.Errorf("failed to get host task list: %w", err)
	}
	stackHost, err = dumpUnTaskStack(tskList, isHost)
	if err != nil {
		return fmt.Errorf("failed to dump host task backtrace: %w", err)
	}
	// We'll not record it if got no cgroup stack info.
	if stackCgrp == "" {
		return nil
	}
	// Check if this is caused by known issues.
	knownIssue, inKnownList := conf.KnownIssueSearch(stackCgrp, containerHostNamespace, "")
	if knownIssue != "" {
		caseData.KnowIssue = knownIssue
		caseData.InKnownList = inKnownList
	} else {
		caseData.KnowIssue = "none"
		caseData.InKnownList = inKnownList
	}
	// save storage
	caseData.Stack = fmt.Sprintf("%s%s", stackCgrp, stackHost)
	storage.Save("ctnDLoad", containerID, time.Now(), caseData)
	return nil
}
// dloadTracing implements the tracing task for container D-state load.
type dloadTracing struct{}
// Start detect work, monitor the load of containers.
// It blocks in detect until a container's D-load crosses the threshold,
// then dumps stacks and persists the event unless the context was
// already cancelled.
func (c *dloadTracing) Start(ctx context.Context) error {
	cntInfo, logLoad, caseData, err := detect(ctx)
	if err != nil {
		return err
	}
	select {
	case <-ctx.Done():
		log.Infof("caller requests stop !!!")
	default:
		err = dumpInfo(cntInfo, logLoad, caseData)
		if err != nil {
			return fmt.Errorf("failed to dump info: %w", err)
		}
	}
	return err
}

View File

@ -0,0 +1,250 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package autotracing
import (
"bufio"
"context"
"os"
"sort"
"strconv"
"strings"
"time"
"huatuo-bamai/internal/conf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/storage"
"huatuo-bamai/pkg/tracing"
"github.com/shirou/gopsutil/process"
)
// Register the "membust" memory-burst autotracing task at package load time.
func init() {
	tracing.RegisterEventTracing("membust", newMemBurst)
}
// newMemBurst builds the tracing attributes for the anonymous-memory
// burst detector (10s restart interval, tracing flag).
func newMemBurst() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &memBurstTracing{},
		Internal: 10,
		Flag: tracing.FlagTracing,
	}, nil
}
// memBurstTracing implements the tracing task for host memory bursts.
type memBurstTracing struct{}
// MemoryTracingData is the record persisted to storage for one event.
type MemoryTracingData struct {
	TopMemoryUsage []ProcessMemoryInfo `json:"top_memory_usage"`
}
// ProcessMemoryInfo holds process information for sorting
type ProcessMemoryInfo struct {
	PID int32 `json:"pid"`
	ProcessName string `json:"process_name"`
	MemorySize uint64 `json:"memory_size"` // RSS in bytes
}
// ByMemory is used to sorting processes by memory usage
type ByMemory []ProcessMemoryInfo
func (a ByMemory) Len() int { return len(a) }
func (a ByMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
// Less sorts descending: the largest consumer comes first.
func (a ByMemory) Less(i, j int) bool { return a[i].MemorySize > a[j].MemorySize }
// getTopMemoryProcesses returns the top N processes consuming the most memory.
// Processes whose memory info or name cannot be read (e.g. they exited
// mid-scan) are skipped; fewer than topN entries may be returned.
func getTopMemoryProcesses(topN int) ([]ProcessMemoryInfo, error) {
	processes, err := process.Processes()
	if err != nil {
		return nil, err
	}
	var pmInfos []ProcessMemoryInfo
	for _, p := range processes {
		memInfo, err := p.MemoryInfo()
		if err != nil {
			continue
		}
		name, err := p.Name()
		if err != nil {
			continue
		}
		pmInfos = append(pmInfos, ProcessMemoryInfo{
			PID: p.Pid,
			ProcessName: name,
			MemorySize: memInfo.RSS,
		})
	}
	// Sort the processes by memory usage (descending, see ByMemory.Less)
	sort.Sort(ByMemory(pmInfos))
	if len(pmInfos) < topN {
		return pmInfos, nil
	}
	return pmInfos[:topN], nil
}
// readMemInfo returns the integer values (kB as listed in
// /proc/meminfo) of the requested keys. Scanning stops early once every
// requested key has been found.
func readMemInfo(requiredKeys map[string]bool) (map[string]int, error) {
	f, err := os.Open("/proc/meminfo")
	if err != nil {
		return nil, err
	}
	defer f.Close()

	results := make(map[string]int)
	sc := bufio.NewScanner(f)
	for sc.Scan() {
		fields := strings.Fields(sc.Text())
		if len(fields) < 2 {
			continue
		}
		// Lines look like "MemTotal:       16337648 kB".
		name := strings.Trim(fields[0], ":")
		if _, wanted := requiredKeys[name]; !wanted {
			continue
		}
		v, err := strconv.Atoi(strings.Trim(fields[1], " kB"))
		if err != nil {
			return nil, err
		}
		results[name] = v
		if len(results) == len(requiredKeys) {
			break
		}
	}
	if err := sc.Err(); err != nil {
		return nil, err
	}
	return results, nil
}
// checkAndRecordMemoryUsage samples anonymous memory (Active(anon) +
// Inactive(anon)), records it in the circular history buffer, and —
// once the buffer is full — compares the newest sample to the oldest.
// When usage grew by at least burstRatio AND exceeds anonThreshold
// percent of total memory, it returns the top memory consumers;
// otherwise it returns an empty slice. Read failures are logged and
// reported as "no burst" (best effort).
func checkAndRecordMemoryUsage(currentIndex *int, isHistoryFull *bool,
	memTotal int, history []int, historyWindowLength, topNProcesses int,
	burstRatio float64, anonThreshold int,
) ([]ProcessMemoryInfo, error) {
	memInfo, err := readMemInfo(map[string]bool{
		"Active(anon)": true,
		"Inactive(anon)": true,
	})
	if err != nil {
		log.Errorf("Error reading memory info: %v\n", err)
		return []ProcessMemoryInfo{}, nil
	}
	currentSum := memInfo["Active(anon)"] + memInfo["Inactive(anon)"]
	history[*currentIndex] = currentSum
	// The buffer is considered full once the last slot has been written.
	if *currentIndex == historyWindowLength-1 {
		*isHistoryFull = true
	}
	*currentIndex = (*currentIndex + 1) % historyWindowLength
	log.Debugf("Checked memory status. active_anon=%v KiB inactive_anon=%v KiB\n", memInfo["Active(anon)"], memInfo["Inactive(anon)"])
	if *isHistoryFull {
		oldestSum := history[*currentIndex] // current index is the oldest element
		if float64(currentSum) >= burstRatio*float64(oldestSum) && currentSum >= (anonThreshold*memTotal/100) {
			topProcesses, err := getTopMemoryProcesses(topNProcesses)
			if err == nil {
				return topProcesses, nil
			}
			log.Errorf("Fail to getTopMemoryProcesses")
			return []ProcessMemoryInfo{}, err
		}
	}
	return []ProcessMemoryInfo{}, nil
}
// Start is the core loop of the memory-burst tracer: it samples
// anonymous memory every sampleInterval seconds into a sliding window,
// and whenever a burst is detected (and the silence period since the
// last report has elapsed) saves the top memory consumers to storage.
// Runs until the context is cancelled.
func (c *memBurstTracing) Start(ctx context.Context) error {
	var err error
	historyWindowLength := conf.Get().Tracing.MemoryBurst.HistoryWindowLength
	sampleInterval := conf.Get().Tracing.MemoryBurst.SampleInterval
	silencePeriod := conf.Get().Tracing.MemoryBurst.SilencePeriod
	topNProcesses := conf.Get().Tracing.MemoryBurst.TopNProcesses
	burstRatio := conf.Get().Tracing.MemoryBurst.BurstRatio
	anonThreshold := conf.Get().Tracing.MemoryBurst.AnonThreshold
	memInfo, err := readMemInfo(map[string]bool{"MemTotal": true})
	if err != nil {
		log.Infof("Error reading MemTotal from memory info: %v\n", err)
		return err
	}
	memTotal := memInfo["MemTotal"]
	history := make([]int, historyWindowLength) // circular buffer
	var currentIndex int
	var isHistoryFull bool // don't check memory burst until we have enough data
	var topProcesses []ProcessMemoryInfo
	// Backdate the last report so the very first burst is never silenced.
	lastReportTime := time.Now().Add(-24 * time.Hour)
	// Take one immediate sample so the window starts filling right away.
	_, err = checkAndRecordMemoryUsage(&currentIndex, &isHistoryFull, memTotal, history, historyWindowLength, topNProcesses, burstRatio, anonThreshold)
	if err != nil {
		log.Errorf("Fail to checkAndRecordMemoryUsage")
		return err
	}
	for {
		ticker := time.NewTicker(time.Duration(sampleInterval) * time.Second)
		stoppedByUser := false
		for range ticker.C {
			topProcesses, err = checkAndRecordMemoryUsage(&currentIndex, &isHistoryFull, memTotal, history, historyWindowLength, topNProcesses, burstRatio, anonThreshold)
			if err != nil {
				log.Errorf("Fail to checkAndRecordMemoryUsage")
				return err
			}
			// Non-blocking cancellation check between samples.
			select {
			case <-ctx.Done():
				log.Info("Caller request to stop")
				stoppedByUser = true
			default:
			}
			if len(topProcesses) > 0 || stoppedByUser {
				break
			}
		}
		ticker.Stop()
		if stoppedByUser {
			break
		}
		currentTime := time.Now()
		// Suppress reports inside the silence period after the last one.
		diff := currentTime.Sub(lastReportTime).Seconds()
		if diff < float64(silencePeriod) {
			continue
		}
		lastReportTime = currentTime
		// save storage
		caseData := &MemoryTracingData{
			TopMemoryUsage: topProcesses,
		}
		storage.Save("memburst", "", time.Now(), caseData)
	}
	return nil
}

264
core/events/dropwatch.go Normal file
View File

@ -0,0 +1,264 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package events
import (
"context"
"fmt"
"net"
"strings"
"time"
"huatuo-bamai/internal/bpf"
"huatuo-bamai/internal/conf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/storage"
"huatuo-bamai/internal/utils/bpfutil"
"huatuo-bamai/internal/utils/netutil"
"huatuo-bamai/internal/utils/symbolutil"
"huatuo-bamai/pkg/tracing"
)
const (
	tracerName = "dropwatch"
	logPrefix = tracerName + ": "
	// type: event classes reported by the dropwatch BPF program
	typeTCPCommonDrop = 1
	typeTCPSynFlood = 2
	typeTCPListenOverflowHandshake1 = 3
	typeTCPListenOverflowHandshake3 = 4
)
// from include/net/tcp_states.h
// tcpstateMap maps the kernel's numeric TCP state to its name; index 0
// is unused by the kernel.
var tcpstateMap = []string{
	"<nil>", // 0
	"ESTABLISHED",
	"SYN_SENT",
	"SYN_RECV",
	"FIN_WAIT1",
	"FIN_WAIT2",
	"TIME_WAIT",
	"CLOSE",
	"CLOSE_WAIT",
	"LAST_ACK",
	"LISTEN",
	"CLOSING",
	"NEW_SYN_RECV",
}
// typeMap maps the BPF event class to the string stored in the event record.
var typeMap = map[uint8]string{
	typeTCPCommonDrop: "common_drop",
	typeTCPSynFlood: "syn_flood",
	typeTCPListenOverflowHandshake1: "listen_overflow_handshake1",
	typeTCPListenOverflowHandshake3: "listen_overflow_handshake3",
}
// perfEventT is the raw perf-event payload emitted by the dropwatch BPF
// program; its layout must stay in sync with the struct in
// bpf/dropwatch.c. Addresses/ports/seqs are in network byte order.
type perfEventT struct {
	TgidPid uint64 `json:"tgid_pid"`
	Saddr uint32 `json:"saddr"`
	Daddr uint32 `json:"daddr"`
	Sport uint16 `json:"sport"`
	Dport uint16 `json:"dport"`
	Seq uint32 `json:"seq"`
	AckSeq uint32 `json:"ack_seq"`
	QueueMapping uint32 `json:"queue_mapping"`
	PktLen uint64 `json:"pkt_len"`
	StackSize int64 `json:"stack_size"`
	Stack [symbolutil.KsymbolStackMaxDepth]uint64 `json:"stack"`
	SkMaxAckBacklog uint32 `json:"sk_max_ack_backlog"`
	State uint8 `json:"state"`
	Type uint8 `json:"type"`
	Comm [bpfutil.TaskCommLen]byte `json:"comm"`
}
// DropWatchTracingData is the human-readable record persisted to
// storage for one packet-drop event (addresses converted to host order,
// symbols resolved, hostnames looked up).
type DropWatchTracingData struct {
	Type string `json:"type"`
	Comm string `json:"comm"`
	Pid uint64 `json:"pid"`
	Saddr string `json:"saddr"`
	Daddr string `json:"daddr"`
	Sport uint16 `json:"sport"`
	Dport uint16 `json:"dport"`
	SrcHostname string `json:"src_hostname"`
	DestHostname string `json:"dest_hostname"`
	MaxAckBacklog uint32 `json:"max_ack_backlog"`
	Seq uint32 `json:"seq"`
	AckSeq uint32 `json:"ack_seq"`
	QueueMapping uint32 `json:"queue_mapping"`
	PktLen uint64 `json:"pkt_len"`
	State string `json:"state"`
	Stack string `json:"stack"`
}
// dropWatchTracing implements the packet-drop event tracer.
type dropWatchTracing struct{}
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/dropwatch.c -o $BPF_DIR/dropwatch.o
// Register the dropwatch event tracer at package load time.
func init() {
	tracing.RegisterEventTracing(tracerName, newDropWatch)
}
// newDropWatch builds the tracing attributes for the dropwatch tracer
// (10s restart interval, tracing flag).
func newDropWatch() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &dropWatchTracing{},
		Internal: 10,
		Flag: tracing.FlagTracing,
	}, nil
}
// Start starts the tracer: it loads the dropwatch BPF object, attaches
// its perf-event pipe, and then loops reading drop events, filtering
// known-benign patterns (see ignore) and persisting the rest until the
// context is cancelled.
func (c *dropWatchTracing) Start(ctx context.Context) error {
	log.Info(logPrefix + "tracer will be starting.")
	b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
	if err != nil {
		return fmt.Errorf(logPrefix+"failed to load bpf: %w", err)
	}
	defer b.Close()
	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()
	// attach
	reader, err := b.AttachAndEventPipe(childCtx, "perf_events", 8192)
	if err != nil {
		return fmt.Errorf(logPrefix+"failed to attach and event pipe: %w", err)
	}
	defer reader.Close()
	// breaker: detach the program when the breaker trips
	b.WaitDetachByBreaker(childCtx, cancel)
	log.Info(logPrefix + "tracer is waitting for event.")
	for {
		select {
		case <-childCtx.Done():
			log.Info(logPrefix + "tracer is stopped.")
			return nil
		default:
			var event perfEventT
			if err := reader.ReadInto(&event); err != nil {
				return fmt.Errorf(logPrefix+"failed to read from perf: %w", err)
			}
			// format the raw event into the storable record
			tracerTime := time.Now()
			tracerData := c.formatEvent(&event)
			// ignore known-benign drop patterns
			if c.ignore(tracerData) {
				log.Debugf(logPrefix+"ignore dropwatch data: %v", tracerData)
				continue
			}
			// save storage
			storage.Save(tracerName, "", tracerTime, tracerData)
		}
	}
}
// formatEvent converts a raw BPF perf event into the storable record:
// it converts addresses/ports/seqs from network to host order, resolves
// source/destination hostnames via reverse DNS (best effort), and
// symbolizes the kernel stack.
func (c *dropWatchTracing) formatEvent(event *perfEventT) *DropWatchTracingData {
	// hostname: reverse-DNS lookups may fail; "<nil>" marks no answer
	saddr := netutil.InetNtop(event.Saddr).String()
	daddr := netutil.InetNtop(event.Daddr).String()
	srcHostname := "<nil>"
	destHostname := "<nil>"
	h, err := net.LookupAddr(saddr)
	if err == nil && len(h) > 0 {
		srcHostname = h[0]
	}
	h, err = net.LookupAddr(daddr)
	if err == nil && len(h) > 0 {
		destHostname = h[0]
	}
	// stack: resolve raw addresses to kernel symbols, one frame per line
	stacks := strings.Join(symbolutil.DumpKernelBackTrace(event.Stack[:], symbolutil.KsymbolStackMaxDepth).BackTrace, "\n")
	// tracer data
	data := &DropWatchTracingData{
		Type: typeMap[event.Type],
		Comm: strings.TrimRight(string(event.Comm[:]), "\x00"),
		Pid: event.TgidPid >> 32, // upper half of tgid_pid is the tgid (pid)
		Saddr: saddr,
		Daddr: daddr,
		Sport: netutil.InetNtohs(event.Sport),
		Dport: netutil.InetNtohs(event.Dport),
		SrcHostname: srcHostname,
		DestHostname: destHostname,
		Seq: netutil.InetNtohl(event.Seq),
		AckSeq: netutil.InetNtohl(event.AckSeq),
		QueueMapping: event.QueueMapping,
		PktLen: event.PktLen,
		State: tcpstateMap[event.State],
		Stack: stacks,
		MaxAckBacklog: event.SkMaxAckBacklog,
	}
	log.Debugf(logPrefix+"tracing data: %v", data)
	return data
}
// ignore reports whether a drop event matches one of the known-benign
// patterns below (normal out-of-order queue purge on FIN, neighbour
// cache invalidation, or bnxt NIC TX completion) and should therefore
// not be persisted.
func (c *dropWatchTracing) ignore(data *DropWatchTracingData) bool {
	stack := strings.Split(data.Stack, "\n")
	// state: CLOSE_WAIT
	// stack:
	// 1. kfree_skb/ffffffff963047b0
	// 2. kfree_skb/ffffffff963047b0
	// 3. skb_rbtree_purge/ffffffff963089e0
	// 4. tcp_fin/ffffffff963ac200
	// 5. ...
	if data.State == "CLOSE_WAIT" {
		if len(stack) >= 3 && strings.HasPrefix(stack[2], "skb_rbtree_purge/") {
			return true
		}
	}
	// stack:
	// 1. kfree_skb/ffffffff96d127b0
	// 2. kfree_skb/ffffffff96d127b0
	// 3. neigh_invalidate/ffffffff96d388b0
	// 4. neigh_timer_handler/ffffffff96d3a870
	// 5. ...
	if conf.Get().Tracing.Dropwatch.IgnoreNeighInvalidate {
		if len(stack) >= 3 && strings.HasPrefix(stack[2], "neigh_invalidate/") {
			return true
		}
	}
	// stack:
	// 1. kfree_skb/ffffffff82283d10
	// 2. kfree_skb/ffffffff82283d10
	// 3. bnxt_tx_int/ffffffffc05c6f20
	// 4. __bnxt_poll_work_done/ffffffffc05c50c0
	// 5. ...
	// stack:
	// 1. kfree_skb/ffffffffaba83d10
	// 2. kfree_skb/ffffffffaba83d10
	// 3. __bnxt_tx_int/ffffffffc045df90
	// 4. bnxt_tx_int/ffffffffc045e250
	// 5. ...
	if len(stack) >= 3 &&
		(strings.HasPrefix(stack[2], "bnxt_tx_int/") || strings.HasPrefix(stack[2], "__bnxt_tx_int/")) {
		return true
	}
	// default: false
	return false
}

126
core/events/hungtask.go Normal file
View File

@ -0,0 +1,126 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package events
import (
"context"
"fmt"
"strings"
"time"
"huatuo-bamai/internal/bpf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/storage"
"huatuo-bamai/internal/utils/bpfutil"
"huatuo-bamai/internal/utils/kmsgutil"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
)
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/hungtask.c -o $BPF_DIR/hungtask.o
// hungTaskPerfEventData mirrors the perf event record emitted by the
// hungtask BPF program; field order and layout must match the C side.
type hungTaskPerfEventData struct {
	Pid  int32                     // pid of the hung task
	Comm [bpfutil.TaskCommLen]byte // NUL-padded task command name
}

// HungTaskTracerData is the full data structure.
type HungTaskTracerData struct {
	Pid                   int32  `json:"pid"`
	Comm                  string `json:"comm"`
	CPUsStack             string `json:"cpus_stack"`
	BlockedProcessesStack string `json:"blocked_processes_stack"`
}

// hungTaskTracing implements the hungtask event tracer and its metric.
type hungTaskTracing struct {
	hungtaskMetric []*metric.Data // reused gauge slice returned by Update
}
// init registers the hungtask tracer with the tracing framework at load time.
func init() {
	tracing.RegisterEventTracing("hungtask", newHungTask)
}
// newHungTask builds the tracing attributes for the hungtask tracer:
// a single "happened" gauge plus both metric and tracing capabilities.
func newHungTask() (*tracing.EventTracingAttr, error) {
	data := &hungTaskTracing{
		hungtaskMetric: []*metric.Data{
			metric.NewGaugeData("happened", 0, "hungtask happened", nil),
		},
	}
	attr := &tracing.EventTracingAttr{
		TracingData: data,
		Internal:    10,
		Flag:        tracing.FlagMetric | tracing.FlagTracing,
	}
	return attr, nil
}
// hungtaskCounter counts hungtask events seen since the last metrics scrape.
// NOTE(review): it is incremented from the Start goroutine and read/reset
// here with no synchronization — looks racy; confirm whether Start and
// Update can run concurrently.
var hungtaskCounter float64

// Update implements the metric collector: it exports the number of hungtask
// events since the previous call, then resets the counter.
func (c *hungTaskTracing) Update() ([]*metric.Data, error) {
	c.hungtaskMetric[0].Value = hungtaskCounter
	hungtaskCounter = 0
	return c.hungtaskMetric, nil
}
// Start loads the hungtask BPF program and consumes events from the
// "hungtask_perf_events" perf pipe until the context is canceled. For every
// event it collects CPU and blocked-process backtraces via kmsgutil, bumps
// the metric counter, and persists the record.
func (c *hungTaskTracing) Start(ctx context.Context) error {
	b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
	if err != nil {
		log.Infof("failed to LoadBpf, err: %v", err)
		return err
	}
	defer b.Close()

	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	reader, err := b.AttachAndEventPipe(childCtx, "hungtask_perf_events", 8192)
	if err != nil {
		log.Infof("failed to AttachAndEventPipe, err: %v", err)
		return err
	}
	defer reader.Close()

	// Cancel this tracer when the BPF breaker detaches the programs.
	b.WaitDetachByBreaker(childCtx, cancel)

	for {
		select {
		case <-childCtx.Done():
			return nil
		default:
			var data hungTaskPerfEventData
			if err := reader.ReadInto(&data); err != nil {
				return fmt.Errorf("ReadFromPerfEvent fail: %w", err)
			}

			// Best effort: on failure keep the error text in place of the
			// backtrace instead of dropping the whole event.
			cpusBT, err := kmsgutil.GetAllCPUsBT()
			if err != nil {
				cpusBT = err.Error()
			}
			blockedProcessesBT, err := kmsgutil.GetBlockedProcessesBT()
			if err != nil {
				blockedProcessesBT = err.Error()
			}

			caseData := &HungTaskTracerData{
				Pid:                   data.Pid,
				Comm:                  strings.TrimRight(string(data.Comm[:]), "\x00"),
				CPUsStack:             cpusBT,
				BlockedProcessesStack: blockedProcessesBT,
			}
			hungtaskCounter++
			// save storage
			storage.Save("hungtask", "", time.Now(), caseData)
		}
	}
}

147
core/events/lacp.go Normal file
View File

@ -0,0 +1,147 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package events
import (
"context"
"fmt"
"os"
"path/filepath"
"sync/atomic"
"time"
"huatuo-bamai/internal/bpf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/storage"
"huatuo-bamai/internal/utils/bpfutil"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
"github.com/vishvananda/netlink"
)
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/lacp.c -o $BPF_DIR/lacp.o
// lacpTracing counts bonding LACP events delivered by the lacp BPF program.
type lacpTracing struct {
	count uint64 // total events seen; accessed atomically
}

// init registers the lacp tracer, but only on hosts that actually have a
// bond in 802.3ad mode.
func init() {
	// bond mode4 (802.3ad) requires bonding.ko module,
	// the kprobe point is in bonding module, if not exist, should not load bpf
	if !isLacpEnv() {
		return
	}
	tracing.RegisterEventTracing("lacp", newLACPTracing)
}
// newLACPTracing builds the tracing attributes for the lacp tracer.
func newLACPTracing() (*tracing.EventTracingAttr, error) {
	attr := &tracing.EventTracingAttr{
		TracingData: &lacpTracing{},
		Internal:    60,
		Flag:        tracing.FlagTracing | tracing.FlagMetric,
	}
	return attr, nil
}
// Start loads the lacp BPF program and consumes events from the
// "ad_event_map" perf pipe until the context is canceled. Each event bumps
// the event counter and saves a snapshot of /proc/net/bonding to storage.
func (lacp *lacpTracing) Start(ctx context.Context) (err error) {
	b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
	if err != nil {
		return fmt.Errorf("Load lacp err: %w", err)
	}
	defer b.Close()

	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	reader, err := b.AttachAndEventPipe(childCtx, "ad_event_map", 8192)
	if err != nil {
		return fmt.Errorf("failed to AttachAndEventPipe, err: %w", err)
	}
	defer reader.Close()

	for {
		select {
		case <-childCtx.Done():
			log.Info("lacp tracing is stopped.")
			return nil
		default:
			// The payload is read into a scratch value; only its arrival
			// is used.
			var tmp uint64
			if err := reader.ReadInto(&tmp); err != nil {
				return fmt.Errorf("read lacp perf event fail: %w", err)
			}
			atomic.AddUint64(&lacp.count, 1)

			// Snapshot bonding state; best effort, skip the event on failure.
			bondInfo, err := readAllFiles("/proc/net/bonding")
			if err != nil {
				log.Warnf("read dir /proc/net/bonding err: %v", err)
				continue
			}
			tracerData := struct {
				Content string `json:"content"`
			}{
				Content: bondInfo,
			}
			log.Debugf("bond info: %s", tracerData.Content)
			storage.Save("lacp", "", time.Now(), tracerData)
		}
	}
}
// Update implements the metric collector: it reports the total number of
// LACP events observed so far as a gauge.
func (lacp *lacpTracing) Update() ([]*metric.Data, error) {
	total := float64(atomic.LoadUint64(&lacp.count))
	data := metric.NewGaugeData("lacp", total, "lacp disabled count", nil)
	return []*metric.Data{data}, nil
}
// readAllFiles walks dir recursively and concatenates, for every regular
// file, the file path, a newline, and the file content. It returns the
// concatenation and the first error encountered (walk or read).
//
// The result is accumulated in a byte buffer instead of string
// concatenation, which was quadratic in the total content size.
func readAllFiles(dir string) (string, error) {
	var buf []byte
	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}
		data, err := os.ReadFile(path)
		if err != nil {
			return err
		}
		buf = append(buf, path...)
		buf = append(buf, '\n')
		buf = append(buf, data...)
		return nil
	})
	return string(buf), err
}
// isLacpEnv reports whether any network link on this host is a bond
// running in 802.3ad (LACP) mode.
func isLacpEnv() bool {
	links, err := netlink.LinkList()
	if err != nil {
		return false
	}
	for _, link := range links {
		if link.Type() != "bond" {
			continue
		}
		if link.(*netlink.Bond).Mode == netlink.BOND_MODE_802_3AD {
			return true
		}
	}
	return false
}

View File

@ -0,0 +1,121 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package events
import (
"context"
"fmt"
"strings"
"time"
"huatuo-bamai/internal/bpf"
"huatuo-bamai/internal/conf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/pod"
"huatuo-bamai/internal/storage"
"huatuo-bamai/internal/utils/bpfutil"
"huatuo-bamai/pkg/tracing"
)
// memoryReclaimTracing implements the memreclaim event tracer.
type memoryReclaimTracing struct{}

// memoryReclaimPerfEvent mirrors the perf event record emitted by the
// memory_reclaim BPF program; layout must match the C side.
type memoryReclaimPerfEvent struct {
	Comm      [bpfutil.TaskCommLen]byte // NUL-padded task command name
	Deltatime uint64                    // reclaim time exceeding the configured deltath threshold — units defined by BPF side
	CSS       uint64                    // cgroup subsys state pointer, used for container attribution
	Pid       uint64
}

// MemoryReclaimTracingData is the full data structure.
type MemoryReclaimTracingData struct {
	Pid       uint64 `json:"pid"`
	Comm      string `json:"comm"`
	Deltatime uint64 `json:"deltatime"`
}
// init registers the memreclaim tracer with the tracing framework.
func init() {
	tracing.RegisterEventTracing("memreclaim", newMemoryReclaim)
}
// newMemoryReclaim builds the tracing attributes for the memreclaim tracer.
func newMemoryReclaim() (*tracing.EventTracingAttr, error) {
	attr := &tracing.EventTracingAttr{
		TracingData: &memoryReclaimTracing{},
		Internal:    5,
		Flag:        tracing.FlagTracing,
	}
	return attr, nil
}
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/memory_reclaim.c -o $BPF_DIR/memory_reclaim.o
// Start detect work, load bpf and wait data form perfevent
// Start loads the memory_reclaim BPF program (with the configured deltath
// latency threshold) and consumes events from the "reclaim_perf_events"
// perf pipe until the context is canceled. Only events attributable to a
// container are persisted.
func (c *memoryReclaimTracing) Start(ctx context.Context) error {
	log.Infof("memory reclaim start.")

	deltath := conf.Get().Tracing.MemoryReclaim.Deltath
	b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), map[string]any{"deltath": deltath})
	if err != nil {
		log.Infof("LoadBpf: %v", err)
		return err
	}
	defer b.Close()

	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	reader, err := b.AttachAndEventPipe(childCtx, "reclaim_perf_events", 8192)
	if err != nil {
		log.Infof("AttachAndEventPipe: %v", err)
		return err
	}
	defer reader.Close()

	// Cancel this tracer when the BPF breaker detaches the programs.
	b.WaitDetachByBreaker(childCtx, cancel)

	for {
		select {
		case <-childCtx.Done():
			return nil
		default:
			var data memoryReclaimPerfEvent
			if err := reader.ReadInto(&data); err != nil {
				return fmt.Errorf("ReadFromPerfEvent fail: %w", err)
			}

			container, err := pod.GetContainerByCSS(data.CSS, "cpu")
			if err != nil {
				return fmt.Errorf("GetContainerByCSS by CSS %d: %w", data.CSS, err)
			}
			// We only care about the container and nothing else.
			// Though it may be unfair, that's just how life is.
			//
			// -- Tonghao Zhang, tonghao@bamaicloud.com
			if container == nil {
				continue
			}

			// TrimRight (not Trim) for consistency with the other tracers:
			// comm is NUL-padded on the right only.
			tracingData := &MemoryReclaimTracingData{
				Pid:       data.Pid,
				Comm:      strings.TrimRight(string(data.Comm[:]), "\x00"),
				Deltatime: data.Deltatime,
			}
			log.Infof("memory_reclaim saves storage: %+v", tracingData)
			storage.Save("memory_reclaim", container.ID, time.Now(), tracingData)
		}
	}
}

229
core/events/netdev.go Normal file
View File

@ -0,0 +1,229 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package events
import (
"context"
"fmt"
"slices"
"sync"
"time"
"huatuo-bamai/internal/conf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/storage"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
)
// linkStatusType enumerates link status transitions derived from rtnetlink
// flag changes.
type linkStatusType uint8

const (
	linkStatusUnknown linkStatusType = iota
	linkStatusAdminUp
	linkStatusAdminDown
	linkStatusCarrierUp
	linkStatusCarrierDown
	maxLinkStatus // sentinel: number of valid statuses, not a status itself
)

// String returns the human-readable name of the status. Out-of-range values
// (including the maxLinkStatus sentinel) fall back to "linkStatusUnknown"
// instead of panicking on the name-table index, as the original unchecked
// array index would.
func (l linkStatusType) String() string {
	names := [...]string{
		"linkStatusUnknown",
		"linkStatusAdminUp",
		"linkStatusAdminDown",
		"linkStatusCarrierUp",
		"linkStatusCarrierDown",
	}
	if int(l) >= len(names) {
		return names[linkStatusUnknown]
	}
	return names[l]
}
// flags2status translates an ifinfomsg (flags, change) pair into the link
// status transitions it encodes: admin up/down when IFF_UP changed, carrier
// up/down when IFF_LOWER_UP changed.
func flags2status(flags, change uint32) []linkStatusType {
	var out []linkStatusType
	if change&unix.IFF_UP != 0 {
		s := linkStatusAdminDown
		if flags&unix.IFF_UP != 0 {
			s = linkStatusAdminUp
		}
		out = append(out, s)
	}
	if change&unix.IFF_LOWER_UP != 0 {
		s := linkStatusCarrierDown
		if flags&unix.IFF_LOWER_UP != 0 {
			s = linkStatusCarrierUp
		}
		out = append(out, s)
	}
	return out
}
// netdevTracing watches rtnetlink link updates for whitelisted interfaces
// and records admin/carrier up-down transitions as events and metrics.
type netdevTracing struct {
	name         string
	linkUpdateCh chan netlink.LinkUpdate
	linkDoneCh   chan struct{}
	mu           sync.Mutex
	// ifFlagsMap remembers the last seen flags per interface so that
	// handleEvent can diff against them.
	ifFlagsMap map[string]uint32 // [ifname]ifinfomsg::if_flags
	// metricsLinkStatusCountMap accumulates transition counts; guarded by mu.
	metricsLinkStatusCountMap map[linkStatusType]map[string]int // [netdevEventType][ifname]count
}

// netdevEventData is one recorded link event; the unexported fields carry
// the raw flag state that record() turns into LinkStatus.
type netdevEventData struct {
	linkFlags   uint32
	flagsChange uint32
	Ifname      string `json:"ifname"`
	Index       int    `json:"index"`
	LinkStatus  string `json:"linkstatus"`
	Mac         string `json:"mac"`
	AtStart     bool   `json:"start"` // true: be scanned at start, false: event trigger
}
// init registers the netdev_event tracer with the tracing framework.
func init() {
	tracing.RegisterEventTracing("netdev_event", newNetdevTracing)
}
// newNetdevTracing builds the netdev tracer with one pre-allocated counter
// map per link status kind.
func newNetdevTracing() (*tracing.EventTracingAttr, error) {
	counters := make(map[linkStatusType]map[string]int, int(maxLinkStatus))
	for status := linkStatusUnknown; status < maxLinkStatus; status++ {
		counters[status] = make(map[string]int)
	}
	tracer := &netdevTracing{
		name:                      "netdev_event",
		linkUpdateCh:              make(chan netlink.LinkUpdate),
		linkDoneCh:                make(chan struct{}),
		ifFlagsMap:                make(map[string]uint32),
		metricsLinkStatusCountMap: counters,
	}
	return &tracing.EventTracingAttr{
		TracingData: tracer,
		Internal:    10,
		Flag:        tracing.FlagTracing | tracing.FlagMetric,
	}, nil
}
// Start records the initial state of whitelisted interfaces, subscribes to
// rtnetlink link updates, and processes RTM_NEWLINK messages until the
// update channel is closed (see close()).
// NOTE(review): ctx is not consulted here; shutdown relies on the update
// channel closing — confirm that is the intended lifecycle.
func (nt *netdevTracing) Start(ctx context.Context) (err error) {
	if err := nt.checkLinkStatus(); err != nil {
		return err
	}
	if err := netlink.LinkSubscribe(nt.linkUpdateCh, nt.linkDoneCh); err != nil {
		return err
	}
	defer nt.close()

	for {
		update, ok := <-nt.linkUpdateCh
		if !ok {
			return nil
		}
		switch update.Header.Type {
		case unix.NLMSG_ERROR:
			return fmt.Errorf("NLMSG_ERROR")
		case unix.RTM_NEWLINK:
			ifname := update.Link.Attrs().Name
			// Only interfaces seeded by the startup scan (the whitelist)
			// are tracked; interfaces appearing later are ignored.
			if _, ok := nt.ifFlagsMap[ifname]; !ok {
				// new interface
				continue
			}
			nt.handleEvent(&update)
		}
	}
}
// Update implements the metric collector: it flattens the per-status,
// per-interface transition counters into gauges labeled with the device.
func (nt *netdevTracing) Update() ([]*metric.Data, error) {
	nt.mu.Lock()
	defer nt.mu.Unlock()

	var out []*metric.Data
	for status, byIfname := range nt.metricsLinkStatusCountMap {
		name := status.String()
		for ifname, count := range byIfname {
			labels := map[string]string{"device": ifname}
			out = append(out, metric.NewGaugeData(name, float64(count), name, labels))
		}
	}
	return out, nil
}
// checkLinkStatus scans all links once at startup and seeds ifFlagsMap for
// every whitelisted interface. It calls record() with flagsChange left at
// zero, which yields no transitions, so the scan only seeds state.
func (nt *netdevTracing) checkLinkStatus() error {
	links, err := netlink.LinkList()
	if err != nil {
		return err
	}
	for _, link := range links {
		ifname := link.Attrs().Name
		// Only interfaces from the configured whitelist are tracked.
		if !slices.Contains(conf.Get().Tracing.Netdev.Whitelist,
			ifname) {
			continue
		}
		flags := link.Attrs().RawFlags
		nt.ifFlagsMap[ifname] = flags
		data := &netdevEventData{
			linkFlags: flags,
			Ifname:    ifname,
			Index:     link.Attrs().Index,
			Mac:       link.Attrs().HardwareAddr.String(),
			AtStart:   true,
		}
		nt.record(data)
	}
	return nil
}
// record counts each status transition encoded in the event and appends its
// name to data.LinkStatus. Events that carry at least one transition and
// did not come from the startup scan are persisted to storage.
func (nt *netdevTracing) record(data *netdevEventData) {
	for _, status := range flags2status(data.linkFlags, data.flagsChange) {
		nt.mu.Lock()
		nt.metricsLinkStatusCountMap[status][data.Ifname]++
		nt.mu.Unlock()

		if data.LinkStatus != "" {
			data.LinkStatus += ", "
		}
		data.LinkStatus += status.String()
	}

	if !data.AtStart && data.LinkStatus != "" {
		log.Infof("%s %+v", data.LinkStatus, data)
		storage.Save(nt.name, "", time.Now(), data)
	}
}
// handleEvent processes one RTM_NEWLINK update: it diffs the new raw flags
// against the last-seen flags for the interface, stores the new flags, and
// records any resulting status transitions.
func (nt *netdevTracing) handleEvent(ev *netlink.LinkUpdate) {
	attrs := ev.Link.Attrs()
	ifname := attrs.Name
	curr := ev.Attrs().RawFlags
	prev := nt.ifFlagsMap[ifname]
	nt.ifFlagsMap[ifname] = curr

	nt.record(&netdevEventData{
		linkFlags:   curr,
		flagsChange: curr ^ prev, // set bits are the flags that flipped
		Ifname:      ifname,
		Index:       attrs.Index,
		Mac:         attrs.HardwareAddr.String(),
		AtStart:     false,
	})
}
// close tears down the rtnetlink subscription: closing linkDoneCh asks the
// subscriber to stop, and closing linkUpdateCh unblocks the receive loop in
// Start.
// NOTE(review): closing a channel the netlink library may still send on
// looks racy (send on closed channel panics) — confirm LinkSubscribe stops
// sending once the done channel is closed.
func (nt *netdevTracing) close() {
	close(nt.linkDoneCh)
	close(nt.linkUpdateCh)
}

294
core/events/netrecvlat.go Normal file
View File

@ -0,0 +1,294 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package events
import (
"context"
"errors"
"fmt"
"strings"
"syscall"
"time"
"huatuo-bamai/internal/bpf"
"huatuo-bamai/internal/conf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/pod"
"huatuo-bamai/internal/storage"
"huatuo-bamai/internal/utils/bpfutil"
"huatuo-bamai/internal/utils/netutil"
"huatuo-bamai/internal/utils/procfsutil"
"huatuo-bamai/pkg/tracing"
"golang.org/x/sys/unix"
)
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/netrecvlat.c -o $BPF_DIR/netrecvlat.o
// netRecvLatTracing implements the netrcvlat event tracer.
type netRecvLatTracing struct{}

// NetTracingData is the full data structure.
type NetTracingData struct {
	Comm    string `json:"comm"`
	Pid     uint64 `json:"pid"`
	Where   string `json:"where"` // receive-path stage name, see toWhere
	Latency uint64 `json:"latency_ms"`
	State   string `json:"state"` // TCP state name, see tcpStateMap
	Saddr   string `json:"saddr"`
	Daddr   string `json:"daddr"`
	Sport   uint16 `json:"sport"`
	Dport   uint16 `json:"dport"`
	Seq     uint32 `json:"seq"`
	AckSeq  uint32 `json:"ack_seq"`
	PktLen  uint64 `json:"pkt_len"`
}

// from bpf perf
// netRcvPerfEvent mirrors the record emitted by the netrecvlat BPF program;
// layout must match the C side.
type netRcvPerfEvent struct {
	Comm    [bpfutil.TaskCommLen]byte // NUL-padded; empty when not in process context
	Latency uint64                    // ns; converted to ms before reporting
	TgidPid uint64                    // tgid in the upper 32 bits, pid in the lower
	PktLen  uint64
	Sport   uint16
	Dport   uint16
	Saddr   uint32
	Daddr   uint32
	Seq     uint32
	AckSeq  uint32
	State   uint8 // index into tcpStateMap
	Where   uint8 // index into toWhere
}
// from include/net/tcp_states.h
// tcpStateMap maps the kernel TCP state number to its name; index 0 is
// unused by the kernel and rendered as "<nil>".
var tcpStateMap = []string{
	"<nil>", // 0
	"ESTABLISHED",
	"SYN_SENT",
	"SYN_RECV",
	"FIN_WAIT1",
	"FIN_WAIT2",
	"TIME_WAIT",
	"CLOSE",
	"CLOSE_WAIT",
	"LAST_ACK",
	"LISTEN",
	"CLOSING",
	"NEW_SYN_RECV",
}

// userCopyCase is the Where value of the user-copy stage (index of
// "TO_USER_COPY" in toWhere); container attribution only applies there.
const userCopyCase = 2

// toWhere maps the Where value from the BPF event to the name of the
// receive-path stage the latency was measured up to.
var toWhere = []string{
	"TO_NETIF_RCV",
	"TO_TCPV4_RCV",
	"TO_USER_COPY",
}
// init registers the netrcvlat tracer with the tracing framework.
func init() {
	tracing.RegisterEventTracing("netrcvlat", newNetRcvLat)
}

// newNetRcvLat builds the tracing attributes for the netrcvlat tracer.
func newNetRcvLat() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &netRecvLatTracing{},
		Internal:    10,
		Flag:        tracing.FlagTracing,
	}, nil
}
// Start loads the netrecvlat BPF program with the configured per-stage
// latency thresholds and consumes events from the "net_recv_lat_event_map"
// perf pipe until the context is canceled. Events are filtered (TCP state,
// known-issue match, host/container policy) before being persisted.
func (c *netRecvLatTracing) Start(ctx context.Context) error {
	toNetIf := conf.Get().Tracing.NetRecvLat.ToNetIf       // ms, before RPS to a core recv(__netif_receive_skb)
	toTCPV4 := conf.Get().Tracing.NetRecvLat.ToTCPV4       // ms, before RPS to TCP recv(tcp_v4_rcv)
	toUserCopy := conf.Get().Tracing.NetRecvLat.ToUserCopy // ms, before RPS to user recv(skb_copy_datagram_iovec)
	if toNetIf == 0 || toTCPV4 == 0 || toUserCopy == 0 {
		return fmt.Errorf("netrecvlat threshold [%v %v %v]ms invalid", toNetIf, toTCPV4, toUserCopy)
	}
	log.Infof("netrecvlat start, latency threshold [%v %v %v]ms", toNetIf, toTCPV4, toUserCopy)

	// The BPF side compares walltime skb->tstamp against the monotonic
	// bpf_ktime_get_ns(); pass the estimated clock offset so it can convert
	// (see estMonoWallOffset).
	monoWallOffset, err := estMonoWallOffset()
	if err != nil {
		return fmt.Errorf("estimate monoWallOffset failed: %w", err)
	}
	log.Infof("netrecvlat offset of mono to walltime: %v ns", monoWallOffset)

	// Thresholds are configured in ms; the BPF program expects ns.
	args := map[string]any{
		"mono_wall_offset": monoWallOffset,
		"to_netif":         toNetIf * 1000 * 1000,
		"to_tcpv4":         toTCPV4 * 1000 * 1000,
		"to_user_copy":     toUserCopy * 1000 * 1000,
	}
	b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), args)
	if err != nil {
		log.Infof("failed to LoadBpf, err: %v", err)
		return err
	}
	defer b.Close()

	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	reader, err := b.AttachAndEventPipe(childCtx, "net_recv_lat_event_map", 8192)
	if err != nil {
		log.Infof("failed to AttachAndEventPipe, err: %v", err)
		return err
	}
	defer reader.Close()

	b.WaitDetachByBreaker(childCtx, cancel)

	// save host netns
	hostNetNsInode, err := procfsutil.NetNSInodeByPid(1)
	if err != nil {
		return fmt.Errorf("get host netns inode: %w", err)
	}

	for {
		select {
		case <-childCtx.Done():
			return nil
		default:
			var pd netRcvPerfEvent
			// NOTE(review): "rrom" typo in the error message below.
			if err := reader.ReadInto(&pd); err != nil {
				return fmt.Errorf("read rrom perf event fail: %w", err)
			}
			tracerTime := time.Now()

			comm := "<nil>" // not in process context
			var pid uint64
			var containerID string
			if pd.TgidPid != 0 {
				comm = strings.TrimRight(string(pd.Comm[:]), "\x00")
				pid = pd.TgidPid >> 32
				// check if its netns same as host netns
				if pd.Where == userCopyCase {
					cid, skip, err := ignore(pid, comm, hostNetNsInode)
					if err != nil {
						return err
					}
					if skip {
						continue
					}
					containerID = cid
				}
			}

			where := toWhere[pd.Where]
			lat := pd.Latency / 1000 / 1000 // ms
			state := tcpStateMap[pd.State]
			saddr, daddr := netutil.InetNtop(pd.Saddr).String(), netutil.InetNtop(pd.Daddr).String()
			sport, dport := netutil.InetNtohs(pd.Sport), netutil.InetNtohs(pd.Dport)
			seq, ackSeq := netutil.InetNtohl(pd.Seq), netutil.InetNtohl(pd.AckSeq)
			pktLen := pd.PktLen
			title := fmt.Sprintf("comm=%s:%d to=%s lat(ms)=%v state=%s saddr=%s sport=%d daddr=%s dport=%d seq=%d ackSeq=%d pktLen=%d",
				comm, pid, where, lat, state, saddr, sport, daddr, dport, seq, ackSeq, pktLen)

			// tcp state filter
			if (state != "ESTABLISHED") && (state != "<nil>") {
				continue
			}
			// known issue filter
			caseName, _ := conf.KnownIssueSearch(title, "", "")
			if caseName == "netrecvlat" {
				log.Debugf("netrecvlat known issue")
				continue
			}

			tracerData := &NetTracingData{
				Comm:    comm,
				Pid:     pid,
				Where:   where,
				Latency: lat,
				State:   state,
				Saddr:   saddr,
				Daddr:   daddr,
				Sport:   sport,
				Dport:   dport,
				Seq:     seq,
				AckSeq:  ackSeq,
				PktLen:  pktLen,
			}
			log.Debugf("netrecvlat tracerData: %+v", tracerData)
			// save storage
			storage.Save("netrecvlat", containerID, tracerTime, tracerData)
		}
	}
}
// ignore decides whether an event attributed to pid should be skipped, based
// on its network namespace and the owning container's QoS level. It returns
// the container ID (empty if none), whether to skip the event, and an error
// only for unexpected namespace-lookup failures.
func ignore(pid uint64, comm string, hostNetnsInode uint64) (containerID string, skip bool, err error) {
	// check if its netns same as host netns
	dstInode, err := procfsutil.NetNSInodeByPid(int(pid))
	if err != nil {
		// ignore the missing program
		if errors.Is(err, syscall.ENOENT) {
			return "", true, nil
		}
		return "", skip, fmt.Errorf("get netns inode of pid %v failed: %w", pid, err)
	}
	if conf.Get().Tracing.NetRecvLat.IgnoreHost && dstInode == hostNetnsInode {
		log.Debugf("ignore %s:%v the same netns as host", comm, pid)
		return "", true, nil
	}

	// check container level
	var container *pod.Container
	// Best effort: attribution failure is logged but does not skip the event.
	if container, err = pod.GetContainerByNetNamespaceInode(dstInode); err != nil {
		log.Warnf("get container info by netns inode %v pid %v, failed: %v", dstInode, pid, err)
	}
	if container != nil {
		// Skip containers whose QoS level is configured to be ignored.
		for _, level := range conf.Get().Tracing.NetRecvLat.IgnoreContainerLevel {
			if container.Qos.Int() == level {
				log.Debugf("ignore container %+v", container)
				skip = true
				break
			}
		}
		containerID = container.ID
	}
	return containerID, skip, nil
}
// estMonoWallOffset estimates the offset between CLOCK_MONOTONIC and
// CLOCK_REALTIME.
//
// bpf_ktime_get_ns() reads the monotonic clock, but the kernel stamps
// skb->tstamp with ktime_get_real() in netif_receive_skb_internal, so the
// BPF side needs this offset to compare the two.
// ref: https://github.com/torvalds/linux/blob/v4.18/net/core/dev.c#L4736
//
// Each sample reads realtime t1, monotonic t2, realtime t3; the midpoint
// (t1+t3)/2 approximates the realtime instant at which t2 was read, so
// offset = (t1+t3)/2 - t2. The sample with the smallest t3-t1 window wins:
// t3 - t2 + (t3 - t1) / 2 => (t3 + t1) / 2 - t2
func estMonoWallOffset() (int64, error) {
	var t1, t2, t3 unix.Timespec
	var bestDelta int64
	var offset int64

	for i := 0; i < 10; i++ {
		err1 := unix.ClockGettime(unix.CLOCK_REALTIME, &t1)
		err2 := unix.ClockGettime(unix.CLOCK_MONOTONIC, &t2)
		err3 := unix.ClockGettime(unix.CLOCK_REALTIME, &t3)
		// errors.Join reports only the calls that actually failed; the old
		// fmt.Errorf("%w, %w, %w", ...) rendered nil errors as %!w(<nil>).
		if err := errors.Join(err1, err2, err3); err != nil {
			return 0, err
		}

		delta := unix.TimespecToNsec(t3) - unix.TimespecToNsec(t1)
		if i == 0 || delta < bestDelta {
			bestDelta = delta
			offset = (unix.TimespecToNsec(t3)+unix.TimespecToNsec(t1))/2 - unix.TimespecToNsec(t2)
		}
	}
	return offset, nil
}

191
core/events/oom.go Normal file
View File

@ -0,0 +1,191 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package events
import (
"context"
"fmt"
"strings"
"sync"
"time"
"huatuo-bamai/internal/bpf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/pod"
"huatuo-bamai/internal/storage"
"huatuo-bamai/internal/utils/bpfutil"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
)
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/oom.c -o $BPF_DIR/oom.o
// perfEventData mirrors the record emitted by the oom BPF program; layout
// must match the C side. "Trigger" and "Victim" are presumably the task
// that entered the OOM path and the task chosen to be killed — confirm
// against the BPF source.
type perfEventData struct {
	TriggerProcessName [16]byte
	VictimProcessName  [16]byte
	TriggerPid         int32
	VictimPid          int32
	TriggerMemcgCSS    uint64 // memcg cgroup subsys state pointer
	VictimMemcgCSS     uint64 // memcg cgroup subsys state pointer
}

// OOMTracingData is the stored record for one OOM event, with both sides
// resolved to container IDs/hostnames where possible.
type OOMTracingData struct {
	TriggerMemcgCSS          string `json:"trigger_memcg_css"`
	TriggerContainerID       string `json:"trigger_container_id"`
	TriggerContainerHostname string `json:"trigger_container_hostname"`
	TriggerPid               int32  `json:"trigger_pid"`
	TriggerProcessName       string `json:"trigger_process_name"`
	VictimMemcgCSS           string `json:"victim_memcg_css"`
	VictimContainerID        string `json:"victim_container_id"`
	VictimContainerHostname  string `json:"victim_container_hostname"`
	VictimPid                int32  `json:"victim_pid"`
	VictimProcessName        string `json:"victim_process_name"`
}

// oomMetric accumulates per-container OOM info between metric scrapes.
type oomMetric struct {
	count             int
	victimProcessName string // comma-joined names of all victims seen
}

// oomCollector implements the oom event tracer and metric collector.
type oomCollector struct{}
// init registers the oom tracer with the tracing framework.
func init() {
	tracing.RegisterEventTracing("oom", newOOMCollector)
}
// newOOMCollector builds the tracing attributes for the oom tracer.
func newOOMCollector() (*tracing.EventTracingAttr, error) {
	attr := &tracing.EventTracingAttr{
		TracingData: &oomCollector{},
		Internal:    10,
		Flag:        tracing.FlagTracing | tracing.FlagMetric,
	}
	return attr, nil
}
var (
	// hostOOMCounter counts OOM events not attributed to any container
	// since the last metrics scrape; guarded by mutex.
	hostOOMCounter float64
	// containerOOMCounter accumulates per-container OOM counts and victim
	// process names since the last scrape; guarded by mutex.
	containerOOMCounter = make(map[string]oomMetric)
	// mutex protects the two counters above, shared by Start and Update.
	mutex sync.Mutex
)
// Update implements the metric collector: it exports the host OOM counter
// plus one gauge per container that saw an OOM since the last scrape, then
// resets both counters.
func (c *oomCollector) Update() ([]*metric.Data, error) {
	containers, err := pod.GetNormalContainers()
	if err != nil {
		return nil, fmt.Errorf("get normal container: %w", err)
	}

	mutex.Lock()
	defer mutex.Unlock()

	metrics := []*metric.Data{
		metric.NewGaugeData("host_happened", hostOOMCounter, "host oom happened", nil),
	}
	for _, ct := range containers {
		val, exists := containerOOMCounter[ct.ID]
		if !exists {
			continue
		}
		metrics = append(metrics, metric.NewContainerGaugeData(
			ct, "counter", float64(val.count), "ct oom happened",
			map[string]string{"process": val.victimProcessName}))
	}

	// Reset the window for the next scrape.
	hostOOMCounter = 0
	containerOOMCounter = make(map[string]oomMetric)
	return metrics, nil
}
// Start loads the oom BPF program and consumes events from the
// "oom_perf_events" perf pipe until the context is canceled. Each event is
// resolved to trigger/victim containers via their memcg CSS pointers,
// counted for metrics, and saved to storage.
func (c *oomCollector) Start(ctx context.Context) error {
	b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
	if err != nil {
		log.Infof("failed to LoadBpf, err: %v", err)
		return err
	}
	defer b.Close()

	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	reader, err := b.AttachAndEventPipe(childCtx, "oom_perf_events", 8192)
	if err != nil {
		log.Infof("failed to AttachAndEventPipe, err: %v", err)
		return err
	}
	defer reader.Close()

	b.WaitDetachByBreaker(childCtx, cancel)

	for {
		select {
		case <-childCtx.Done():
			return nil
		default:
			var data perfEventData
			if err := reader.ReadInto(&data); err != nil {
				return fmt.Errorf("ReadFromPerfEvent fail: %w", err)
			}

			// Resolve CSS pointers to container IDs; best effort, skip the
			// event if the mapping cannot be built.
			cssToCtMap, err := pod.GetCSSToContainerID("memory")
			if err != nil {
				log.Errorf("failed to GetCSSToContainerID, err: %v", err)
				continue
			}
			cts, err := pod.GetAllContainers()
			if err != nil {
				log.Errorf("Can't get GetAllContainers, err: %v", err)
				return err
			}

			// NOTE(review): the cts[...] lookups below assume every ID from
			// cssToCtMap exists in cts; a missing entry would dereference a
			// nil container — confirm.
			caseData := &OOMTracingData{
				TriggerMemcgCSS:    fmt.Sprintf("0x%x", data.TriggerMemcgCSS),
				TriggerPid:         data.TriggerPid,
				TriggerProcessName: strings.TrimRight(string(data.TriggerProcessName[:]), "\x00"),
				TriggerContainerID: cssToCtMap[data.TriggerMemcgCSS],
				VictimMemcgCSS:     fmt.Sprintf("0x%x", data.VictimMemcgCSS),
				VictimPid:          data.VictimPid,
				VictimProcessName:  strings.TrimRight(string(data.VictimProcessName[:]), "\x00"),
				VictimContainerID:  cssToCtMap[data.VictimMemcgCSS],
			}

			if caseData.TriggerContainerID == "" {
				caseData.TriggerContainerID = "None"
				caseData.TriggerContainerHostname = "Non-Container Cgroup"
			} else {
				caseData.TriggerContainerHostname = cts[caseData.TriggerContainerID].Hostname
				if caseData.TriggerContainerHostname == "" {
					caseData.TriggerContainerHostname = "unknown"
				}
			}

			mutex.Lock()
			if caseData.VictimContainerID == "" {
				hostOOMCounter++
				caseData.VictimContainerID = "None"
				caseData.VictimContainerHostname = "Non-Container Cgroup"
			} else {
				if val, exists := containerOOMCounter[cts[caseData.VictimContainerID].ID]; exists {
					val.count++
					val.victimProcessName = val.victimProcessName + "," + caseData.VictimProcessName
					containerOOMCounter[cts[caseData.VictimContainerID].ID] = val
				} else {
					containerOOMCounter[cts[caseData.VictimContainerID].ID] = oomMetric{
						count:             1,
						victimProcessName: caseData.VictimProcessName,
					}
				}
				caseData.VictimContainerHostname = cts[caseData.VictimContainerID].Hostname
				if caseData.VictimContainerHostname == "" {
					caseData.VictimContainerHostname = "unknown"
				}
			}
			mutex.Unlock()

			// save storage
			storage.Save("oom", "", time.Now(), caseData)
		}
	}
}

188
core/events/softirq.go Normal file
View File

@ -0,0 +1,188 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package events
import (
"context"
"fmt"
"strings"
"time"
"huatuo-bamai/internal/bpf"
"huatuo-bamai/internal/conf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/storage"
"huatuo-bamai/internal/utils/bpfutil"
"huatuo-bamai/internal/utils/symbolutil"
"huatuo-bamai/pkg/tracing"
)
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/softirq.c -o $BPF_DIR/softirq.o
// softirqTracing implements the softirq event tracer.
type softirqTracing struct{}

// softirqPerfEvent mirrors the record emitted by the softirq BPF program;
// layout must match the C side.
type softirqPerfEvent struct {
	Stack     [symbolutil.KsymbolStackMaxDepth]uint64 // raw kernel stack addresses
	StackSize int64                                   // <= 0 means no stack captured; units defined by BPF side
	Now       uint64
	StallTime uint64 // stall duration that tripped the threshold
	Comm      [bpfutil.TaskCommLen]byte
	Pid       uint32
	CPU       uint32
}

// SoftirqTracingData is the full data structure.
type SoftirqTracingData struct {
	OffTime   uint64 `json:"offtime"`   // stall duration (StallTime)
	Threshold uint64 `json:"threshold"` // configured detection threshold
	Comm      string `json:"comm"`
	Pid       uint32 `json:"pid"`
	CPU       uint32 `json:"cpu"`
	Now       uint64 `json:"now"`
	Stack     string `json:"stack"` // symbolized backtrace, prefixed "stack:\n"
}
// init registers the softirq tracer with the tracing framework.
func init() {
	tracing.RegisterEventTracing("softirq", newSoftirq)
}

// newSoftirq builds the tracing attributes for the softirq tracer.
func newSoftirq() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &softirqTracing{},
		Internal:    10,
		Flag:        tracing.FlagTracing,
	}, nil
}
// Start loads the softirq BPF program (with the configured stall threshold)
// and consumes events from the "irqoff_event_map" perf pipe until the
// context is canceled. Events from ksoftirqd and swapper are dropped as
// noise; everything else is saved with a symbolized kernel stack.
func (c *softirqTracing) Start(ctx context.Context) error {
	log.Infof("Softirq start")

	softirqThresh := conf.Get().Tracing.Softirq.ThresholdTime
	b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), map[string]any{"softirq_thresh": softirqThresh})
	if err != nil {
		log.Infof("failed to LoadBpf, err: %v", err)
		return err
	}
	defer b.Close()

	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	reader, err := attachIrqAndEventPipe(childCtx, b)
	if err != nil {
		log.Infof("failed to attachIrqAndEventPipe, err: %v", err)
		return err
	}
	defer reader.Close()

	b.WaitDetachByBreaker(childCtx, cancel)

	for {
		select {
		case <-childCtx.Done():
			return nil
		default:
			var data softirqPerfEvent
			if err := reader.ReadInto(&data); err != nil {
				return fmt.Errorf("Read From Perf Event fail: %w", err)
			}

			// Convert the NUL-padded comm buffer directly; the previous
			// fmt.Sprintf("%s", data.Comm) took fmt's reflection path for
			// the same result.
			comm := string(data.Comm[:])

			// ksoftirqd stalls are expected; swapper is idle-loop noise.
			if strings.HasPrefix(comm, "ksoftirqd") || strings.HasPrefix(comm, "swapper") {
				continue
			}

			var stack string
			if data.StackSize > 0 {
				stack = softirqDumpTrace(data.Stack[:])
			}

			// save storage
			caseData := &SoftirqTracingData{
				OffTime:   data.StallTime,
				Threshold: softirqThresh,
				Comm:      strings.TrimRight(comm, "\x00"),
				Pid:       data.Pid,
				CPU:       data.CPU,
				Now:       data.Now,
				Stack:     fmt.Sprintf("stack:\n%s", stack),
			}
			storage.Save("softirq", "", time.Now(), caseData)
		}
	} // forever
}
// softirqDumpTrace symbolizes a raw kernel stack (with offset and module
// info) and joins the frames with newlines.
func softirqDumpTrace(addrs []uint64) string {
	bt := symbolutil.DumpKernelBackTrace(addrs, symbolutil.KsymbolStackMaxDepth)
	return strings.Join(bt.BackTrace, "\n")
}
// attachIrqAndEventPipe opens the "irqoff_event_map" perf pipe and then
// attaches the softirq probes in a strictly defined order. On any failure
// after the pipe is open, the reader is closed before returning.
func attachIrqAndEventPipe(ctx context.Context, b bpf.BPF) (bpf.PerfEventReader, error) {
	var err error
	reader, err := b.EventPipeByName(ctx, "irqoff_event_map", 8192)
	if err != nil {
		return nil, err
	}
	// Close the pipe if anything below fails so it does not leak.
	defer func() {
		if err != nil {
			reader.Close()
		}
	}()
	/*
	 * NOTE: There might be more than 100ms gap between the attachment of hooks,
	 * so the order of attaching the kprobe and tracepoint is important for us.
	 * probe_scheduler_tick should not be attached before probe_tick_stop and not be
	 * attached later than probe_tick_nohz_restart_sched_tick. So only
	 * probe_tick_stop -> probe_scheduler_tick -> probe_tick_nohz_restart_sched_tick
	 * works for the scenario.
	 *
	 * But we can't control the order of detachment, as it is executed in a random
	 * sequence in HuaTuo. Therefore, when we exit due to some special reasons, a
	 * small number of false alarm might be hit.
	 */
	// Assign to the outer err (no :=): the previous `if err := ...` shadowed
	// it, so the deferred cleanup never saw attachment failures and the
	// reader leaked.
	if err = b.AttachWithOptions([]bpf.AttachOption{
		{
			ProgramName: "probe_scheduler_tick",
			Symbol:      "scheduler_tick",
		},
		{
			ProgramName: "probe_tick_nohz_restart_sched_tick",
			Symbol:      "tick_nohz_restart_sched_tick",
		},
		{
			ProgramName: "probe_tick_stop",
			Symbol:      "timer/tick_stop",
		},
	}); err != nil {
		return nil, err
	}
	return reader, nil
}

123
core/events/softlockup.go Normal file
View File

@ -0,0 +1,123 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package events
import (
	"context"
	"fmt"
	"strings"
	"sync/atomic"
	"time"

	"huatuo-bamai/internal/bpf"
	"huatuo-bamai/internal/log"
	"huatuo-bamai/internal/storage"
	"huatuo-bamai/internal/utils/bpfutil"
	"huatuo-bamai/internal/utils/kmsgutil"
	"huatuo-bamai/pkg/metric"
	"huatuo-bamai/pkg/tracing"
)
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/softlockup.c -o $BPF_DIR/softlockup.o

// softLockupPerfEventData mirrors the perf-event record emitted by the
// softlockup BPF program; field order and sizes must match the C side.
type softLockupPerfEventData struct {
	CPU  int32
	Pid  int32
	Comm [16]byte
}

// SoftLockupTracerData is the full record persisted to storage for one
// soft-lockup event.
type SoftLockupTracerData struct {
	CPU       int32  `json:"cpu"`
	Pid       int32  `json:"pid"`
	Comm      string `json:"comm"`
	CPUsStack string `json:"cpus_stack"`
}

// softLockupTracing implements the "softlockup" tracer; it exposes a
// single "happened" gauge.
type softLockupTracing struct {
	softlockupMetric []*metric.Data
}

// init registers the tracer under the name "softlockup".
func init() {
	tracing.RegisterEventTracing("softlockup", newSoftLockup)
}

// newSoftLockup builds the tracing attributes: a 10s interval
// ("Internal" is the framework's field name) and both the tracing and
// metric roles.
func newSoftLockup() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &softLockupTracing{
			softlockupMetric: []*metric.Data{
				metric.NewGaugeData("happened", 0, "softlockup happened", nil),
			},
		},
		Internal: 10,
		Flag:     tracing.FlagTracing | tracing.FlagMetric,
	}, nil
}
// softlockupCounter counts soft-lockup events seen since the previous
// metric scrape. It is incremented from the Start goroutine and
// read-and-reset from Update, which run concurrently, so it must be
// atomic: the original plain float64 was a data race.
var softlockupCounter atomic.Uint64

// Update reports how many soft lockups were observed since the last
// scrape and resets the counter atomically.
func (c *softLockupTracing) Update() ([]*metric.Data, error) {
	c.softlockupMetric[0].Value = float64(softlockupCounter.Swap(0))
	return c.softlockupMetric, nil
}

// Start loads and attaches the softlockup BPF object, then consumes
// perf events until ctx is cancelled. For every event it snapshots the
// backtraces of all CPUs from the kernel log and persists the record.
func (c *softLockupTracing) Start(ctx context.Context) error {
	b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
	if err != nil {
		log.Infof("failed to LoadBpf, err: %v", err)
		return err
	}
	defer b.Close()

	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	reader, err := b.AttachAndEventPipe(childCtx, "softlockup_perf_events", 8192)
	if err != nil {
		log.Infof("failed to AttachAndEventPipe, err: %v", err)
		return err
	}
	defer reader.Close()

	b.WaitDetachByBreaker(childCtx, cancel)

	for {
		select {
		case <-childCtx.Done():
			return nil
		default:
			var data softLockupPerfEventData
			if err := reader.ReadInto(&data); err != nil {
				return fmt.Errorf("ReadFromPerfEvent fail: %w", err)
			}

			// Best effort: fall back to the error text when the kernel
			// log backtrace cannot be collected.
			bt, err := kmsgutil.GetAllCPUsBT()
			if err != nil {
				bt = err.Error()
			}

			caseData := &SoftLockupTracerData{
				CPU:       data.CPU,
				Pid:       data.Pid,
				Comm:      strings.TrimRight(string(data.Comm[:]), "\x00"),
				CPUsStack: bt,
			}
			softlockupCounter.Add(1)
			// save storage
			storage.Save("softlockup", "", time.Now(), caseData)
		}
	}
}

121
core/metrics/arp.go Normal file
View File

@ -0,0 +1,121 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
"huatuo-bamai/internal/pod"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
)
// arpCachePath is the kernel's neigh-table statistics file; declared
// as a variable (presumably so tests can point it at a fixture — TODO
// confirm against the test suite).
var arpCachePath = "/proc/net/stat/arp_cache"

// arpCollector exposes host-level ARP gauges: the entry count in the
// init namespace and the arp_cache totals.
type arpCollector struct {
	metric []*metric.Data
}

// init registers the collector under the name "arp".
func init() {
	tracing.RegisterEventTracing("arp", newArp)
}

// newArp builds the metric-only collector with its two host gauges.
func newArp() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &arpCollector{
			metric: []*metric.Data{
				metric.NewGaugeData("entries", 0, "host init namespace", nil),
				metric.NewGaugeData("total", 0, "arp_cache entries", nil),
			},
		},
		Flag: tracing.FlagMetric,
	}, nil
}
// NetStat contains statistics for all the counters from one file.
// should be exported for /proc/net/stat/ndisc_cache
type NetStat struct {
Stats map[string]uint64
Filename string
}
func parseNetstatCache(filePath string) (NetStat, error) {
netStat := NetStat{
Stats: make(map[string]uint64),
}
file, err := os.Open(filePath)
if err != nil {
return netStat, err
}
defer file.Close()
scanner := bufio.NewScanner(file)
scanner.Scan()
// First string is always a header for stats
var headers []string
headers = append(headers, strings.Fields(scanner.Text())...)
// Fast path ...
scanner.Scan()
for num, counter := range strings.Fields(scanner.Text()) {
value, err := strconv.ParseUint(counter, 16, 64)
if err != nil {
return NetStat{}, err
}
netStat.Stats[headers[num]] = value
}
return netStat, nil
}
// Update emits per-container ARP entry counts plus the host entry
// count and the kernel arp_cache "entries" total.
func (c *arpCollector) Update() ([]*metric.Data, error) {
	arpMetric := []*metric.Data{}
	containers, err := pod.GetNormalContainers()
	if err != nil {
		return nil, fmt.Errorf("GetNormalContainers: %w", err)
	}
	for _, container := range containers {
		// Count lines of the container's ARP table; "- 1" drops the
		// header line of /proc/<pid>/net/arp.
		count, err := fileLineCounter(fmt.Sprintf("/proc/%d/net/arp", container.InitPid))
		if err != nil {
			return nil, err
		}
		arpMetric = append(arpMetric, metric.NewContainerGaugeData(container, "entries", float64(count-1), "arp for container and host", nil))
	}
	// Host view: PID 1's network namespace.
	count, err := fileLineCounter("/proc/1/net/arp")
	if err != nil {
		return nil, err
	}
	stat, err := parseNetstatCache(arpCachePath)
	if err != nil {
		return nil, err
	}
	c.metric[0].Value = float64(count - 1)
	c.metric[1].Value = float64(stat.Stats["entries"])
	arpMetric = append(arpMetric, c.metric...)
	return arpMetric, nil
}

70
core/metrics/buddyinfo.go Normal file
View File

@ -0,0 +1,70 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"fmt"
"strconv"
"github.com/prometheus/procfs"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
)
// buddyInfoCollector exposes /proc/buddyinfo free-block counts per
// (node, zone, order).
type buddyInfoCollector struct {
	fs procfs.FS
}

// init registers the collector under the name "buddyinfo".
func init() {
	tracing.RegisterEventTracing("buddyinfo", newBuddyInfo)
}

// newBuddyInfo opens the default procfs mount and wraps it in a
// metric-only collector.
func newBuddyInfo() (*tracing.EventTracingAttr, error) {
	fs, err := procfs.NewDefaultFS()
	if err != nil {
		return nil, fmt.Errorf("open procfs: %w", err)
	}
	return &tracing.EventTracingAttr{
		TracingData: &buddyInfoCollector{fs: fs},
		Flag:        tracing.FlagMetric,
	}, nil
}
// Update reads /proc/buddyinfo and emits one "blocks" gauge per
// (node, zone, block-order) triple.
func (c *buddyInfoCollector) Update() ([]*metric.Data, error) {
	buddyInfo, err := c.fs.BuddyInfo()
	if err != nil {
		return nil, err
	}

	metrics := []*metric.Data{}
	for _, entry := range buddyInfo {
		for size, value := range entry.Sizes {
			// Build a fresh label map for every metric. The original
			// reused one map across all iterations, so if
			// metric.NewGaugeData retains the map, every gauge would
			// alias the labels of the final iteration.
			buddyLabel := map[string]string{
				"node": entry.Node,
				"zone": entry.Zone,
				"size": strconv.Itoa(size),
			}
			metrics = append(metrics,
				metric.NewGaugeData("blocks", value, "buddy info", buddyLabel))
		}
	}
	return metrics, nil
}

166
core/metrics/cpu_stat.go Normal file
View File

@ -0,0 +1,166 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"reflect"
"sync"
"time"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/pod"
"huatuo-bamai/internal/utils/cgrouputil"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
)
// cpuStat is the per-container snapshot of cgroup CPU pressure
// counters plus the wait rates derived from the previous snapshot.
type cpuStat struct {
	nrThrottled   uint64
	throttledTime uint64
	nrBursts      uint64
	burstTime     uint64
	// calculated values
	hierarchyWaitSum uint64
	innerWaitSum     uint64
	cpuTotal         uint64

	waitrateHierarchy float64
	waitrateInner     float64
	waitrateExter     float64
	waitrateThrottled float64
	// lastUpdate rate-limits snapshot refreshes to once per second.
	lastUpdate time.Time
}

// cpuStatCollector reads cpu/cpuacct cgroup statistics for every
// container; mutex serializes snapshot updates across scrapes.
type cpuStatCollector struct {
	cpu     *cgrouputil.CPU
	cpuacct *cgrouputil.CPUAcct
	mutex   sync.Mutex
}

// init registers the collector and a per-container cpuStat life
// resource, so every container carries its own snapshot state.
func init() {
	tracing.RegisterEventTracing("cpu_stat", newCPUStat)
	_ = pod.RegisterContainerLifeResources("collector_cpu_stat", reflect.TypeOf(&cpuStat{}))
}

// newCPUStat builds the metric-only collector.
func newCPUStat() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &cpuStatCollector{
			cpu:     cgrouputil.NewCPU(),
			cpuacct: cgrouputil.NewCPUAcctDefault(),
		},
		Flag: tracing.FlagMetric,
	}, nil
}
// cpuMetricUpdate refreshes cpu's snapshot and the derived wait rates
// from the container's cpu.stat and cpuacct usage counters. It is
// rate-limited to one refresh per second per snapshot and serialized
// by c.mutex.
func (c *cpuStatCollector) cpuMetricUpdate(cpu *cpuStat, container *pod.Container) error {
	c.mutex.Lock()
	defer c.mutex.Unlock()

	now := time.Now()
	// Reuse the previous sample if it is younger than one second.
	if now.Sub(cpu.lastUpdate) < time.Second {
		return nil
	}

	raw, err := c.cpu.StatRaw(container.CgroupSuffix)
	if err != nil {
		return err
	}
	usageTotal, err := c.cpuacct.Usage(container.CgroupSuffix)
	if err != nil {
		return err
	}

	stat := cpuStat{
		nrThrottled:      raw["nr_throttled"],
		throttledTime:    raw["throttled_time"],
		hierarchyWaitSum: raw["hierarchy_wait_sum"],
		innerWaitSum:     raw["inner_wait_sum"],
		nrBursts:         raw["nr_bursts"],
		burstTime:        raw["burst_time"],
		cpuTotal:         usageTotal,
		lastUpdate:       now,
	}

	// Saturating subtraction: cgroup counters can go backwards (e.g. a
	// container's cgroup is recreated). The original "delta <= 0" test
	// on a uint64 could never be negative, and a wrapped delta produced
	// huge bogus rates; clamp to zero instead.
	sub := func(a, b uint64) uint64 {
		if a < b {
			return 0
		}
		return a - b
	}

	var deltaThrottledSum, deltaInnerWaitSum, deltaExterWaitSum uint64
	deltaHierarchyWaitSum := sub(stat.hierarchyWaitSum, cpu.hierarchyWaitSum)
	if deltaHierarchyWaitSum > 0 {
		deltaThrottledSum = sub(stat.throttledTime, cpu.throttledTime)
		deltaInnerWaitSum = sub(stat.innerWaitSum, cpu.innerWaitSum)
		// Keep the hierarchy sum consistent with its components.
		if deltaHierarchyWaitSum < deltaThrottledSum+deltaInnerWaitSum {
			deltaHierarchyWaitSum = deltaThrottledSum + deltaInnerWaitSum
		}
		deltaExterWaitSum = deltaHierarchyWaitSum - deltaThrottledSum - deltaInnerWaitSum
	}

	// Denominator: waiting plus running time over the sample window.
	deltaWaitRunSum := deltaHierarchyWaitSum + sub(stat.cpuTotal, cpu.cpuTotal)
	if deltaWaitRunSum == 0 {
		stat.waitrateHierarchy = 0
		stat.waitrateInner = 0
		stat.waitrateExter = 0
		stat.waitrateThrottled = 0
	} else {
		stat.waitrateHierarchy = float64(deltaHierarchyWaitSum) * 100 / float64(deltaWaitRunSum)
		stat.waitrateInner = float64(deltaInnerWaitSum) * 100 / float64(deltaWaitRunSum)
		stat.waitrateExter = float64(deltaExterWaitSum) * 100 / float64(deltaWaitRunSum)
		stat.waitrateThrottled = float64(deltaThrottledSum) * 100 / float64(deltaWaitRunSum)
	}

	*cpu = stat
	return nil
}
// Update refreshes every normal/sidecar container's cpuStat snapshot
// and emits its wait-rate and throttle/burst gauges. Containers whose
// refresh fails are logged and skipped rather than failing the scrape.
func (c *cpuStatCollector) Update() ([]*metric.Data, error) {
	metrics := []*metric.Data{}
	containers, err := pod.GetContainersByType(pod.ContainerTypeNormal | pod.ContainerTypeSidecar)
	if err != nil {
		return nil, err
	}
	for _, container := range containers {
		// Per-container snapshot registered in init via
		// RegisterContainerLifeResources.
		containerMetric := container.LifeResouces("collector_cpu_stat").(*cpuStat)
		if err := c.cpuMetricUpdate(containerMetric, container); err != nil {
			log.Infof("failed to update cpu info of %s, %v", container, err)
			continue
		}
		metrics = append(metrics, metric.NewContainerGaugeData(container, "wait_rate", containerMetric.waitrateHierarchy, "wait rate for containers", nil),
			metric.NewContainerGaugeData(container, "inner_wait_rate", containerMetric.waitrateInner, "inner wait rate for container", nil),
			metric.NewContainerGaugeData(container, "exter_wait_rate", containerMetric.waitrateExter, "exter wait rate for container", nil),
			metric.NewContainerGaugeData(container, "throttle_wait_rate", containerMetric.waitrateThrottled, "throttle wait rate for container", nil),
			metric.NewContainerGaugeData(container, "nr_throttled", float64(containerMetric.nrThrottled), "throttle nr for container", nil),
			metric.NewContainerGaugeData(container, "nr_bursts", float64(containerMetric.nrBursts), "burst nr for container", nil),
			metric.NewContainerGaugeData(container, "burst_time", float64(containerMetric.burstTime), "burst time for container", nil),
		)
	}
	return metrics, nil
}

177
core/metrics/cpu_util.go Normal file
View File

@ -0,0 +1,177 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"reflect"
"runtime"
"sync"
"time"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/pod"
"huatuo-bamai/internal/utils/cgrouputil"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
)
// cpuMetric holds the previous cpuacct counter sample plus the
// utilization percentages derived from the delta to that sample.
type cpuMetric struct {
	lastUsrTime  uint64
	lastSysTime  uint64
	lastCPUTotal uint64
	// lasTimestamp rate-limits refreshes to once per second.
	lasTimestamp time.Time

	utilTotal float64
	utilSys   float64
	utilUsr   float64
}

// cpuUtilCollector computes usr/sys/total CPU utilization for the host
// and for every container.
type cpuUtilCollector struct {
	cpuUtil []*metric.Data
	cpuacct *cgrouputil.CPUAcct
	cpu     *cgrouputil.CPU
	// included struct for used in multi modules
	hostCPUCount  int
	hostCPUMetric cpuMetric
	mutex         sync.Mutex
}

// init registers the collector and a per-container cpuMetric life
// resource so each container keeps its own sample state.
func init() {
	tracing.RegisterEventTracing("cpu_util", newCPUUtil)
	_ = pod.RegisterContainerLifeResources("collector_cpu_util", reflect.TypeOf(&cpuMetric{}))
}

// newCPUUtil builds the metric-only collector with its three host
// gauges and the host CPU count snapshot.
func newCPUUtil() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &cpuUtilCollector{
			cpuUtil: []*metric.Data{
				metric.NewGaugeData("usr", 0, "usr for container and host", nil),
				metric.NewGaugeData("sys", 0, "sys for container and host", nil),
				metric.NewGaugeData("total", 0, "total for container and host", nil),
			},
			cpuacct:      cgrouputil.NewCPUAcctDefault(),
			cpu:          cgrouputil.NewCPU(),
			hostCPUCount: runtime.NumCPU(),
		},
		Flag: tracing.FlagMetric,
	}, nil
}
// cpuMetricUpdate refreshes cpuMetric's utilization percentages from
// the cgroup's cpuacct counters. A nil container means the host root
// cgroup. Refreshes are serialized by c.mutex and rate-limited to once
// per second per cpuMetric.
func (c *cpuUtilCollector) cpuMetricUpdate(cpuMetric *cpuMetric, container *pod.Container, cpuCount int) error {
	var (
		utilUsr    float64
		utilSys    float64
		utilTotal  float64
		cgroupPath string
	)
	c.mutex.Lock()
	defer c.mutex.Unlock()
	now := time.Now()
	// Reuse the previous sample if it is younger than one second.
	if now.Sub(cpuMetric.lasTimestamp).Nanoseconds() < 1000000000 {
		return nil
	}
	if container != nil {
		cgroupPath = container.CgroupSuffix
	}
	usageTotal, err := c.cpuacct.Usage(cgroupPath)
	if err != nil {
		return err
	}
	usageUsr, usageSys, err := c.cpuacct.Stat(cgroupPath)
	if err != nil {
		return err
	}
	// allow statistics 0
	deltaTotal := usageTotal - cpuMetric.lastCPUTotal
	deltaUsrTime := usageUsr - cpuMetric.lastUsrTime
	deltaSysTime := usageSys - cpuMetric.lastSysTime
	// Maximum possible CPU-time in the window: cpuCount * wall time.
	deltaUsageSum := float64(cpuCount) * float64(now.Sub(cpuMetric.lasTimestamp).Nanoseconds())
	// Discard implausible samples (e.g. counter reset after a cgroup
	// was recreated, which also makes the unsigned deltas wrap to huge
	// values): re-baseline without touching the published utilization.
	if (float64(deltaTotal) > deltaUsageSum) || (float64(deltaUsrTime+deltaSysTime) > deltaUsageSum) {
		cpuMetric.lastUsrTime = usageUsr
		cpuMetric.lastSysTime = usageSys
		cpuMetric.lastCPUTotal = usageTotal
		cpuMetric.lasTimestamp = now
		return nil
	}
	utilTotal = float64(deltaTotal) * 100 / deltaUsageSum
	utilUsr = float64(deltaUsrTime) * 100 / deltaUsageSum
	utilSys = float64(deltaSysTime) * 100 / deltaUsageSum
	cpuMetric.lastUsrTime = usageUsr
	cpuMetric.lastSysTime = usageSys
	cpuMetric.lastCPUTotal = usageTotal
	cpuMetric.utilTotal = utilTotal
	cpuMetric.utilUsr = utilUsr
	cpuMetric.utilSys = utilSys
	cpuMetric.lasTimestamp = now
	return nil
}
// hostMetricUpdate refreshes the host-level sample (nil container =
// root cgroup) and copies the results into the three host gauges.
func (c *cpuUtilCollector) hostMetricUpdate() error {
	if err := c.cpuMetricUpdate(&c.hostCPUMetric, nil, c.hostCPUCount); err != nil {
		return err
	}
	c.cpuUtil[0].Value = c.hostCPUMetric.utilUsr
	c.cpuUtil[1].Value = c.hostCPUMetric.utilSys
	c.cpuUtil[2].Value = c.hostCPUMetric.utilTotal
	return nil
}
// Update emits per-container CPU count and usr/sys/total utilization,
// then the host gauges. Containers whose refresh fails are logged and
// skipped; a host refresh failure fails the whole scrape.
func (c *cpuUtilCollector) Update() ([]*metric.Data, error) {
	metrics := []*metric.Data{}
	containers, err := pod.GetContainersByType(pod.ContainerTypeNormal | pod.ContainerTypeSidecar)
	if err != nil {
		return nil, err
	}
	for _, container := range containers {
		count, err := c.cpu.CPUNum(container.CgroupSuffix)
		if err != nil {
			log.Infof("failed to get cpu count of %s, %v", container, err)
			continue
		}
		// Per-container sample registered in init via
		// RegisterContainerLifeResources.
		containerMetric := container.LifeResouces("collector_cpu_util").(*cpuMetric)
		if err := c.cpuMetricUpdate(containerMetric, container, count); err != nil {
			log.Infof("failed to update cpu info of %s, %v", container, err)
			continue
		}
		metrics = append(metrics, metric.NewContainerGaugeData(container, "count", float64(count), "cpu count for containers", nil),
			metric.NewContainerGaugeData(container, "usr", containerMetric.utilUsr, "usr for container and host", nil),
			metric.NewContainerGaugeData(container, "sys", containerMetric.utilSys, "sys for container and host", nil),
			metric.NewContainerGaugeData(container, "total", containerMetric.utilTotal, "total for container and host", nil))
	}
	if err := c.hostMetricUpdate(); err != nil {
		log.Errorf("c.hostCpuMetricUpdate :%v", err)
		return nil, err
	}
	metrics = append(metrics, c.cpuUtil...)
	return metrics, nil
}

41
core/metrics/filter.go Normal file
View File

@ -0,0 +1,41 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import "regexp"
// fieldFilter decides whether a metric field name should be skipped,
// driven by an optional ignore pattern and an optional accept pattern.
type fieldFilter struct {
	ignorePattern *regexp.Regexp
	acceptPattern *regexp.Regexp
}

// newFieldFilter compiles the non-empty patterns into a fieldFilter.
// An empty pattern disables the corresponding rule. Patterns must be
// valid regular expressions (MustCompile panics otherwise).
func newFieldFilter(ignoredPattern, acceptPattern string) *fieldFilter {
	var ignore, accept *regexp.Regexp
	if ignoredPattern != "" {
		ignore = regexp.MustCompile(ignoredPattern)
	}
	if acceptPattern != "" {
		accept = regexp.MustCompile(acceptPattern)
	}
	return &fieldFilter{ignorePattern: ignore, acceptPattern: accept}
}

// ignored reports whether name matches the ignore pattern, or fails to
// match the accept pattern when one is configured.
func (f *fieldFilter) ignored(name string) bool {
	if f.ignorePattern != nil && f.ignorePattern.MatchString(name) {
		return true
	}
	if f.acceptPattern != nil && !f.acceptPattern.MatchString(name) {
		return true
	}
	return false
}

106
core/metrics/loadavg.go Normal file
View File

@ -0,0 +1,106 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"fmt"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/pod"
"huatuo-bamai/internal/utils/cgrouputil"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
"github.com/google/cadvisor/utils/cpuload/netlink"
"github.com/prometheus/procfs"
)
// loadavgCollector exposes the host 1/5/15-minute load averages plus
// per-container runnable/uninterruptible task counts.
type loadavgCollector struct {
	loadAvg []*metric.Data
}

// init registers the collector under the name "loadavg".
func init() {
	tracing.RegisterEventTracing("loadavg", newLoadavg)
}

// newLoadavg returns a new Collector exposing load average stats.
func newLoadavg() (*tracing.EventTracingAttr, error) {
	collector := &loadavgCollector{
		// Load average of last 1, 5 & 15 minutes.
		// See linux kernel Documentation/filesystems/proc.rst
		loadAvg: []*metric.Data{
			metric.NewGaugeData("load1", 0, "1m load average", nil),
			metric.NewGaugeData("load5", 0, "5m load average", nil),
			metric.NewGaugeData("load15", 0, "15m load average", nil),
		},
	}
	return &tracing.EventTracingAttr{
		TracingData: collector, Flag: tracing.FlagMetric,
	}, nil
}
// hostLoadAvg reads /proc/loadavg via procfs and refreshes the three
// host gauges in place.
func (c *loadavgCollector) hostLoadAvg() error {
	fs, err := procfs.NewDefaultFS()
	if err != nil {
		return err
	}
	load, err := fs.LoadAvg()
	if err != nil {
		return err
	}
	c.loadAvg[0].Value = load.Load1
	c.loadAvg[1].Value = load.Load5
	c.loadAvg[2].Value = load.Load15
	return nil
}
// Update gathers per-container nr_running/nr_uninterruptible via the
// cAdvisor netlink taskstats reader, then appends the host load
// averages. A fresh netlink connection is created per scrape and
// stopped via defer; per-container read failures are logged at debug
// level and skipped.
func (c *loadavgCollector) Update() ([]*metric.Data, error) {
	loadAvgMetrics := []*metric.Data{}
	n, err := netlink.New()
	if err != nil {
		log.Infof("Failed to create netlink: %s", err)
		return nil, err
	}
	defer n.Stop()
	containers, err := pod.GetContainersByType(pod.ContainerTypeNormal | pod.ContainerTypeSidecar)
	if err != nil {
		return nil, fmt.Errorf("GetContainersByType: %w", err)
	}
	for _, container := range containers {
		stats, err := n.GetCpuLoad(container.Hostname, cgrouputil.NewCPU().Path(container.CgroupSuffix))
		if err != nil {
			log.Debugf("failed to get %s load, %v", container, err)
			continue
		}
		loadAvgMetrics = append(loadAvgMetrics,
			metric.NewContainerGaugeData(container, "container_nr_running", float64(stats.NrRunning), "nr_running of container", nil),
			metric.NewContainerGaugeData(container, "container_nr_uninterruptible", float64(stats.NrUninterruptible), "nr_uninterruptible of container", nil))
	}
	if err := c.hostLoadAvg(); err != nil {
		return nil, err
	}
	loadAvgMetrics = append(loadAvgMetrics, c.loadAvg...)
	return loadAvgMetrics, nil
}

View File

@ -0,0 +1,129 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"bytes"
"context"
"encoding/binary"
"fmt"
"huatuo-bamai/internal/bpf"
"huatuo-bamai/internal/pod"
"huatuo-bamai/internal/utils/bpfutil"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
)
// init registers the tracer under the name "mmcgroup".
func init() {
	tracing.RegisterEventTracing("mmcgroup", newMemoryCgroup)
}

// newMemoryCgroup builds the attributes: 10s interval ("Internal" is
// the framework's field name), both tracing and metric roles.
func newMemoryCgroup() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &memoryCgroup{},
		Internal:    10,
		Flag:        tracing.FlagTracing | tracing.FlagMetric,
	}, nil
}

// memoryCgroupMetric mirrors the value layout of the BPF
// "mem_cgroup_map"; decoded with binary.Read (little-endian).
type memoryCgroupMetric struct {
	DirectstallCount uint64
}

//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/memory_cgroup.c -o $BPF_DIR/memory_cgroup.o

// memoryCgroup owns the loaded BPF object while Start is running.
// NOTE(review): "isRuning" is a typo (isRunning) and is written by the
// Start goroutine while read by Update without synchronization —
// consider sync/atomic and verify with -race; renaming touches all
// users in this file so it is only flagged here.
type memoryCgroup struct {
	bpf      bpf.BPF
	isRuning bool
}
// Update dumps the BPF mem_cgroup_map and emits one directstallcount
// gauge per normal container, matching map keys (memory css pointers)
// against each container's CSS["memory"]. Returns nil metrics while
// the BPF side is not running.
func (c *memoryCgroup) Update() ([]*metric.Data, error) {
	if !c.isRuning {
		return nil, nil
	}
	// Index containers by their memory cgroup css address so BPF map
	// keys can be resolved back to containers.
	containersMap := make(map[uint64]*pod.Container)
	containers, err := pod.GetNormalContainers()
	if err != nil {
		return nil, fmt.Errorf("Can't get normal container: %w", err)
	}
	for _, container := range containers {
		containersMap[container.CSS["memory"]] = container
	}
	items, err := c.bpf.DumpMapByName("mem_cgroup_map")
	if err != nil {
		return nil, fmt.Errorf("Can't dump mem_cgroup_map: %w", err)
	}
	var (
		cgroupMetric     memoryCgroupMetric
		containersMetric []*metric.Data
		css              uint64
	)
	for _, v := range items {
		// Key and value are raw little-endian structs from the kernel.
		keyBuf := bytes.NewReader(v.Key)
		if err := binary.Read(keyBuf, binary.LittleEndian, &css); err != nil {
			return nil, fmt.Errorf("mem_cgroup_map key: %w", err)
		}
		valBuf := bytes.NewReader(v.Value)
		if err := binary.Read(valBuf, binary.LittleEndian, &cgroupMetric); err != nil {
			return nil, fmt.Errorf("mem_cgroup_map value: %w", err)
		}
		// Entries whose css does not belong to a known container are
		// silently skipped.
		if container, exist := containersMap[css]; exist {
			containersMetric = append(containersMetric,
				metric.NewContainerGaugeData(container, "directstallcount",
					float64(cgroupMetric.DirectstallCount),
					"counting of cgroup try_charge reclaim", nil))
		}
	}
	// if events haven't happened, upload zero for all containers.
	if len(items) == 0 {
		for _, container := range containersMap {
			containersMetric = append(containersMetric,
				metric.NewContainerGaugeData(container, "directstallcount", float64(0),
					"counting of cgroup try_charge reclaim", nil))
		}
	}
	return containersMetric, nil
}
// Start loads and attaches the memory_cgroup BPF object, marks the
// collector as running, and blocks until ctx (or the breaker) cancels,
// after which the gauge source goes dormant again.
func (c *memoryCgroup) Start(ctx context.Context) error {
	var err error
	c.bpf, err = bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
	if err != nil {
		return fmt.Errorf("LoadBpf memory_cgroup.o: %w", err)
	}
	defer c.bpf.Close()
	if err = c.bpf.Attach(); err != nil {
		return fmt.Errorf("failed to Attach, err: %w", err)
	}
	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()
	c.bpf.WaitDetachByBreaker(childCtx, cancel)
	// NOTE(review): isRuning is toggled here but read from Update on
	// another goroutine without synchronization — verify with -race.
	c.isRuning = true
	<-childCtx.Done()
	c.isRuning = false
	return nil
}

View File

@ -0,0 +1,70 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"fmt"
"huatuo-bamai/internal/conf"
"huatuo-bamai/internal/pod"
"huatuo-bamai/internal/utils/cgrouputil"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
)
// memEventsCollector exposes per-container memory.events counters.
type memEventsCollector struct {
	mem cgrouputil.Memory
}

// init registers the collector under the name "memory_events".
func init() {
	tracing.RegisterEventTracing("memory_events", newMemEvents)
}

// newMemEvents builds the metric-only collector around the default
// memory cgroup accessor.
func newMemEvents() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &memEventsCollector{
			mem: *cgrouputil.NewMemory(),
		}, Flag: tracing.FlagMetric,
	}, nil
}
// Update collects memory.events counters for every normal container,
// emitting one gauge per counter that survives the configured
// include/exclude filter.
func (c *memEventsCollector) Update() ([]*metric.Data, error) {
	cfg := conf.Get().MetricCollector.MemoryEvents
	filter := newFieldFilter(cfg.ExcludedMetrics, cfg.IncludedMetrics)

	containers, err := pod.GetNormalContainers()
	if err != nil {
		return nil, fmt.Errorf("get normal container: %w", err)
	}

	metrics := []*metric.Data{}
	for _, container := range containers {
		events, err := c.mem.EventsRaw(container.CgroupSuffix)
		if err != nil {
			return nil, err
		}
		for name, count := range events {
			if filter.ignored(name) {
				continue
			}
			desc := fmt.Sprintf("memory events %s", name)
			metrics = append(metrics,
				metric.NewContainerGaugeData(container, name, float64(count), desc, nil))
		}
	}
	return metrics, nil
}

View File

@ -0,0 +1,110 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"bytes"
"context"
"encoding/binary"
"fmt"
"huatuo-bamai/internal/bpf"
"huatuo-bamai/internal/utils/bpfutil"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
)
// init registers the tracer under the name "mmhost".
func init() {
	tracing.RegisterEventTracing("mmhost", newMemoryHost)
}

// newMemoryHost builds the attributes with the two host latency
// gauges, a 10s interval ("Internal" is the framework's field name)
// and both tracing and metric roles.
func newMemoryHost() (*tracing.EventTracingAttr, error) {
	mm := &memoryHost{
		metrics: []*metric.Data{
			metric.NewGaugeData("compactionstat", 0, "time spent during mm compaction", nil),
			metric.NewGaugeData("allocstallstat", 0, "time spent during mm allocstall", nil),
		},
	}
	return &tracing.EventTracingAttr{
		TracingData: mm,
		Internal:    10,
		Flag:        tracing.FlagTracing | tracing.FlagMetric,
	}, nil
}

//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/memory_free_compact.c -o $BPF_DIR/memory_free_compact.o

// memoryHost owns the loaded BPF object while Start is running.
// NOTE(review): "isRuning" (typo for isRunning) is shared between the
// Start goroutine and Update without synchronization — verify with -race.
type memoryHost struct {
	metrics  []*metric.Data
	bpf      bpf.BPF
	isRuning bool
}

// memoryHostMetric mirrors the value layout of the BPF
// "mm_free_compact_map"; decoded with binary.Read (little-endian).
type memoryHostMetric struct {
	/* host: compaction latency */
	CompactionStat uint64
	/* host: page alloc latency in direct reclaim */
	AllocstallStat uint64
}
// Update dumps the BPF mm_free_compact_map and publishes the two host
// latency gauges, converting nanoseconds to milliseconds. Only the
// first map entry is consumed; an empty map publishes zeros. Returns
// nil metrics while the BPF side is not running.
func (c *memoryHost) Update() ([]*metric.Data, error) {
	if !c.isRuning {
		return nil, nil
	}
	items, err := c.bpf.DumpMapByName("mm_free_compact_map")
	if err != nil {
		return nil, fmt.Errorf("Can't dump mm_host_metrictable_relay: %w", err)
	}
	if len(items) == 0 {
		c.metrics[0].Value = float64(0)
		c.metrics[1].Value = float64(0)
	} else {
		mmMetric := memoryHostMetric{}
		buf := bytes.NewReader(items[0].Value)
		err := binary.Read(buf, binary.LittleEndian, &mmMetric)
		if err != nil {
			return nil, fmt.Errorf("read mem_cgroup_map: %w", err)
		}
		// ns -> ms
		c.metrics[0].Value = float64(mmMetric.CompactionStat) / 1000 / 1000
		c.metrics[1].Value = float64(mmMetric.AllocstallStat) / 1000 / 1000
	}
	return c.metrics, nil
}
// Start loads and attaches the memory_free_compact BPF object, marks
// the collector as running, and blocks until ctx (or the breaker)
// cancels, after which the gauge source goes dormant again.
func (c *memoryHost) Start(ctx context.Context) error {
	var err error
	c.bpf, err = bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
	if err != nil {
		return fmt.Errorf("LoadBpf mmhostbpf.o: %w", err)
	}
	defer c.bpf.Close()
	if err = c.bpf.Attach(); err != nil {
		return fmt.Errorf("Attach memory_free_compact.o: %w", err)
	}
	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()
	c.bpf.WaitDetachByBreaker(childCtx, cancel)
	// NOTE(review): isRuning is read by Update on another goroutine
	// without synchronization — verify with -race.
	c.isRuning = true
	<-childCtx.Done()
	c.isRuning = false
	return nil
}

View File

@ -0,0 +1,101 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"fmt"
"path/filepath"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/pod"
"huatuo-bamai/internal/utils/cgrouputil"
"huatuo-bamai/internal/utils/parseutil"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
)
// memOthersCollector exposes vendor-specific per-container memory
// reclaim latency counters.
type memOthersCollector struct{}

func init() {
	// only for didicloud
	tracing.RegisterEventTracing("memory_others", newMemOthersCollector)
}

// newMemOthersCollector builds the stateless, metric-only collector.
func newMemOthersCollector() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &memOthersCollector{},
		Flag:        tracing.FlagMetric,
	}, nil
}
// parseValueWithKey reads one value from a memory-cgroup file under
// the v1 memory root. With an empty key the whole file is parsed as a
// single unsigned integer; otherwise the file is parsed as key/value
// lines and the value for key is returned (zero when the key is
// absent, since the map lookup defaults to 0).
func parseValueWithKey(path, key string) (uint64, error) {
	filePath := filepath.Join(cgrouputil.V1MemoryPath(), path)
	if key == "" {
		return parseutil.ReadUint(filePath)
	}
	raw, err := parseutil.ParseRawKV(filePath)
	if err != nil {
		return 0, err
	}
	return raw[key], nil
}
// Update collects direct-stall / async-reclaim / local-direct-reclaim
// timings for every normal container. Cgroup files the running kernel does
// not provide are skipped with a debug log instead of failing the update.
func (c *memOthersCollector) Update() ([]*metric.Data, error) {
	containers, err := pod.GetNormalContainers()
	if err != nil {
		return nil, fmt.Errorf("can't get normal containers: %w", err)
	}

	// The probed files are identical for every container; build the table
	// once instead of re-declaring it on each loop iteration.
	probes := []struct {
		path string // cgroup file, relative to the container's memory cgroup
		key  string // key inside the file; "" means single-value file
		name string // exported metric name
	}{
		{path: "memory.directstall_stat", key: "directstall_time", name: "directstall_time"},
		{path: "memory.asynreclaim_stat", key: "asyncreclaim_time", name: "asyncreclaim_time"},
		{path: "memory.local_direct_reclaim_time", key: "", name: "local_direct_reclaim_time"},
	}

	metrics := []*metric.Data{}
	for _, container := range containers {
		for _, t := range probes {
			path := filepath.Join(container.CgroupSuffix, t.path)
			value, err := parseValueWithKey(path, t.key)
			if err != nil {
				// FIXME: os maynot support this metric
				log.Debugf("parse %s: %s", path, err)
				continue
			}
			metrics = append(metrics,
				metric.NewContainerGaugeData(container, t.name, float64(value),
					fmt.Sprintf("memory cgroup %s", t.name), nil))
		}
	}
	return metrics, nil
}

View File

@ -0,0 +1,70 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"fmt"
"huatuo-bamai/internal/conf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/pod"
"huatuo-bamai/internal/utils/cgrouputil"
"huatuo-bamai/internal/utils/parseutil"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
)
// memStatCollector exports every field of each normal container's
// memory.stat file as a gauge, subject to the configured include/exclude
// filter.
type memStatCollector struct{}

func init() {
	tracing.RegisterEventTracing("memory_stat", newMemStat)
}

// newMemStat builds the tracing attributes for the collector; it is
// metric-only (FlagMetric, no tracing events).
func newMemStat() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &memStatCollector{},
		Flag:        tracing.FlagMetric,
	}, nil
}
// Update reads memory.stat for every normal container and exports each
// non-filtered field as a container gauge. Containers whose file cannot be
// parsed are skipped (info-logged), not fatal.
func (c *memStatCollector) Update() ([]*metric.Data, error) {
	cfg := conf.Get().MetricCollector.MemoryStat
	filter := newFieldFilter(cfg.ExcludedMetrics, cfg.IncludedMetrics)

	containers, err := pod.GetNormalContainers()
	if err != nil {
		return nil, fmt.Errorf("GetNormalContainers: %w", err)
	}

	metrics := []*metric.Data{}
	for _, container := range containers {
		statPath := cgrouputil.V1MemoryPath() + container.CgroupSuffix + "/memory.stat"
		kv, err := parseutil.ParseRawKV(statPath)
		if err != nil {
			log.Infof("parse %s memory.stat %v", container.CgroupSuffix, err)
			continue
		}
		for name, value := range kv {
			if filter.ignored(name) {
				log.Debugf("Ignoring memory_stat metric: %s", name)
				continue
			}
			metrics = append(metrics,
				metric.NewContainerGaugeData(container, name, float64(value),
					fmt.Sprintf("memory stat %s", name), nil))
		}
	}
	return metrics, nil
}

View File

@ -0,0 +1,136 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"huatuo-bamai/pkg/metric"
)
// Softirq vector indexes; the order matches the kernel's softirq enum.
const (
	softirqHi = iota
	softirqTime
	softirqNetTx
	softirqNetRx
	softirqBlock
	softirqIrqPoll
	softirqTasklet
	softirqSched
	softirqHrtimer
	sofirqRcu
	softirqMax
)

// Latency histogram bucket indexes.
const (
	latZONE0 = iota // 0 ~ 10us
	latZONE1        // 10us ~ 100us
	latZONE2        // 100us ~ 1ms
	latZONE3        // 1ms ~ inf
	latZoneMax
)

const (
	// Bit mask selecting which softirq types are reported:
	// HI:0x1 TIMER:0x2 NET_TX:0x4 NET_RX:0x8 BLOCK:0x10 IRQ_POLL:0x20
	// TASKLET:0x40 SCHED:0x80 HRTIMER:0x100 RCU:0x200, full mask => 0x2ff.
	defaultSiTypeMask = 0x0c // default: only report NET_TX and NET_RX so far

	// Because bpf access array is strictly checked, the size of the array
	// must be aligned in order of 2, so we should not use softirqMax but
	// softirqArrayMax as the size of the array.
	softirqArrayMax = 16 // must be 2^order
)

// monTracerIsRunning reports whether the monsoftirq BPF tracer is attached;
// metrics are only exported while it is true.
var monTracerIsRunning bool

// latZoneName returns the human-readable label of a latency bucket, or
// "ERR_ZONE" for an out-of-range index.
func latZoneName(latZone int) string {
	labels := [...]string{
		latZONE0: "0~10 us",
		latZONE1: "10us ~ 100us",
		latZONE2: "100us ~ 1ms",
		latZONE3: "1ms ~ inf",
	}
	if latZone < 0 || latZone >= len(labels) {
		return "ERR_ZONE"
	}
	return labels[latZone]
}

// siTypeName returns the kernel name of a softirq vector index, or
// "ERR_TYPE" for an out-of-range index.
func siTypeName(siType int) string {
	names := [...]string{
		softirqHi:      "HI",
		softirqTime:    "TIMER",
		softirqNetTx:   "NET_TX",
		softirqNetRx:   "NET_RX",
		softirqBlock:   "BLOCK",
		softirqIrqPoll: "IRQ_POLL",
		softirqTasklet: "TASKLET",
		softirqSched:   "SCHED",
		softirqHrtimer: "HRTIMER",
		sofirqRcu:      "RCU",
	}
	if siType < 0 || siType >= len(names) {
		return "ERR_TYPE"
	}
	return names[siType]
}
// getMonsoftirqInfo converts the latest BPF latency histogram snapshot into
// gauge metrics, one per (softirq type, latency zone) pair for the types
// enabled in defaultSiTypeMask.
func getMonsoftirqInfo() ([]*metric.Data, error) {
	monsoftirqMetric := []*metric.Data{}
	for siType, lats := range &monsoftirqData.SoftirqLat {
		if (1<<siType)&defaultSiTypeMask == 0 {
			continue
		}
		for zone, count := range lats {
			// Build a fresh label map for every data point. The previous
			// code mutated a single shared map, so every appended metric
			// aliased the labels of the last iteration.
			siLabel := map[string]string{
				"softirqType": siTypeName(siType),
				"zone":        latZoneName(zone),
			}
			monsoftirqMetric = append(monsoftirqMetric,
				metric.NewGaugeData("latency", float64(count), "softirq latency", siLabel))
		}
	}
	return monsoftirqMetric, nil
}
// Update exports the softirq latency metrics gathered by the tracer; it
// returns nothing while the tracer is not attached.
func (c *monsoftirqTracing) Update() ([]*metric.Data, error) {
	if !monTracerIsRunning {
		return nil, nil
	}
	return getMonsoftirqInfo()
}

View File

@ -0,0 +1,92 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"bytes"
"context"
"encoding/binary"
"fmt"
"time"
"huatuo-bamai/internal/bpf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/utils/bpfutil"
"huatuo-bamai/pkg/tracing"
)
func init() {
	tracing.RegisterEventTracing("monsoftirq", newSoftirqCollector)
}

// newSoftirqCollector builds the tracing attributes for the softirq
// latency tracer: it runs as a long-lived tracer and also exports metrics.
func newSoftirqCollector() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &monsoftirqTracing{},
		Internal:    10,
		Flag:        tracing.FlagTracing | tracing.FlagMetric,
	}, nil
}

//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/monsoftirq_tracing.c -o $BPF_DIR/monsoftirq_tracing.o

// monsoftirqBpfData mirrors the BPF map value layout: one latency
// histogram per softirq vector (padded to softirqArrayMax for the
// verifier's power-of-two indexing requirement).
type monsoftirqBpfData struct {
	SoftirqLat [softirqArrayMax][latZoneMax]uint64
}

type monsoftirqTracing struct{}

// monsoftirqData holds the most recent histogram snapshot read from BPF.
// NOTE(review): written by Start's ticker loop and read by Update without
// explicit synchronization — confirm the framework serializes these calls.
var monsoftirqData monsoftirqBpfData
// Start loads and attaches the monsoftirq BPF program, then copies the
// latency histogram map into monsoftirqData every 2 seconds until the
// context is cancelled or the breaker detaches the program.
func (c *monsoftirqTracing) Start(ctx context.Context) error {
	// load bpf.
	b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
	if err != nil {
		return fmt.Errorf("failed to LoadBpf, err: %w", err)
	}
	defer b.Close()

	if err = b.Attach(); err != nil {
		return err
	}

	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()
	// The breaker cancels childCtx when the program must be detached.
	b.WaitDetachByBreaker(childCtx, cancel)

	ticker := time.NewTicker(2 * time.Second)
	defer ticker.Stop()

	monTracerIsRunning = true
	defer func() { monTracerIsRunning = false }()

	for {
		select {
		case <-childCtx.Done():
			return nil
		case <-ticker.C:
			item, err := b.ReadMap(b.MapIDByName("softirq_lats"), []byte{0, 0, 0, 0})
			if err != nil {
				return fmt.Errorf("failed to read softirq_lats: %w", err)
			}
			// Decode the per-vector histograms into the package-level
			// snapshot consumed by Update. Wrap the error once instead of
			// the previous log-and-return, which reported it twice.
			if err = binary.Read(bytes.NewReader(item), binary.LittleEndian, &monsoftirqData); err != nil {
				return fmt.Errorf("can't decode softirq_lats: %w", err)
			}
		}
	}
}

View File

@ -0,0 +1,62 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"github.com/prometheus/procfs"
"huatuo-bamai/internal/conf"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
)
// mountPointStatCollector exports mount-point permission metrics
// (currently: read-only vs read-write per mount point).
type mountPointStatCollector struct{}

func init() {
	tracing.RegisterEventTracing("mountpoint_perm", newMountPointStat)
}

// newMountPointStat builds the tracing attributes for the collector; it is
// metric-only (FlagMetric, no tracing events).
func newMountPointStat() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &mountPointStatCollector{},
		Flag:        tracing.FlagMetric,
	}, nil
}
// Update reports, for every mount point accepted by the configured
// include filter, whether it is mounted read-only (gauge 1) or
// read-write (gauge 0).
func (c *mountPointStatCollector) Update() ([]*metric.Data, error) {
	mounts, err := procfs.GetMounts()
	if err != nil {
		return nil, err
	}

	filter := newFieldFilter("", conf.Get().MetricCollector.MountPointStat.IncludedMountPoints)

	metrics := []*metric.Data{}
	for _, mount := range mounts {
		if filter.ignored(mount.MountPoint) {
			continue
		}
		var ro float64
		if _, readonly := mount.Options["ro"]; readonly {
			ro = 1
		}
		metrics = append(metrics,
			metric.NewGaugeData("ro", ro, "whether mountpoint is readonly or not",
				map[string]string{"mountpoint": mount.MountPoint}))
	}
	return metrics, nil
}

261
core/metrics/netdev.go Normal file
View File

@ -0,0 +1,261 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
// ref: https://github.com/prometheus/node_exporter/tree/master/collector
// - netdev_common.go
// - netdev_linuxt.go
import (
"fmt"
"os"
"path/filepath"
"strconv"
"huatuo-bamai/internal/conf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/pod"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
"github.com/jsimonetti/rtnetlink"
"github.com/mdlayher/netlink"
"github.com/prometheus/procfs"
)
type (
	// netdevStats maps device name -> counter name -> counter value.
	netdevStats map[string]map[string]uint64

	// netdevCollector exports per-device network statistics for the host
	// and for every normal container.
	netdevCollector struct{}
)

func init() {
	tracing.RegisterEventTracing("netdev", newNetdevCollector)
}

// newNetdevCollector builds the tracing attributes for the collector; it
// is metric-only (FlagMetric, no tracing events).
func newNetdevCollector() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &netdevCollector{},
		Flag:        tracing.FlagMetric,
	}, nil
}
// Update collects per-device network statistics for the host and for every
// normal container. Container metrics carry container labels; host metrics
// (nil container) do not.
func (c *netdevCollector) Update() ([]*metric.Data, error) {
	filter := newFieldFilter(conf.Get().MetricCollector.Netdev.IgnoredDevices,
		conf.Get().MetricCollector.Netdev.AcceptDevices)
	log.Debugf("Updating netdev metrics by filter: %v", filter)

	containers, err := pod.GetNormalContainers()
	if err != nil {
		return nil, fmt.Errorf("GetNormalContainers: %w", err)
	}
	if containers == nil {
		containers = make(map[string]*pod.Container)
	}
	// The nil entry stands for the host itself.
	containers[""] = nil

	var metrics []*metric.Data
	for _, container := range containers {
		devStats, err := c.getStats(container, filter)
		if err != nil {
			return nil, fmt.Errorf("couldn't get netdev statistic for container %v: %w", container, err)
		}
		for dev, stats := range devStats {
			for key, val := range stats {
				desc := fmt.Sprintf("Network device statistic %s.", key)
				tags := map[string]string{"device": dev}
				if container == nil {
					metrics = append(metrics,
						metric.NewGaugeData(key+"_total", float64(val), desc, tags))
				} else {
					metrics = append(metrics,
						metric.NewContainerGaugeData(container, key+"_total", float64(val), desc, tags))
				}
			}
		}
	}
	log.Debugf("Updated netdev metrics by filter %v: %v", filter, metrics)
	return metrics, nil
}
// getStats picks the configured statistics backend: rtnetlink when
// EnableNetlink is set, otherwise /proc/<pid>/net/dev.
func (c *netdevCollector) getStats(container *pod.Container, filter *fieldFilter) (netdevStats, error) {
	if !conf.Get().MetricCollector.Netdev.EnableNetlink {
		return c.procStats(container, filter)
	}
	return c.netlinkStats(container, filter)
}
// netlinkStats reads per-device link statistics over rtnetlink from the
// network namespace of the container's init process (pid 1 for the host).
// Devices rejected by the filter are skipped; devices without stats are
// skipped silently (debug-logged).
func (c *netdevCollector) netlinkStats(container *pod.Container, filter *fieldFilter) (netdevStats, error) {
	pid := 1 // host
	if container != nil {
		pid = container.InitPid
	}

	// Enter the target's netns via its /proc ns file descriptor.
	file, err := os.Open(filepath.Join("/proc", strconv.Itoa(pid), "ns/net"))
	if err != nil {
		return nil, err
	}
	defer file.Close()

	conn, err := rtnetlink.Dial(&netlink.Config{NetNS: int(file.Fd())})
	if err != nil {
		return nil, err
	}
	defer conn.Close()

	links, err := conn.Link.List()
	if err != nil {
		return nil, err
	}

	metrics := netdevStats{}
	for _, msg := range links {
		if msg.Attributes == nil {
			log.Debug("No netlink attributes, skipping")
			continue
		}
		name := msg.Attributes.Name

		// Check the filter before any per-device work: the 32->64 bit
		// stats conversion below is wasted effort for ignored devices
		// (the old code converted first, filtered after).
		if filter.ignored(name) {
			log.Debugf("Ignoring device: %s", name)
			continue
		}

		stats := msg.Attributes.Stats64
		// Fall back to the 32-bit counters when 64-bit ones are absent.
		if stats32 := msg.Attributes.Stats; stats == nil && stats32 != nil {
			stats = &rtnetlink.LinkStats64{
				RXPackets:          uint64(stats32.RXPackets),
				TXPackets:          uint64(stats32.TXPackets),
				RXBytes:            uint64(stats32.RXBytes),
				TXBytes:            uint64(stats32.TXBytes),
				RXErrors:           uint64(stats32.RXErrors),
				TXErrors:           uint64(stats32.TXErrors),
				RXDropped:          uint64(stats32.RXDropped),
				TXDropped:          uint64(stats32.TXDropped),
				Multicast:          uint64(stats32.Multicast),
				Collisions:         uint64(stats32.Collisions),
				RXLengthErrors:     uint64(stats32.RXLengthErrors),
				RXOverErrors:       uint64(stats32.RXOverErrors),
				RXCRCErrors:        uint64(stats32.RXCRCErrors),
				RXFrameErrors:      uint64(stats32.RXFrameErrors),
				RXFIFOErrors:       uint64(stats32.RXFIFOErrors),
				RXMissedErrors:     uint64(stats32.RXMissedErrors),
				TXAbortedErrors:    uint64(stats32.TXAbortedErrors),
				TXCarrierErrors:    uint64(stats32.TXCarrierErrors),
				TXFIFOErrors:       uint64(stats32.TXFIFOErrors),
				TXHeartbeatErrors:  uint64(stats32.TXHeartbeatErrors),
				TXWindowErrors:     uint64(stats32.TXWindowErrors),
				RXCompressed:       uint64(stats32.RXCompressed),
				TXCompressed:       uint64(stats32.TXCompressed),
				RXNoHandler:        uint64(stats32.RXNoHandler),
				RXOtherhostDropped: 0,
			}
		}
		// Make sure we don't panic when accessing `stats` attributes below.
		if stats == nil {
			log.Debug("No netlink stats, skipping")
			continue
		}
		// https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_link.h#L42-L246
		metrics[name] = map[string]uint64{
			"receive_packets":  stats.RXPackets,
			"transmit_packets": stats.TXPackets,
			"receive_bytes":    stats.RXBytes,
			"transmit_bytes":   stats.TXBytes,
			"receive_errors":   stats.RXErrors,
			"transmit_errors":  stats.TXErrors,
			"receive_dropped":  stats.RXDropped,
			"transmit_dropped": stats.TXDropped,
			"multicast":        stats.Multicast,
			"collisions":       stats.Collisions,
			// detailed rx_errors
			"receive_length_errors": stats.RXLengthErrors,
			"receive_over_errors":   stats.RXOverErrors,
			"receive_crc_errors":    stats.RXCRCErrors,
			"receive_frame_errors":  stats.RXFrameErrors,
			"receive_fifo_errors":   stats.RXFIFOErrors,
			"receive_missed_errors": stats.RXMissedErrors,
			// detailed tx_errors
			"transmit_aborted_errors":   stats.TXAbortedErrors,
			"transmit_carrier_errors":   stats.TXCarrierErrors,
			"transmit_fifo_errors":      stats.TXFIFOErrors,
			"transmit_heartbeat_errors": stats.TXHeartbeatErrors,
			"transmit_window_errors":    stats.TXWindowErrors,
			// for cslip etc
			"receive_compressed":  stats.RXCompressed,
			"transmit_compressed": stats.TXCompressed,
			"receive_nohandler":   stats.RXNoHandler,
		}
	}
	return metrics, nil
}
// procStats reads network device counters from /proc/<pid>/net/dev, where
// pid is the container's init process (pid 1 for the host).
func (c *netdevCollector) procStats(container *pod.Container, filter *fieldFilter) (netdevStats, error) {
	pid := 1 // host
	if container != nil {
		pid = container.InitPid
	}

	fs, err := procfs.NewProc(pid)
	if err != nil {
		return nil, fmt.Errorf("failed to open procfs: %w", err)
	}
	netdev, err := fs.NetDev()
	if err != nil {
		return nil, fmt.Errorf("failed to parse /proc/[%d]/net/dev: %w", pid, err)
	}

	metrics := netdevStats{}
	for name, stats := range netdev {
		if filter.ignored(name) {
			log.Debugf("Ignoring device: %s", name)
			continue
		}
		metrics[name] = map[string]uint64{
			"receive_bytes":       stats.RxBytes,
			"receive_packets":     stats.RxPackets,
			"receive_errors":      stats.RxErrors,
			"receive_dropped":     stats.RxDropped,
			"receive_fifo":        stats.RxFIFO,
			"receive_frame":       stats.RxFrame,
			"receive_compressed":  stats.RxCompressed,
			"receive_multicast":   stats.RxMulticast,
			"transmit_bytes":      stats.TxBytes,
			"transmit_packets":    stats.TxPackets,
			"transmit_errors":     stats.TxErrors,
			"transmit_dropped":    stats.TxDropped,
			"transmit_fifo":       stats.TxFIFO,
			"transmit_colls":      stats.TxCollisions,
			"transmit_carrier":    stats.TxCarrier,
			"transmit_compressed": stats.TxCompressed,
		}
	}
	return metrics, nil
}

162
core/metrics/netstat.go Normal file
View File

@ -0,0 +1,162 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
// ref: https://github.com/prometheus/node_exporter/tree/master/collector
// - netstat_linux.go
import (
"bufio"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"huatuo-bamai/internal/conf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/pod"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
)
// netstatCollector exports TCP counters from /proc net/netstat and
// net/snmp for the host and for containers.
type netstatCollector struct{}

func init() {
	tracing.RegisterEventTracing("netstat", newNetstatCollector)
}

// newNetstatCollector builds the tracing attributes for the collector; it
// is metric-only (FlagMetric, no tracing events).
func newNetstatCollector() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &netstatCollector{},
		Flag:        tracing.FlagMetric,
	}, nil
}
// Update collects TCP netstat/snmp counters for the host and for every
// normal container.
func (c *netstatCollector) Update() ([]*metric.Data, error) {
	filter := newFieldFilter(conf.Get().MetricCollector.Netstat.ExcludedMetrics, conf.Get().MetricCollector.Netstat.IncludedMetrics)
	log.Debugf("Updating netstat metrics by filter: %v", filter)

	containers, err := pod.GetNormalContainers()
	if err != nil {
		return nil, fmt.Errorf("GetNormalContainers: %w", err)
	}
	if containers == nil {
		containers = make(map[string]*pod.Container)
	}
	// The nil entry stands for the host itself.
	containers[""] = nil

	var metrics []*metric.Data
	for _, container := range containers {
		containerMetrics, err := c.getStatMetrics(container, filter)
		if err != nil {
			return nil, fmt.Errorf("couldn't get netstat metrics for container %v: %w", container, err)
		}
		metrics = append(metrics, containerMetrics...)
	}
	log.Debugf("Updated netstat metrics by filter %v: %v", filter, metrics)
	return metrics, nil
}
// getStatMetrics parses net/netstat and net/snmp from the target's /proc
// entry (pid 1 for the host, the container's init pid otherwise) and
// converts every non-filtered counter into a gauge.
func (c *netstatCollector) getStatMetrics(container *pod.Container, filter *fieldFilter) ([]*metric.Data, error) {
	pid := 1 // host
	if container != nil {
		pid = container.InitPid
	}
	pidProc := filepath.Join("/proc", strconv.Itoa(pid))

	netStats, err := c.procNetstats(filepath.Join(pidProc, "net/netstat"))
	if err != nil {
		return nil, fmt.Errorf("couldn't get netstats for %v: %w", container, err)
	}
	snmpStats, err := c.procNetstats(filepath.Join(pidProc, "net/snmp"))
	if err != nil {
		return nil, fmt.Errorf("couldn't get SNMP stats for %v: %w", container, err)
	}
	// Merge the results of snmpStats into netStats (collisions are possible, but
	// we know that the keys are always unique for the given use case).
	for protocol, stats := range snmpStats {
		netStats[protocol] = stats
	}

	var metrics []*metric.Data
	for protocol, protocolStats := range netStats {
		for name, value := range protocolStats {
			key := protocol + "_" + name
			v, err := strconv.ParseFloat(value, 64)
			if err != nil {
				return nil, fmt.Errorf("invalid value %s in netstats for %v: %w", value, container, err)
			}
			if filter.ignored(key) {
				log.Debugf("Ignoring netstat metric %s", key)
				continue
			}
			desc := fmt.Sprintf("Statistic %s.", protocol+name)
			if container == nil {
				metrics = append(metrics, metric.NewGaugeData(key, v, desc, nil))
			} else {
				metrics = append(metrics, metric.NewContainerGaugeData(container, key, v, desc, nil))
			}
		}
	}
	return metrics, nil
}
// procNetstats parses a /proc network statistics file (net/netstat or
// net/snmp) into protocol -> field -> value maps. The file consists of
// line pairs: a header "Proto: f1 f2 ..." followed by a value line
// "Proto: v1 v2 ...". Only the Tcp and TcpExt protocols are kept.
func (c *netstatCollector) procNetstats(fileName string) (map[string]map[string]string, error) {
	file, err := os.Open(fileName)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	netStats := map[string]map[string]string{}
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		nameParts := strings.Split(scanner.Text(), " ")
		// Consume the matching value line; a header without one (trailing
		// garbage or truncated file) ends parsing instead of re-reading a
		// stale buffer.
		if !scanner.Scan() {
			break
		}
		valueParts := strings.Split(scanner.Text(), " ")
		// Guard blank lines: slicing an empty token below would panic.
		if nameParts[0] == "" {
			continue
		}
		// Remove trailing ":".
		protocol := nameParts[0][:len(nameParts[0])-1]
		// protocol: only for Tcp/TcpExt
		if protocol != "Tcp" && protocol != "TcpExt" {
			continue
		}
		if len(nameParts) != len(valueParts) {
			// Old message repeated "mismatch" twice.
			return nil, fmt.Errorf("field count mismatch in %s: %s",
				fileName, protocol)
		}
		netStats[protocol] = map[string]string{}
		for i := 1; i < len(nameParts); i++ {
			netStats[protocol][nameParts[i]] = valueParts[i]
		}
	}
	return netStats, scanner.Err()
}

132
core/metrics/qdisc.go Normal file
View File

@ -0,0 +1,132 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
// ref: https://github.com/prometheus/node_exporter/tree/master/collector
// - qdisc_linux.go
import (
"huatuo-bamai/internal/conf"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
"github.com/ema/qdisc"
)
// qdiscStats accumulates queueing-discipline counters for one
// (device, parent-major) aggregation bucket.
type qdiscStats struct {
	ifaceName  string
	kind       string
	bytes      uint64
	packets    uint32
	drops      uint32
	requeues   uint32
	overlimits uint32
	qlen       uint32
	backlog    uint32
}

// tcHMajMask extracts the major part (upper 16 bits) of a tc handle.
const tcHMajMask = 0xFFFF0000

type qdiscCollector struct{}

func init() {
	tracing.RegisterEventTracing("qdisc", newQdiscCollector)
}

// newQdiscCollector builds the tracing attributes for the collector; it is
// metric-only (FlagMetric, no tracing events).
func newQdiscCollector() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &qdiscCollector{},
		Flag:        tracing.FlagMetric,
	}, nil
}
// Update aggregates qdisc statistics per device and per parent major
// handle, then exports one set of gauges per aggregate. Example:
//
//	<device0> (1+2, 3)
//	1: qdisc <kind> handle0 parent0
//	2: qdisc <kind> handle1 parent0
//	3: qdisc <kind> handle2 parent1
//
//	<device1> (1, 2+3)
//	1: qdisc <kind> handle0 parent0
//	2: qdisc <kind> handle1 parent1
//	3: qdisc <kind> handle2 parent1
func (c *qdiscCollector) Update() ([]*metric.Data, error) {
	filter := newFieldFilter(conf.Get().MetricCollector.Qdisc.IgnoredDevices,
		conf.Get().MetricCollector.Qdisc.AcceptDevices)

	allQdisc, err := qdisc.Get()
	if err != nil {
		return nil, err
	}

	// device -> parent major -> accumulated counters.
	aggregated := make(map[string]map[uint32]*qdiscStats)
	for _, q := range allQdisc {
		if filter.ignored(q.IfaceName) || q.Kind == "noqueue" {
			continue
		}
		parentMaj := (q.Parent & tcHMajMask) >> 16

		perDev := aggregated[q.IfaceName]
		if perDev == nil {
			perDev = make(map[uint32]*qdiscStats)
			aggregated[q.IfaceName] = perDev
		}

		if agg := perDev[parentMaj]; agg != nil {
			agg.bytes += q.Bytes
			agg.packets += q.Packets
			agg.drops += q.Drops
			agg.requeues += q.Requeues
			agg.overlimits += q.Overlimits
			agg.qlen += q.Qlen
			agg.backlog += q.Backlog
		} else {
			perDev[parentMaj] = &qdiscStats{
				ifaceName:  q.IfaceName,
				kind:       q.Kind,
				bytes:      q.Bytes,
				packets:    q.Packets,
				drops:      q.Drops,
				requeues:   q.Requeues,
				overlimits: q.Overlimits,
				qlen:       q.Qlen,
				backlog:    q.Backlog,
			}
		}
	}

	var metrics []*metric.Data
	for _, perDev := range aggregated {
		for _, agg := range perDev {
			tags := map[string]string{"device": agg.ifaceName, "kind": agg.kind}
			metrics = append(metrics,
				metric.NewGaugeData("bytes_total", float64(agg.bytes),
					"Number of bytes sent.", tags),
				metric.NewGaugeData("packets_total", float64(agg.packets),
					"Number of packets sent.", tags),
				metric.NewGaugeData("drops_total", float64(agg.drops),
					"Number of packet drops.", tags),
				metric.NewGaugeData("requeues_total", float64(agg.requeues),
					"Number of packets dequeued, not transmitted, and requeued.", tags),
				metric.NewGaugeData("overlimits_total", float64(agg.overlimits),
					"Number of packet overlimits.", tags),
				metric.NewGaugeData("current_queue_length", float64(agg.qlen),
					"Number of packets currently in queue to be sent.", tags),
				metric.NewGaugeData("backlog", float64(agg.backlog),
					"Number of bytes currently in queue to be sent.", tags),
			)
		}
	}
	return metrics, nil
}

View File

@ -0,0 +1,82 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"fmt"
"reflect"
"huatuo-bamai/internal/pod"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
)
// runqlatCollector exports run-queue latency histograms. The four
// host-level gauges are cached on the struct and refreshed on each Update.
type runqlatCollector struct {
	runqlatMetric []*metric.Data
}

func init() {
	// Attach a per-container latencyBpfData that lives as long as the
	// container does.
	_ = pod.RegisterContainerLifeResources("runqlat", reflect.TypeOf(&latencyBpfData{}))
	tracing.RegisterEventTracing("runqlat", newRunqlatCollector)
}

// newRunqlatCollector builds the tracing attributes; runqlat is both a
// long-running tracer and a metric source.
func newRunqlatCollector() (*tracing.EventTracingAttr, error) {
	collector := &runqlatCollector{
		runqlatMetric: []*metric.Data{
			metric.NewGaugeData("g_nlat_01", 0, "nlat_01 of host", nil),
			metric.NewGaugeData("g_nlat_02", 0, "nlat_02 of host", nil),
			metric.NewGaugeData("g_nlat_03", 0, "nlat_03 of host", nil),
			metric.NewGaugeData("g_nlat_04", 0, "nlat_04 of host", nil),
		},
	}
	return &tracing.EventTracingAttr{
		TracingData: collector,
		Internal:    10,
		Flag:        tracing.FlagTracing | tracing.FlagMetric,
	}, nil
}
// Update exports per-container and host run-queue latency histograms; it
// returns nothing while the tracer is not running.
func (c *runqlatCollector) Update() ([]*metric.Data, error) {
	if !runqlatRunning {
		return nil, nil
	}

	containers, err := pod.GetContainersByType(pod.ContainerTypeNormal)
	if err != nil {
		return nil, fmt.Errorf("GetContainersByType: %w", err)
	}

	out := []*metric.Data{}
	for _, container := range containers {
		lat := container.LifeResouces("runqlat").(*latencyBpfData)
		out = append(out,
			metric.NewContainerGaugeData(container, "nlat_01", float64(lat.NumLatency01), "nlat_01", nil),
			metric.NewContainerGaugeData(container, "nlat_02", float64(lat.NumLatency02), "nlat_02", nil),
			metric.NewContainerGaugeData(container, "nlat_03", float64(lat.NumLatency03), "nlat_03", nil),
			metric.NewContainerGaugeData(container, "nlat_04", float64(lat.NumLatency04), "nlat_04", nil))
	}

	// Refresh the cached host-level gauges from the latest BPF snapshot.
	hostCounts := []uint64{
		globalRunqlat.NumLatency01,
		globalRunqlat.NumLatency02,
		globalRunqlat.NumLatency03,
		globalRunqlat.NumLatency04,
	}
	for i, count := range hostCounts {
		c.runqlatMetric[i].Value = float64(count)
	}
	return append(out, c.runqlatMetric...), nil
}

View File

@ -0,0 +1,120 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"bytes"
"context"
"encoding/binary"
"fmt"
"time"
"huatuo-bamai/internal/bpf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/pod"
"huatuo-bamai/internal/utils/bpfutil"
)
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/runqlat_tracing.c -o $BPF_DIR/runqlat_tracing.o

// latencyBpfData mirrors the BPF map value layout: context-switch counters
// plus four run-queue latency buckets.
type latencyBpfData struct {
	NumVoluntarySwitch   uint64
	NumInVoluntarySwitch uint64
	NumLatency01         uint64
	NumLatency02         uint64
	NumLatency03         uint64
	NumLatency04         uint64
}

var (
	// globalRunqlat holds the latest host-wide histogram snapshot.
	globalRunqlat latencyBpfData
	// runqlatRunning reports whether the tracer is attached; Update only
	// exports metrics while it is true.
	runqlatRunning bool
)
// startRunqlatTracerWork loads and attaches the runqlat BPF program, then
// every 2 seconds copies the per-cgroup and host latency histograms out of
// the BPF maps, until the context is cancelled or the breaker detaches the
// program.
func startRunqlatTracerWork(ctx context.Context) error {
	// load bpf.
	b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
	if err != nil {
		return fmt.Errorf("failed to LoadBpf, err: %w", err)
	}
	defer b.Close()

	if err = b.Attach(); err != nil {
		return err
	}

	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()
	b.WaitDetachByBreaker(childCtx, cancel)

	// Use a ticker instead of time.Sleep in the loop body so cancellation
	// is observed immediately, matching the monsoftirq tracer.
	ticker := time.NewTicker(2 * time.Second)
	defer ticker.Stop()

	runqlatRunning = true
	for {
		select {
		case <-childCtx.Done():
			// Watch childCtx (not the parent ctx): the breaker cancels
			// childCtx on detach, which the old loop never observed.
			return nil
		case <-ticker.C:
			items, err := b.DumpMapByName("cpu_tg_metric")
			if err != nil {
				return fmt.Errorf("failed to dump cpu_tg_metric: %w", err)
			}
			for _, v := range items {
				// The map key is the cpu-cgroup css address identifying
				// the container.
				var css uint64
				if err = binary.Read(bytes.NewReader(v.Key), binary.LittleEndian, &css); err != nil {
					return fmt.Errorf("can't read cpu_tg_metric key: %w", err)
				}
				container, _ := pod.GetContainerByCSS(css, "cpu")
				if container == nil {
					continue
				}
				if err = binary.Read(bytes.NewReader(v.Value), binary.LittleEndian,
					container.LifeResouces("runqlat").(*latencyBpfData)); err != nil {
					return fmt.Errorf("can't read cpu_tg_metric value: %w", err)
				}
			}

			item, err := b.ReadMap(b.MapIDByName("cpu_host_metric"), []byte{0, 0, 0, 0})
			if err != nil {
				return fmt.Errorf("failed to read cpu_host_metric: %w", err)
			}
			if err = binary.Read(bytes.NewReader(item), binary.LittleEndian, &globalRunqlat); err != nil {
				return fmt.Errorf("can't read cpu_host_metric: %w", err)
			}
		}
	}
}
// Start runs the runqlat tracer until it stops, then clears every
// container's cached histogram and marks the tracer as not running.
func (c *runqlatCollector) Start(ctx context.Context) error {
	err := startRunqlatTracerWork(ctx)

	// Zero the per-container data so stale numbers are not exported after
	// the tracer has stopped.
	containers, _ := pod.GetContainersByType(pod.ContainerTypeNormal)
	for _, container := range containers {
		*container.LifeResouces("runqlat").(*latencyBpfData) = latencyBpfData{}
	}
	runqlatRunning = false
	return err
}

122
core/metrics/runtime.go Normal file
View File

@ -0,0 +1,122 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"time"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/utils/cgrouputil"
"huatuo-bamai/internal/utils/parseutil"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
"github.com/prometheus/procfs"
)
const (
// CLK_TCK is a constant on Linux for all architectures except alpha and ia64.
// See e.g.
// https://git.musl-libc.org/cgit/musl/tree/src/conf/sysconf.c#n30
// https://github.com/containerd/cgroups/pull/12
// https://lore.kernel.org/lkml/agtlq6$iht$1@penguin.transmeta.com/
userHZ int64 = 100
)
// runtimeCollector exports CPU and memory usage metrics of the
// huatuo-bamai process itself (read from /proc/self).
type runtimeCollector struct {
	oldStat *procfs.ProcStat // process stat snapshot from the previous Update
	oldTs   int64            // unix timestamp (seconds) of the previous snapshot
}
// Register the "runtime" self-monitoring collector with the tracing framework.
func init() {
	tracing.RegisterEventTracing("runtime", newQosCollector)
}
// newQosCollector creates the tracing attributes for the runtime
// collector: metric-only (FlagMetric), no event tracing.
func newQosCollector() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &runtimeCollector{},
		Flag:        tracing.FlagMetric,
	}, nil
}
// Update implements the metric Collector interface: it gathers CPU and
// memory usage metrics of the current (huatuo-bamai) process.
func (c *runtimeCollector) Update() ([]*metric.Data, error) {
	self, err := procfs.Self()
	if err != nil {
		return nil, err
	}
	out := append(getCPUMetric(c, &self), getMemoryMetric(&self)...)
	return out, nil
}
// getCPUMetric returns the user/sys CPU usage of the current process
// (in percent, scaled by 100), computed as a delta against the snapshot
// saved by the previous call.
//
// The first call only primes the snapshot and returns nil. nil is also
// returned when /proc stat cannot be read, or when no wall time has
// elapsed since the previous snapshot (which would otherwise divide by
// zero and export NaN/Inf gauges).
func getCPUMetric(c *runtimeCollector, p *procfs.Proc) []*metric.Data {
	stat, err := p.Stat()
	if err != nil {
		log.Warnf("not get process stat: %v", err)
		return nil
	}
	ts := time.Now().Unix()
	// First call (or partially-primed state): save the snapshot and report
	// nothing — a delta needs two observations.
	if c.oldStat == nil || c.oldTs == 0 {
		c.oldStat = &stat
		c.oldTs = ts
		return nil
	}
	duration := ts - c.oldTs
	if duration <= 0 {
		// Called again within the same second (or the clock stepped
		// backwards): skip instead of dividing by zero.
		return nil
	}
	data := make([]*metric.Data, 2)
	// huatuo-bamai.cpu.user(*100); UTime/STime are in USER_HZ ticks.
	user := float64(stat.UTime-c.oldStat.UTime) / float64(userHZ*duration)
	data[0] = metric.NewGaugeData("cpu_user", user*100, "user cpu", nil)
	// huatuo-bamai.cpu.sys(*100)
	sys := float64(stat.STime-c.oldStat.STime) / float64(userHZ*duration)
	data[1] = metric.NewGaugeData("cpu_sys", sys*100, "sys cpu", nil)
	// save stat for the next delta
	c.oldStat = &stat
	c.oldTs = ts
	return data
}
// getMemoryMetric returns memory usage metrics (in KiB) of the current
// process: VSS and RSS from /proc/self/status, plus the RSS accounted to
// the huatuo-bamai memory cgroup.
//
// Returns nil when the process status cannot be read. When only the
// cgroup file is unreadable, the two process-level metrics are still
// returned instead of being dropped.
func getMemoryMetric(p *procfs.Proc) []*metric.Data {
	status, err := p.NewStatus()
	if err != nil {
		log.Warnf("not get process status: %v", err)
		return nil
	}
	data := []*metric.Data{
		metric.NewGaugeData("memory_vss", float64(status.VmSize)/1024, "memory vss", nil),
		metric.NewGaugeData("memory_rss", float64(status.VmRSS)/1024, "memory rss", nil),
	}
	rssI, err := parseutil.ReadUint(cgrouputil.V1MemoryPath() + "/huatuo-bamai/memory.usage_in_bytes")
	if err != nil {
		// Keep the process-level metrics even if the cgroup counter
		// is unavailable.
		log.Warnf("can't ParseUint, err: %v", err)
		return data
	}
	return append(data, metric.NewGaugeData("memory_cgroup_rss", float64(rssI)/1024, "memory cgroup rss", nil))
}

188
core/metrics/sockstat.go Normal file
View File

@ -0,0 +1,188 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
// ref: https://github.com/prometheus/node_exporter/tree/master/collector
// - sockstat_linux.go
import (
"errors"
"fmt"
"os"
"path/filepath"
"strconv"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/pod"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
"github.com/prometheus/procfs"
)
// sockstatCollector exports /proc/net/sockstat statistics for the host
// and for each normal container (via its init process's net namespace).
type sockstatCollector struct{}
// Register the "sockstat" metric collector with the tracing framework.
func init() {
	tracing.RegisterEventTracing("sockstat", newSockstatCollector)
}
// newSockstatCollector creates the tracing attributes for the sockstat
// collector (metric-only, no event tracing).
func newSockstatCollector() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &sockstatCollector{},
		Flag:        tracing.FlagMetric,
	}, nil
}
// Update collects sockstat metrics for the host and for every normal
// container, returning them as one flat metric list.
func (c *sockstatCollector) Update() ([]*metric.Data, error) {
	log.Debugf("Updating sockstat metrics")
	// normal containers
	targets, err := pod.GetNormalContainers()
	if err != nil {
		return nil, fmt.Errorf("GetNormalContainers: %w", err)
	}
	// support the empty container
	if targets == nil {
		targets = make(map[string]*pod.Container)
	}
	// The nil entry under the empty key stands for the host itself.
	targets[""] = nil
	var out []*metric.Data
	for _, target := range targets {
		m, err := c.procStatMetrics(target)
		if err != nil {
			return nil, fmt.Errorf("couldn't get sockstat metrics for container %v: %w", target, err)
		}
		out = append(out, m...)
	}
	log.Debugf("Updated sockstat metrics: %v", out)
	return out, nil
}
// procStatMetrics builds the sockstat metrics for one target.
//
// A nil container means the host: statistics are read via pid 1 and
// exported without container labels. For a container, the net namespace
// of its init process is used instead, and the "mem"/"mem_bytes" values
// are skipped (they are only meaningful host-wide).
func (c *sockstatCollector) procStatMetrics(container *pod.Container) ([]*metric.Data, error) {
	pid := 1 // host
	if container != nil {
		pid = container.InitPid
	}
	// NOTE: non-standard using procfs.NewFS.
	// Mounting procfs at /proc/<pid> makes net/sockstat resolve inside
	// that process's network namespace.
	fs, err := procfs.NewFS(filepath.Join("/proc", strconv.Itoa(pid)))
	if err != nil {
		return nil, fmt.Errorf("failed to open procfs: %w", err)
	}
	// If IPv4 and/or IPv6 are disabled on this kernel, handle it gracefully.
	stat, err := fs.NetSockstat()
	switch {
	case err == nil:
	case errors.Is(err, os.ErrNotExist):
		log.Debug("IPv4 sockstat statistics not found, skipping")
	default:
		return nil, fmt.Errorf("failed to get IPv4 sockstat data: %w", err)
	}
	if stat == nil { // nothing to do.
		return nil, nil
	}
	var metrics []*metric.Data
	// If sockstat contains the number of used sockets, export it.
	if stat.Used != nil {
		if container != nil {
			metrics = append(metrics,
				metric.NewContainerGaugeData(container, "sockets_used", float64(*stat.Used), "Number of IPv4 sockets in use.", nil))
		} else {
			metrics = append(metrics,
				metric.NewGaugeData("sockets_used", float64(*stat.Used), "Number of IPv4 sockets in use.", nil))
		}
	}
	// A name and optional value for a sockstat metric.
	type ssPair struct {
		name string
		v    *int
	}
	// Previously these metric names were generated directly from the file output.
	// In order to keep the same level of compatibility, we must map the fields
	// to their correct names.
	for i := range stat.Protocols {
		// Copy the element so the pointers below reference this
		// iteration's value, not a shared loop variable.
		p := stat.Protocols[i]
		pairs := []ssPair{
			{
				name: "inuse",
				v:    &p.InUse,
			},
			{
				name: "orphan",
				v:    p.Orphan,
			},
			{
				name: "tw",
				v:    p.TW,
			},
			{
				name: "alloc",
				v:    p.Alloc,
			},
			{
				name: "mem",
				v:    p.Mem,
			},
			{
				name: "memory",
				v:    p.Memory,
			},
		}
		// Also export mem_bytes values for sockets which have a mem value
		// stored in pages.
		if p.Mem != nil {
			v := *p.Mem * skMemQuantum
			pairs = append(pairs, ssPair{
				name: "mem_bytes",
				v:    &v,
			})
		}
		for _, pair := range pairs {
			if pair.v == nil {
				// This value is not set for this protocol; nothing to do.
				continue
			}
			// mem, mem_bytes are only for `Host` environment.
			if container != nil && (pair.name == "mem" || pair.name == "mem_bytes") {
				continue
			}
			if container != nil {
				metrics = append(metrics,
					metric.NewContainerGaugeData(container, fmt.Sprintf("%s_%s", p.Protocol, pair.name), float64(*pair.v),
						fmt.Sprintf("Number of %s sockets in state %s.", p.Protocol, pair.name), nil))
			} else {
				metrics = append(metrics,
					metric.NewGaugeData(fmt.Sprintf("%s_%s", p.Protocol, pair.name), float64(*pair.v),
						fmt.Sprintf("Number of %s sockets in state %s.", p.Protocol, pair.name), nil))
			}
		}
	}
	return metrics, nil
}

103
core/metrics/tcpmem.go Normal file
View File

@ -0,0 +1,103 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"fmt"
"huatuo-bamai/internal/log"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
"github.com/prometheus/procfs"
)
const (
	// skMemQuantum is the socket memory accounting quantum in bytes:
	// sockstat reports "mem" in pages of this size.
	skMemQuantum = 4096
)
// tcpMemCollector exports host TCP memory usage/limit metrics.
type tcpMemCollector struct {
	tcpMemMetric []*metric.Data // pre-allocated gauges; Update only refreshes Value
}
// Register the "tcp_mem" metric collector with the tracing framework.
func init() {
	tracing.RegisterEventTracing("tcp_mem", newTCPMemCollector)
}
// newTCPMemCollector creates the tcp_mem collector with its four gauges
// (usage in pages and bytes, limit in pages, and the usage/limit ratio).
func newTCPMemCollector() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &tcpMemCollector{
			tcpMemMetric: []*metric.Data{
				metric.NewGaugeData("usage_pages", 0, "tcp mem usage(pages)", nil),
				metric.NewGaugeData("usage_bytes", 0, "tcp mem usage(bytes)", nil),
				metric.NewGaugeData("limit_pages", 0, "tcp mem limit(pages)", nil),
				metric.NewGaugeData("usage_percent", 0, "tcp mem usage percent", nil),
			},
		},
		Flag: tracing.FlagMetric,
	}, nil
}
// getTCPMem reads the host TCP memory accounting: current usage in pages
// and bytes (the TCP "mem" field of /proc/net/sockstat) and the hard
// limit in pages (the third value of net.ipv4.tcp_mem).
//
// On any failure, (-1, -1, -1, err) is returned.
func (c *tcpMemCollector) getTCPMem() (tcpMem, tcpMemBytes, tcpMemLimit float64, err error) {
	fs, err := procfs.NewDefaultFS()
	if err != nil {
		log.Infof("failed to open sysfs: %v", err)
		return -1, -1, -1, err
	}
	values, err := fs.SysctlInts("net.ipv4.tcp_mem")
	if err != nil {
		log.Infof("error obtaining sysctl info: %v", err)
		return -1, -1, -1, err
	}
	// net.ipv4.tcp_mem is "min pressure max"; guard before indexing [2].
	if len(values) < 3 {
		return -1, -1, -1, fmt.Errorf("unexpected net.ipv4.tcp_mem values: %v", values)
	}
	tcpMemLimit = float64(values[2])
	stat4, err := fs.NetSockstat()
	if err != nil {
		log.Infof("failed to get NetSockstat: %v", err)
		return -1, -1, -1, err
	}
	for _, p := range stat4.Protocols {
		if p.Protocol != "TCP" {
			continue
		}
		if p.Mem == nil {
			return -1, -1, -1, fmt.Errorf("failed to read tcpmem usage")
		}
		return float64(*p.Mem), float64(*p.Mem * skMemQuantum), tcpMemLimit, nil
	}
	// No TCP line in sockstat at all: report it instead of silently
	// returning a zero usage with a nil error.
	return -1, -1, -1, fmt.Errorf("no TCP entry found in sockstat")
}
// Update refreshes and returns the four tcp_mem gauges: usage (pages and
// bytes), limit (pages), and the usage/limit ratio.
func (c *tcpMemCollector) Update() ([]*metric.Data, error) {
	usagePages, usageBytes, limitPages, err := c.getTCPMem()
	if err != nil {
		log.Infof("couldn't get tcpmem: %v", err)
		return nil, err
	}
	// Order matches the gauges created in newTCPMemCollector.
	for i, v := range []float64{usagePages, usageBytes, limitPages, usagePages / limitPages} {
		c.tcpMemMetric[i].Value = v
	}
	return c.tcpMemMetric, nil
}

101
core/metrics/utils.go Normal file
View File

@ -0,0 +1,101 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"os"

	"huatuo-bamai/internal/log"
)
// xfs_util maps superblocks of XFS devices to retrieve
// essential information from superblock.
const (
	XFS_SB_MAGIC = 0x58465342 // "XFSB", the on-disk magic of an XFS superblock
	XFSLABEL_MAX = 12         // length of the XFS filesystem label field
)
// Construct the XFS superblock, hiding unused variables.
// Field order and the blank padding must mirror the on-disk layout
// exactly, since the struct is filled with a single binary.Read.
type xfsSuperBlock struct {
	SbMagicnum  uint32 // must equal XFS_SB_MAGIC
	SbBlocksize uint32 // filesystem block size in bytes
	_           [16]byte
	_           [7]uint64
	_           [4]uint32
	SbLogblocks uint32 // number of blocks in the on-disk log
	_           [6]uint16
	_           [XFSLABEL_MAX]byte
	_           [12]uint8
	_           [8]uint64
	_           [12]uint32
	_           [16]byte
}
func fileLineCounter(filePath string) (int, error) {
count := 0
buf := make([]byte, 8*20*4096)
file, err := os.Open(filePath)
if err != nil {
return count, err
}
defer file.Close()
r := io.Reader(file)
for {
c, err := r.Read(buf)
count += bytes.Count(buf[:c], []byte("\n"))
if err == io.EOF {
break
}
if err != nil {
return count, err
}
}
return count, nil
}
// Calculate the Xlog size from superblock.
//
// xfsLogSize reads the XFS superblock at path and returns the on-disk
// log size in bytes (sb_logblocks * sb_blocksize). (-1, err) is returned
// when the file cannot be read or is not a valid XFS superblock.
func xfsLogSize(path string) (float64, error) {
	file, err := os.Open(path)
	if err != nil {
		log.Infof("open failed: %v", err)
		return -1, err
	}
	defer file.Close()
	// The XFS on-disk format is big-endian.
	var sb xfsSuperBlock
	if err := binary.Read(file, binary.BigEndian, &sb); err != nil {
		log.Infof("read superblock failed: err%v", err)
		return -1, err
	}
	// Check Magic Number of Super Block.
	if sb.SbMagicnum != XFS_SB_MAGIC {
		log.Infof("Not a valid XFS superblock (Magic: 0x%x)", sb.SbMagicnum)
		// Bug fix: the previous code returned a nil error here, making
		// the failure indistinguishable from success for callers.
		return -1, fmt.Errorf("not a valid XFS superblock (magic: 0x%x)", sb.SbMagicnum)
	}
	// Convert each factor before multiplying: the product of two uint32
	// values can overflow 32-bit arithmetic on large filesystems.
	return float64(sb.SbLogblocks) * float64(sb.SbBlocksize), nil
}

96
core/metrics/vmstat.go Normal file
View File

@ -0,0 +1,96 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"bufio"
"os"
"strconv"
"strings"
"huatuo-bamai/internal/conf"
"huatuo-bamai/internal/log"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
)
// vmStatCollector exports selected counters from /proc/vmstat.
type vmStatCollector struct{}
// Register the "vmstat" metric collector with the tracing framework.
func init() {
	tracing.RegisterEventTracing("vmstat", newVMStatCollector)
}
// vmStatMetricDesc maps /proc/vmstat field names to the help text of the
// exported gauge. Fields not listed here are still exported when they
// pass the filter, just with an empty description.
var vmStatMetricDesc = map[string]string{
	"allocstall_normal":     "host direct reclaim count on normal zone",
	"allocstall_movable":    "host direct reclaim count on movable zone",
	"compact_stall":         "memory compaction count",
	"nr_active_anon":        "anonymous pages on active lru",
	"nr_active_file":        "file pages on active lru",
	"nr_boost_pages":        "kswapd boost pages",
	"nr_dirty":              "dirty pages",
	"nr_free_pages":         "free pages in buddy system",
	"nr_inactive_anon":      "anonymous pages on inactive lru",
	"nr_inactive_file":      "file pages on inactive lru",
	"nr_kswapd_boost":       "kswapd boosting count",
	"nr_mlock":              "mlocked pages",
	"nr_shmem":              "shared memory pages",
	"nr_slab_reclaimable":   "reclaimable slab pages",
	"nr_slab_unreclaimable": "unreclaimable slab pages",
	"nr_unevictable":        "unevictable pages",
	"nr_writeback":          "writing-back pages",
	"numa_pages_migrated":   "numa migrated pages",
	"pgdeactivate":          "pages deactivated from active lru to inactive lru",
	"pgrefill":              "pages scanned on active lru",
	"pgscan_direct":         "scanned pages in host direct reclaim",
	"pgscan_kswapd":         "scanned pages in host kswapd reclaim",
	"pgsteal_direct":        "reclaimed pages in host direct reclaim",
	"pgsteal_kswapd":        "reclaimed pages in host kswapd reclaim",
}
// newVMStatCollector creates the tracing attributes for the vmstat
// collector (metric-only, no event tracing).
func newVMStatCollector() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &vmStatCollector{},
		Flag:        tracing.FlagMetric,
	}, nil
}
// Update reads /proc/vmstat and exports every counter that passes the
// configured include/exclude filter as a gauge.
//
// Lines that do not have the expected "<name> <value>" shape are skipped
// (previously an index out of range panic was possible on a malformed or
// blank line). Scanner errors are now surfaced instead of being dropped.
func (c *vmStatCollector) Update() ([]*metric.Data, error) {
	filter := newFieldFilter(conf.Get().MetricCollector.Vmstat.ExcludedMetrics,
		conf.Get().MetricCollector.Vmstat.IncludedMetrics)
	file, err := os.Open("/proc/vmstat")
	if err != nil {
		log.Error("Fail to open vmstat")
		return nil, err
	}
	defer file.Close()
	scanner := bufio.NewScanner(file)
	var metrics []*metric.Data
	for scanner.Scan() {
		parts := strings.Fields(scanner.Text())
		// Each valid line is "<name> <value>"; skip anything else.
		if len(parts) < 2 {
			continue
		}
		if filter.ignored(parts[0]) {
			log.Debugf("Ignoring vmstat metric: %s", parts[0])
			continue
		}
		value, err := strconv.ParseFloat(parts[1], 64)
		if err != nil {
			log.Error("Fail to strconv")
			return nil, err
		}
		metrics = append(metrics,
			metric.NewGaugeData(parts[0], value, vmStatMetricDesc[parts[0]], nil))
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return metrics, nil
}

136
docs/CUSTOM.md Normal file
View File

@ -0,0 +1,136 @@
[简体中文](./CUSTOM_CN.md) | English
HuaTuo framework provides three data collection modes: `autotracing`, `event`, and `metrics`, covering different monitoring scenarios, helping users gain comprehensive insights into system performance.
## Collection Mode Comparison
| Mode | Type | Trigger Condition | Data Output | Use Case |
|-----------------|----------------|-------------------|------------------|----------------|
| **Autotracing** | Event-driven | Triggered on system anomalies | ES + Local Storage, Prometheus (optional) | Non-routine operations, triggered on anomalies |
| **Event** | Event-driven | Continuously running, triggered on preset thresholds | ES + Local Storage, Prometheus (optional) | Continuous operations, directly dump context |
| **Metrics** | Metric collection | Passive collection | Prometheus format | Monitoring system metrics |
- **Autotracing**
- **Type**: Event-driven (tracing).
- **Function**: Automatically tracks system anomalies and dump context when anomalies occur.
- **Features**:
- When a system anomaly occurs, `autotracing` is triggered automatically to dump relevant context.
- Data is stored to ES in real-time and stored locally for subsequent analysis and troubleshooting. It can also be monitored in Prometheus format for statistics and alerts.
- Suitable for scenarios with high performance overhead, such as triggering captures when metrics exceed a threshold or rise too quickly.
- **Integrated Features**: CPU anomaly tracking (cpu idle), D-state tracking (dload), container contention (waitrate), memory burst allocation (memburst), disk anomaly tracking (iotracer).
- **Event**
- **Type**: Event-driven (tracing).
- **Function**: Continuously operates within the system context, directly dump context when preset thresholds are met.
- **Features**:
- Unlike `autotracing`, `event` continuously operates within the system context, rather than being triggered by anomalies.
- Data is also stored to ES and locally, and can be monitored in Prometheus format.
- Suitable for continuous monitoring and real-time analysis, enabling timely detection of abnormal behaviors. The performance impact of `event` collection is negligible.
- **Integrated Features**: Soft interrupt anomalies (softirq), memory allocation anomalies (oom), soft lockups (softlockup), D-state processes (hungtask), memory reclamation (memreclaim), packet droped abnormal (dropwatch), network ingress latency (netrecvlat).
- **Metrics**
- **Type**: Metric collection.
- **Function**: Collects performance metrics from subsystems.
- **Features**:
- Metric data can be sourced from regular procfs collection or derived from `tracing` (autotracing, event) data.
- Outputs in Prometheus format for easy integration into Prometheus monitoring systems.
- Unlike `tracing` data, `metrics` primarily focus on system performance metrics such as CPU usage, memory usage, and network traffic, etc.
- Suitable for monitoring system performance metrics, supporting real-time analysis and long-term trend observation.
- **Integrated Features**: CPU (sys, usr, util, load, nr_running, etc.), memory (vmstat, memory_stat, directreclaim, asyncreclaim, etc.), IO (d2c, q2c, freeze, flush, etc.), network (arp, socket mem, qdisc, netstat, netdev, sockstat, etc.).
## Multiple Purpose of Tracing Mode
Both `autotracing` and `event` belong to the **tracing** collection mode, offering the following dual purposes:
1. **Real-time storage to ES and local storage**: For tracing and analyzing anomalies, helping users quickly identify root causes.
2. **Output in Prometheus format**: As metric data integrated into Prometheus monitoring systems, providing comprehensive system monitoring capabilities.
By flexibly combining these three modes, users can comprehensively monitor system performance, capturing both contextual information during anomalies and continuous performance metrics to meet various monitoring needs.
# How to Add Custom Collection
The framework provides convenient APIs, including module startup, data storage, container information, BPF-related (load, attach, read, detach, unload), etc. You can implement custom collection logic and flexibly choose the appropriate collection mode and storage method.
## Tracing Type
Based on your scenarios, you can implement the `ITracingEvent` interface in the `core/autotracing` or `core/events` directory to complete tracing-type collection.
```go
// ITracingEvent represents a tracing/event
type ITracingEvent interface {
Start(ctx context.Context) error
}
```
example:
```go
type exampleTracing struct{}
// Register callback
func init() {
tracing.RegisterEventTracing("example", newExample)
}
// Create tracing
func newExample() (*tracing.EventTracingAttr, error) {
return &tracing.EventTracingAttr{
TracingData: &exampleTracing{},
Internal: 10, // Interval for enable tracing again (in seconds)
Flag: tracing.FlagTracing, // mark as tracing type
}, nil
}
// Implement ITracingEvent
func (t *exampleTracing) Start(ctx context.Context) error {
// do something
...
// Save data to ES and local file
storage.Save("example", containerID, time.Now(), tracerData)
}
// Implement Collector interface for Prometheus format output (optional)
func (c *exampleTracing) Update() ([]*metric.Data, error) {
// from tracerData to prometheus.Metric
...
return data, nil
}
```
## Metric Type
Implement the `Collector` interface in the path `core/metrics` to complete metric-type collection.
```go
type Collector interface {
// Get new metrics and expose them via prometheus registry.
Update() ([]*Data, error)
}
```
example:
```go
type exampleMetric struct{}
// Register callback
func init() {
tracing.RegisterEventTracing("example", newExample)
}
// Create Metric
func newExample() (*tracing.EventTracingAttr, error) {
return &tracing.EventTracingAttr{
TracingData: &exampleMetric{
metric: []*metric.Data{
metric.NewGaugeData("name1", 0, "description of example_name1", nil),
metric.NewGaugeData("name2", 0, "description of example_name2", nil),
},
},
Flag: tracing.FlagMetric, // mark as Metric type
}, nil
}
// Implement Collector interface for Prometheus format output
func (c *exampleMetric) Update() ([]*metric.Data, error) {
// do something
...
return data, nil
}
```
The path `core` of the project includes multiple useful examples of the three collection modules, covering BPF code, map data interaction, container information, and more. For further details, refer to the corresponding code implementations.

136
docs/CUSTOM_CN.md Normal file
View File

@ -0,0 +1,136 @@
[English](./CUSTOM.md) | 简体中文
本框架提供三种数据采集模式:`autotracing`、`event` 和 `metrics`,分别针对不同的监控场景和需求,帮助用户全面掌握系统的运行状态。
## 采集模式对比
| 模式 | 类型 | 触发条件 | 数据输出 | 适用场景 |
|------------- |----------------|--------------|------------------|-----------------|
| **Autotracing** | 异常事件驱动 | 系统异常时触发 | ES + 本地存储Prometheus可选| 不能常态运行,异常时触发运行 |
| **Event** | 异常事件驱动 | 常态运行 | ES + 本地存储Prometheus可选| 常态运行,直接抓取上下文信息 |
| **Metrics** | 指标数据采集 | 被动采集 | Prometheus 格式 | 监控系统性能指标 |
- **Autotracing**
- **类型**异常事件驱动tracing
- **功能**:自动跟踪系统异常状态,并在异常发生时再触发抓取现场上下文信息。
- **特点**
- 当系统出现异常时,`autotracing` 会自动触发,捕获相关的上下文信息。
- 数据会实时上报到 ES 并存储在本地,便于后续分析和排查问题,也可通过 Prometheus 格式进行监控,便于统计和告警。
- 适用于获取现场时性能开销较大的场景,例如检测到指标上升到一定阈值、上升速度过快再触发抓取。
- **已集成**cpu 异常使用跟踪cpu idle、D状态跟踪dload、容器内外部争抢waitrate、内存突发分配memburst、磁盘异常跟踪iotracer
- **Event**
- **类型**异常事件驱动tracing
- **功能**:常态运行在系统上下文中,达到预设阈值直接抓取上下文信息。
- **特点**
- 与 `autotracing` 不同,`event` 是常态运行,而不是在异常时再触发。
- 数据同样会实时上报到 ES 并存储在本地,也可通过 Prometheus 格式进行监控。
- 适合用于常态监控和实时分析,能够及时发现系统中的异常行为, `event` 类型的采集对系统性能影响可忽略。
- **已集成**软中断异常softirq、内存异常分配oom、软锁定softlockup、D 状态进程hungtask、内存回收memreclaim、异常丢包dropwatch、网络入向延迟netrecvlat
- **Metrics**
- **类型**:指标数据采集。
- **功能**:采集各子系统的性能指标数据。
- **特点**
- 指标数据可以来自常规 procfs 采集,也可以从 `tracing` (autotracing,event) 类型获取数据。
- 以 Prometheus 格式输出,便于集成到 Prometheus 监控系统中。
- 与 `tracing` 类数据不同,`metrics` 主要用于采集系统的性能指标,如 CPU 使用率、内存使用率、网络等。
- 适合用于监控系统的性能指标,支持实时分析和长期趋势观察。
- **已集成**cpu (sys, usr, util, load, nr_running...), memoryvmstat, memory_stat, directreclaim, asyncreclaim..., IO(d2c, q2c, freeze, flush...), 网络arp, socket mem, qdisc, netstat, netdev, socketstat...
## Tracing 模式的多重用途
`autotracing``event` 都属于 **tracing** 类数据采集模式,它们具备以下双重用途:
1. **实时保存到 ES 和 本地存储**:用于异常事件的追踪和分析,帮助用户快速根因定位。
2. **以 Prometheus 格式输出**:作为指标数据集成到 Prometheus 监控系统中,提供更全面的系统监控能力。
通过这三种模式的灵活组合,用户可以全面监控系统的运行状态,既能捕获异常事件的上下文信息,也能持续采集性能指标数据,满足不同场景下的监控需求。
# 如何添加自定义采集
框架提供了非常便捷的 API包括模块启动、数据存储、容器信息、bpf 相关 load, attach, read, detach, unload用户可通过自定义的采集逻辑灵活选择合适的采集模式和数据存储的方式。
## tracing 类型
根据实际场景,你可以在 `core/autotracing``core/events` 目录下实现接口 `ITracingEvent` 即可完成 tracing 类型的采集。
```go
// ITracingEvent represents a tracing/event
type ITracingEvent interface {
Start(ctx context.Context) error
}
```
步骤如下:
```go
type exampleTracing struct{}
// 注册回调
func init() {
tracing.RegisterEventTracing("example", newExample)
}
// 创建 tracing
func newExample() (*tracing.EventTracingAttr, error) {
return &tracing.EventTracingAttr{
TracingData: &exampleTracing{},
Internal: 10, // 再次开启 tracing 的间隔时间 seconds
Flag: tracing.FlagTracing, // 标记为 tracing 类型
}, nil
}
// 实现接口 ITracingEvent
func (t *exampleTracing) Start(ctx context.Context) error {
// do something
...
// 存储数据到 ES 和 本地
storage.Save("example", containerID, time.Now(), tracerData)
}
// 也可同时实现接口 Collector 以 Prometheus 格式输出 (可选)
func (c *exampleTracing) Update() ([]*metric.Data, error) {
// from tracerData to prometheus.Metric
...
return data, nil
}
```
## Metric 类型
`core/metrics` 目录下添加接口 `Collector` 的实现即可完成 Metric 类型的采集。
```go
type Collector interface {
// Get new metrics and expose them via prometheus registry.
Update() ([]*Data, error)
}
```
步骤如下:
```go
type exampleMetric struct{}
// 注册回调
func init() {
tracing.RegisterEventTracing("example", newExample)
}
// 创建 Metric
func newExample() (*tracing.EventTracingAttr, error) {
return &tracing.EventTracingAttr{
TracingData: &exampleMetric{
metric: []*metric.Data{
metric.NewGaugeData("name1", 0, "description of example_name1", nil),
metric.NewGaugeData("name2", 0, "description of example_name2", nil),
},
},
Flag: tracing.FlagMetric, // 标记为 Metric 类型
}, nil
}
// 实现接口 Collector 以 Prometheus 格式输出
func (c *exampleMetric) Update() ([]*metric.Data, error) {
// do something
...
return data, nil
}
```
在项目 core 目录下已集成了 3 个采集模块的多种实际场景的示例,包括 bpf 代码、map 数据交互、容器信息等,更多详情可参考对应代码实现。

1
docs/architecture.svg Normal file

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 630 KiB

1
docs/huatuo-arch.svg Normal file

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 629 KiB

BIN
docs/huatuo-cluster00.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 883 KiB

BIN
docs/huatuo-cluster01.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 107 KiB

BIN
docs/huatuo-dropwatch.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 322 KiB

BIN
docs/huatuo-netlatency.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 174 KiB

BIN
docs/huatuo-profiling.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

270
docs/metrics.md Normal file
View File

@ -0,0 +1,270 @@
| Subsystem | Metric | Description | Unit | Dimension | Source |
| --------- | ------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------- | -------------- | ------------------------------------------------------------------------------------- |
| cpu | cpu_util_sys | Time of running kernel processes percentage of host | % | host | Calculated based on cpuacct.stat and cpuacct.usage |
| cpu | cpu_util_usr | Time of running user processes percentage of host | % | host | Calculated based on cpuacct.stat and cpuacct.usage |
| cpu | cpu_util_total | Total time of running percentage of host | % | host | Calculated based on cpuacct.stat and cpuacct.usage |
| cpu | cpu_util_container_sys | Time of running kernel processes percentage of container | % | container | Calculated based on cpuacct.stat and cpuacct.usage |
| cpu | cpu_util_container_usr | Time of running user processes percentage of container | % | container | Calculated based on cpuacct.stat and cpuacct.usage |
| cpu | cpu_util_container_total | Total time of running percentage of container | % | container | Calculated based on cpuacct.stat and cpuacct.usage |
| cpu | cpu_stat_container_burst_time | Cumulative wall-time (in nanoseconds) that any CPUs has used above quota in respective periods | ns | container | cpu.stat |
| cpu | cpu_stat_container_nr_bursts | Number of periods burst occurs | count | container | cpu.stat |
| cpu | cpu_stat_container_nr_throttled | Number of times the group has been throttled/limited | count | container | cpu.stat |
| cpu | cpu_stat_container_exter_wait_rate | Wait rate caused by processes outside the container | % | container | Calculated based on throttled_time/hierarchy_wait_sum/inner_wait_sum read from cpu.stat |
| cpu | cpu_stat_container_inner_wait_rate | Wait rate caused by processes inside the container | % | container | Calculated based on throttled_time/hierarchy_wait_sum/inner_wait_sum read from cpu.stat |
| cpu | cpu_stat_container_throttle_wait_rate | Wait rate caused by throttle of container | % | container | Calculated based on throttled_time/hierarchy_wait_sum/inner_wait_sum read from cpu.stat |
| cpu | cpu_stat_container_wait_rate | Total wait rate: exter_wait_rate + inner_wait_rate + throttle_wait_rate | % | container | Calculated based on throttled_time/hierarchy_wait_sum/inner_wait_sum read from cpu.stat |
| cpu | loadavg_container_container_nr_running | The number of running tasks in the container | count | container | get from kernel via netlink |
| cpu | loadavg_container_container_nr_uninterruptible | The number of uninterruptible tasks in the container | count | container | get from kernel via netlink |
| cpu | loadavg_load1 | System load avg over the last 1 minute | count | host | proc fs |
| cpu | loadavg_load5 | System load avg over the last 5 minutes | count | host | proc fs |
| cpu | loadavg_load15 | System load avg over the last 15 minutes | count | host | proc fs |
| cpu | monsoftirq_latency | The number of NET_RX/NET_TX irq latency happened in the following regions:<br>0~10 us<br>100us ~ 1ms<br>10us ~ 100us<br>1ms ~ inf | count | host | hook the softirq event and do time statistics via bpf |
| cpu | runqlat_container_nlat_01 | The number of times when schedule latency of processes in the container is within 0~10ms | count | container | hook the scheduling switch event and do time statistics via bpf |
| cpu | runqlat_container_nlat_02 | The number of times when schedule latency of processes in the container is within 10~20ms | count | container | hook the scheduling switch event and do time statistics via bpf |
| cpu | runqlat_container_nlat_03 | The number of times when schedule latency of processes in the container is within 20~50ms | count | container | hook the scheduling switch event and do time statistics via bpf |
| cpu | runqlat_container_nlat_04 | The number of times when schedule latency of processes in the container is more than 50ms | count | container | hook the scheduling switch event and do time statistics via bpf |
| cpu | runqlat_g_nlat_01 | The number of times when schedule latency of processes in the host is within<br>0~10ms | count | host | hook the scheduling switch event and do time statistics via bpf |
| cpu | runqlat_g_nlat_02 | The number of times when schedule latency of processes in the host is within 10~20ms | count | host | hook the scheduling switch event and do time statistics via bpf |
| cpu | runqlat_g_nlat_03 | The number of times when schedule latency of processes in the host is within 20~50ms | count | host | hook the scheduling switch event and do time statistics via bpf |
| cpu | runqlat_g_nlat_04 | The number of times when schedule latency of processes in the host is more than 50ms | count | host | hook the scheduling switch event and do time statistics via bpf |
| cpu | reschedipi_oversell_probability | The possibility of cpu overselling exists on the host where the vm is located | 0-1 | host | hook the scheduling ipi event and do time statistics via bpf |
| memory | buddyinfo_blocks | Kernel memory allocator information | pages | host | proc fs |
| memory | memory_events_container_watermark_inc | Counts of memory allocation watermark increasing | count | container | memory.events |
| memory | memory_events_container_watermark_dec | Counts of memory allocation watermark decreasing | count | container | memory.events |
| memory | memory_others_container_local_direct_reclaim_time | Time spent in page allocation in memory cgroup | nanosecond | container | memory.local_direct_reclaim_time |
| memory | memory_others_container_directstall_time | Memory cgroup's direct reclaim time in try_charge | nanosecond | container | memory.directstall_stat |
| memory | memory_others_container_asyncreclaim_time | Memory cgroup's direct reclaim time in cgroup async memory reclaim | nanosecond | container | memory.asynreclaim_stat |
| memory | priority_reclaim_kswapd | Kswapd's reclaim stat in priority reclaiming | pages | host | proc fs |
| memory | priority_reclaim_direct | Direct reclaim stat in priority reclaiming | pages | host | proc fs |
| memory | memory_stat_container_writeback | Bytes of file/anon cache that are queued for syncing to disk | bytes | container | memory.stat |
| memory | memory_stat_container_unevictable | Bytes of memory that cannot be reclaimed (mlocked etc) | bytes | container | memory.stat |
| memory | memory_stat_container_shmem | Bytes of shmem memory | bytes | container | memory.stat |
| memory | memory_stat_container_pgsteal_kswapd | Bytes of reclaimed memory by kswapd and cswapd | bytes | container | memory.stat |
| memory | memory_stat_container_pgsteal_globalkswapd | Bytes of reclaimed memory by kswapd | bytes | container | memory.stat |
| memory | memory_stat_container_pgsteal_globaldirect | Bytes of reclaimed memory by direct reclaim during page allocation | bytes | container | memory.stat |
| memory | memory_stat_container_pgsteal_direct | Bytes of reclaimed memory by direct reclaim during page allocation and try_charge | bytes | container | memory.stat |
| memory | memory_stat_container_pgsteal_cswapd | Bytes of reclaimed memory by cswapd | bytes | container | memory.stat |
| memory | memory_stat_container_pgscan_kswapd | Bytes of scanned memory by kswapd and cswapd | bytes | container | memory.stat |
| memory | memory_stat_container_pgscan_globalkswapd | Bytes of scanned memory by kswapd | bytes | container | memory.stat |
| memory | memory_stat_container_pgscan_globaldirect | Bytes of scanned memory by direct reclaim during page allocation | bytes | container | memory.stat |
| memory | memory_stat_container_pgscan_direct | Bytes of scanned memory by direct reclaim during page allocation and try_charge | bytes | container | memory.stat |
| memory | memory_stat_container_pgscan_cswapd | Bytes of scanned memory by cswapd | bytes | container | memory.stat |
| memory | memory_stat_container_pgrefill | Bytes of memory that is scanned in active list | bytes | container | memory.stat |
| memory | memory_stat_container_pgdeactivate | Bytes of memory that is deactivated into inactive list | bytes | container | memory.stat |
| memory | memory_stat_container_inactive_file | Bytes of file-backed memory on inactive lru list. | bytes | container | memory.stat |
| memory | memory_stat_container_inactive_anon | Bytes of anonymous and swap cache memory on inactive lru list | bytes | container | memory.stat |
| memory | memory_stat_container_dirty | Bytes that are waiting to get written back to the disk | bytes | container | memory.stat |
| memory | memory_stat_container_active_file | Bytes of file-backed memory on active lru list | bytes | container | memory.stat |
| memory | memory_stat_container_active_anon | Bytes of anonymous and swap cache memory on active lru list | bytes | container | memory.stat |
| memory | mountpoint_perm_ro | Whether mountpoint is readonly or not | bool | host | proc fs |
| memory | vmstat_allocstall_normal | Host direct reclaim count on normal zone | count | host | /proc/vmstat |
| memory | vmstat_allocstall_movable | Host direct reclaim count on movable zone | count | host | /proc/vmstat |
| memory | vmstat_compact_stall | Count of memory compaction | count | host | /proc/vmstat |
| memory | vmstat_nr_active_anon | Number of anonymous pages on active lru | pages | host | /proc/vmstat |
| memory | vmstat_nr_active_file | Number of file-backed pages on active lru | pages | host | /proc/vmstat |
| memory | vmstat_nr_boost_pages | Number of pages in kswapd boosting | pages | host | /proc/vmstat |
| memory | vmstat_nr_dirty | Number of dirty pages | pages | host | /proc/vmstat |
| memory | vmstat_nr_free_pages | Number of free pages | pages | host | /proc/vmstat |
| memory | vmstat_nr_inactive_anon | Number of anonymous pages on inactive lru | pages | host | /proc/vmstat |
| memory | vmstat_nr_inactive_file | Number of file-backed pages on inactive lru | pages | host | /proc/vmstat |
| memory | vmstat_nr_kswapd_boost | Count of kswapd boosting | pages | host | /proc/vmstat |
| memory | vmstat_nr_mlock | Number of locked pages | pages | host | /proc/vmstat |
| memory | vmstat_nr_shmem | Number of shmem pages | pages | host | /proc/vmstat |
| memory | vmstat_nr_slab_reclaimable | Number of reclaimable slab pages | pages | host | /proc/vmstat |
| memory | vmstat_nr_slab_unreclaimable | Number of unreclaimable slab pages | pages | host | /proc/vmstat |
| memory | vmstat_nr_unevictable | Number of unevictable pages | pages | host | /proc/vmstat |
| memory | vmstat_nr_writeback | Number of writebacking pages | pages | host | /proc/vmstat |
| memory | vmstat_numa_pages_migrated | Number of pages in numa migrating | pages | host | /proc/vmstat |
| memory | vmstat_pgdeactivate | Number of pages which are deactivated into inactive lru | pages | host | /proc/vmstat |
| memory | vmstat_pgrefill | Number of pages which are scanned on active lru | pages | host | /proc/vmstat |
| memory | vmstat_pgscan_direct | Number of pages which are scanned in direct reclaim | pages | host | /proc/vmstat |
| memory | vmstat_pgscan_kswapd | Number of pages which are scanned in kswapd reclaim | pages | host | /proc/vmstat |
| memory | vmstat_pgsteal_direct | Number of pages which are reclaimed in direct reclaim | pages | host | /proc/vmstat |
| memory | vmstat_pgsteal_kswapd | Number of pages which are reclaimed in kswapd reclaim | pages | host | /proc/vmstat |
| memory | hungtask_happened | Count of hungtask events | count | host | performance and statistics monitoring for BPF Programs |
| memory | oom_happened | Count of oom events | count | host,container | performance and statistics monitoring for BPF Programs |
| memory | softlockup_happened | Count of softlockup events | count | host | performance and statistics monitoring for BPF Programs |
| memory | mmhostbpf_compactionstat | Time spent in memory compaction | nanosecond | host | performance and statistics monitoring for BPF Programs |
| memory | mmhostbpf_allocstallstat | Time spent in memory direct reclaim on host | nanosecond | host | performance and statistics monitoring for BPF Programs |
| memory | mmcgroupbpf_container_directstallcount | Count of cgroup's try_charge direct reclaim | count | container | performance and statistics monitoring for BPF Programs |
| IO | iolatency_disk_d2c | Statistics of io latency when accessing the disk, including the time consumed by the driver and hardware components | count | host | performance and statistics monitoring for BPF Programs |
| IO | iolatency_disk_q2c | Statistics of io latency for the entire io lifecycle when accessing the disk | count | host | performance and statistics monitoring for BPF Programs |
| IO | iolatency_container_d2c | Statistics of io latency when accessing the disk, including the time consumed by the driver and hardware components | count | container | performance and statistics monitoring for BPF Programs |
| IO | iolatency_container_q2c | Statistics of io latency for the entire io lifecycle when accessing the disk | count | container | performance and statistics monitoring for BPF Programs |
| IO | iolatency_disk_flush | Statistics of delay for flush operations on disk raid device | count | host | performance and statistics monitoring for BPF Programs |
| IO | iolatency_container_flush | Statistics of delay for flush operations on disk raid devices caused by containers | count | container | performance and statistics monitoring for BPF Programs |
| IO | iolatency_disk_freeze | Statistics of disk freeze events | count | host | performance and statistics monitoring for BPF Programs |
| network | tcp_mem_limit_pages | System TCP total memory size limit | pages | system | proc fs |
| network | tcp_mem_usage_bytes | The total number of bytes of TCP memory used by the system | bytes | system | tcp_mem_usage_pages \* page_size |
| network | tcp_mem_usage_pages | The total size of TCP memory used by the system | pages | system | proc fs |
| network | tcp_mem_usage_percent | The percentage of TCP memory used by the system to the limit size | % | system | tcp_mem_usage_pages / tcp_mem_limit_pages |
| network | arp_entries | The number of arp cache entries | count | host,container | proc fs |
| network | arp_total | Total number of arp cache entries | count | system | proc fs |
| network | qdisc_backlog | The number of bytes queued to be sent | bytes | host | sum of same level(parent major) for a device |
| network | qdisc_bytes_total | The number of bytes sent | bytes | host | sum of same level(parent major) for a device |
| network | qdisc_current_queue_length | The number of packets queued for sending | count | host | sum of same level(parent major) for a device |
| network | qdisc_drops_total | The number of discarded packets | count | host | sum of same level(parent major) for a device |
| network | qdisc_overlimits_total | The number of queued packets exceeds the limit | count | host | sum of same level(parent major) for a device |
| network | qdisc_packets_total | The number of packets sent | count | host | sum of same level(parent major) for a device |
| network | qdisc_requeues_total | The number of packets that were not sent successfully and were requeued | count | host | sum of same level(parent major) for a device |
| network | ethtool_hardware_rx_dropped_errors | Statistics of inbound packets dropped or errors of interface | count | host | related to hardware drivers, such as mlx, ixgbe, bnxt_en, etc. |
| network | netdev_receive_bytes_total | Number of good received bytes | bytes | host,container | proc fs |
| network | netdev_receive_compressed_total | Number of correctly received compressed packets | count | host,container | proc fs |
| network | netdev_receive_dropped_total | Number of packets received but not processed | count | host,container | proc fs |
| network | netdev_receive_errors_total | Total number of bad packets received on this network device | count | host,container | proc fs |
| network | netdev_receive_fifo_total | Receiver FIFO error counter | count | host,container | proc fs |
| network | netdev_receive_frame_total | Receiver frame alignment errors | count | host,container | proc fs |
| network | netdev_receive_multicast_total | Multicast packets received. For hardware interfaces this statistic is commonly calculated at the device level (unlike rx_packets) and therefore may include packets which did not reach the host | count | host,container | proc fs |
| network | netdev_receive_packets_total | Number of good packets received by the interface | count | host,container | proc fs |
| network | netdev_transmit_bytes_total | Number of good transmitted bytes, corresponding to tx_packets | bytes | host,container | proc fs |
| network | netdev_transmit_carrier_total | Number of frame transmission errors due to loss of carrier during transmission | count | host,container | proc fs |
| network | netdev_transmit_colls_total | Number of collisions during packet transmissions | count | host,container | proc fs |
| network | netdev_transmit_compressed_total | Number of transmitted compressed packets | count | host,container | proc fs |
| network | netdev_transmit_dropped_total | Number of packets dropped on their way to transmission, e.g. due to lack of resources | count | host,container | proc fs |
| network | netdev_transmit_errors_total | Total number of transmit problems | count | host,container | proc fs |
| network | netdev_transmit_fifo_total | Number of frame transmission errors due to device FIFO underrun / underflow | count | host,container | proc fs |
| network | netdev_transmit_packets_total | Number of packets successfully transmitted | count | host,container | proc fs |
| network | netstat_TcpExt_ArpFilter | \- | count | host,container | proc fs |
| network | netstat_TcpExt_BusyPollRxPackets | \- | count | host,container | proc fs |
| network | netstat_TcpExt_DelayedACKLocked | A delayed ACK timer expires, but the TCP stack can't send an ACK immediately because the socket is locked by a userspace program. The TCP stack will send a pure ACK later (after the userspace program unlocks the socket). When the TCP stack sends the pure ACK later, the TCP stack will also update TcpExtDelayedACKs and exit the delayed ACK mode | count | host,container | proc fs |
| network | netstat_TcpExt_DelayedACKLost | It will be updated when the TCP stack receives a packet which has been ACKed. A Delayed ACK loss might cause this issue, but it would also be triggered by other reasons, such as a packet is duplicated in the network | count | host,container | proc fs |
| network | netstat_TcpExt_DelayedACKs | A delayed ACK timer expires. The TCP stack will send a pure ACK packet and exit the delayed ACK mode | count | host,container | proc fs |
| network | netstat_TcpExt_EmbryonicRsts | \- | count | host,container | proc fs |
| network | netstat_TcpExt_IPReversePathFilter | \- | count | host,container | proc fs |
| network | netstat_TcpExt_ListenDrops | When kernel receives a SYN from a client, and if the TCP accept queue is full, kernel will drop the SYN and add 1 to TcpExtListenOverflows. At the same time kernel will also add 1 to TcpExtListenDrops. When a TCP socket is in LISTEN state, and kernel need to drop a packet, kernel would always add 1 to TcpExtListenDrops. So increase TcpExtListenOverflows would let TcpExtListenDrops increasing at the same time, but TcpExtListenDrops would also increase without TcpExtListenOverflows increasing, e.g. a memory allocation fail would also let TcpExtListenDrops increase | count | host,container | proc fs |
| network | netstat_TcpExt_ListenOverflows | When kernel receives a SYN from a client, and if the TCP accept queue is full, kernel will drop the SYN and add 1 to TcpExtListenOverflows. At the same time kernel will also add 1 to TcpExtListenDrops. When a TCP socket is in LISTEN state, and kernel need to drop a packet, kernel would always add 1 to TcpExtListenDrops. So increase TcpExtListenOverflows would let TcpExtListenDrops increasing at the same time, but TcpExtListenDrops would also increase without TcpExtListenOverflows increasing, e.g. a memory allocation fail would also let TcpExtListenDrops increase | count | host,container | proc fs |
| network | netstat_TcpExt_LockDroppedIcmps | \- | count | host,container | proc fs |
| network | netstat_TcpExt_OfoPruned | The TCP stack tries to discard packet on the out of order queue | count | host,container | proc fs |
| network | netstat_TcpExt_OutOfWindowIcmps | \- | count | host,container | proc fs |
| network | netstat_TcpExt_PAWSActive | Packets are dropped by PAWS in Syn-Sent status | count | host,container | proc fs |
| network | netstat_TcpExt_PAWSEstab | Packets are dropped by PAWS in any status other than Syn-Sent | count | host,container | proc fs |
| network | netstat_TcpExt_PFMemallocDrop | \- | count | host,container | proc fs |
| network | netstat_TcpExt_PruneCalled | The TCP stack tries to reclaim memory for a socket. After updates this counter, the TCP stack will try to collapse the out of order queue and the receiving queue. If the memory is still not enough, the TCP stack will try to discard packets from the out of order queue (and update the TcpExtOfoPruned counter) | count | host,container | proc fs |
| network | netstat_TcpExt_RcvPruned | After collapse and discard packets from the out of order queue, if the actually used memory is still larger than the max allowed memory, this counter will be updated. It means the prune fails | count | host,container | proc fs |
| network | netstat_TcpExt_SyncookiesFailed | The MSS decoded from the SYN cookie is invalid. When this counter is updated, the received packet won't be treated as a SYN cookie and the TcpExtSyncookiesRecv counter won't be updated | count | host,container | proc fs |
| network | netstat_TcpExt_SyncookiesRecv | How many reply packets of the SYN cookies the TCP stack receives | count | host,container | proc fs |
| network | netstat_TcpExt_SyncookiesSent | It indicates how many SYN cookies are sent | count | host,container | proc fs |
| network | netstat_TcpExt_TCPACKSkippedChallenge | The ACK is skipped if the ACK is a challenge ACK | count | host,container | proc fs |
| network | netstat_TcpExt_TCPACKSkippedFinWait2 | The ACK is skipped in Fin-Wait-2 status, the reason would be either PAWS check fails or the received sequence number is out of window | count | host,container | proc fs |
| network | netstat_TcpExt_TCPACKSkippedPAWS | The ACK is skipped due to PAWS (Protect Against Wrapped Sequence numbers) check fails | count | host,container | proc fs |
| network | netstat_TcpExt_TCPACKSkippedSeq | The sequence number is out of window and the timestamp passes the PAWS check and the TCP status is not Syn-Recv, Fin-Wait-2, and Time-Wait | count | host,container | proc fs |
| network | netstat_TcpExt_TCPACKSkippedSynRecv | The ACK is skipped in Syn-Recv status. The Syn-Recv status means the TCP stack receives a SYN and replies SYN+ACK | count | host,container | proc fs |
| network | netstat_TcpExt_TCPACKSkippedTimeWait | The ACK is skipped in Time-Wait status, the reason would be either PAWS check failed or the received sequence number is out of window | count | host,container | proc fs |
| network | netstat_TcpExt_TCPAbortFailed | The kernel TCP layer will send RST if the RFC2525 2.17 section is satisfied. If an internal error occurs during this process, TcpExtTCPAbortFailed will be increased | count | host,container | proc fs |
| network | netstat_TcpExt_TCPAbortOnClose | Number of sockets closed when the user-mode program has data in the buffer | count | host,container | proc fs |
| network | netstat_TcpExt_TCPAbortOnData | It means TCP layer has data in flight, but need to close the connection | count | host,container | proc fs |
| network | netstat_TcpExt_TCPAbortOnLinger | When a TCP connection comes into FIN_WAIT_2 state, instead of waiting for the fin packet from the other side, kernel could send a RST and delete the socket immediately | count | host,container | proc fs |
| network | netstat_TcpExt_TCPAbortOnMemory | When an application closes a TCP connection, kernel still need to track the connection, let it complete the TCP disconnect process | count | host,container | proc fs |
| network | netstat_TcpExt_TCPAbortOnTimeout | This counter will increase when any of the TCP timers expire. In such situation, kernel won't send RST, just give up the connection | count | host,container | proc fs |
| network | netstat_TcpExt_TCPAckCompressed | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPAutoCorking | When sending packets, the TCP layer will try to merge small packets to a bigger one | count | host,container | proc fs |
| network | netstat_TcpExt_TCPBacklogDrop | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPChallengeACK | The number of challenge acks sent | count | host,container | proc fs |
| network | netstat_TcpExt_TCPDSACKIgnoredNoUndo | When a DSACK block is invalid, one of these two counters would be updated. Which counter will be updated depends on the undo_marker flag of the TCP socket | count | host,container | proc fs |
| network | netstat_TcpExt_TCPDSACKIgnoredOld | When a DSACK block is invalid, one of these two counters would be updated. Which counter will be updated depends on the undo_marker flag of the TCP socket | count | host,container | proc fs |
| network | netstat_TcpExt_TCPDSACKOfoRecv | The TCP stack receives a DSACK, which indicate an out of order duplicate packet is received | count | host,container | proc fs |
| network | netstat_TcpExt_TCPDSACKOfoSent | The TCP stack receives an out of order duplicate packet, so it sends a DSACK to the sender | count | host,container | proc fs |
| network | netstat_TcpExt_TCPDSACKOldSent | The TCP stack receives a duplicate packet which has been acked, so it sends a DSACK to the sender | count | host,container | proc fs |
| network | netstat_TcpExt_TCPDSACKRecv | The TCP stack receives a DSACK, which indicates an acknowledged duplicate packet is received | count | host,container | proc fs |
| network | netstat_TcpExt_TCPDSACKUndo | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPDeferAcceptDrop | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPDelivered | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPDeliveredCE | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPFastOpenActive | When the TCP stack receives an ACK packet in the SYN-SENT status, and the ACK packet acknowledges the data in the SYN packet, the TCP stack understand the TFO cookie is accepted by the other side, then it updates this counter | count | host,container | proc fs |
| network | netstat_TcpExt_TCPFastOpenActiveFail | Fast Open attempts (SYN/data) failed because the remote does not accept it or the attempts timed out | count | host,container | proc fs |
| network | netstat_TcpExt_TCPFastOpenBlackhole | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPFastOpenCookieReqd | This counter indicates how many times a client wants to request a TFO cookie | count | host,container | proc fs |
| network | netstat_TcpExt_TCPFastOpenListenOverflow | When the pending fast open request number is larger than fastopenq->max_qlen, the TCP stack will reject the fast open request and update this counter | count | host,container | proc fs |
| network | netstat_TcpExt_TCPFastOpenPassive | This counter indicates how many times the TCP stack accepts the fast open request | count | host,container | proc fs |
| network | netstat_TcpExt_TCPFastOpenPassiveFail | This counter indicates how many times the TCP stack rejects the fast open request. It is caused by either the TFO cookie is invalid or the TCP stack finds an error during the socket creating process | count | host,container | proc fs |
| network | netstat_TcpExt_TCPFastRetrans | The TCP stack wants to retransmit a packet and the congestion control state is not Loss | count | host,container | proc fs |
| network | netstat_TcpExt_TCPFromZeroWindowAdv | The TCP receive window is set to no-zero value from zero | count | host,container | proc fs |
| network | netstat_TcpExt_TCPFullUndo | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPHPAcks | If a packet set ACK flag and has no data, it is a pure ACK packet, if kernel handles it in the fast path, TcpExtTCPHPAcks will increase 1 | count | host,container | proc fs |
| network | netstat_TcpExt_TCPHPHits | If a TCP packet has data (which means it is not a pure ACK packet), and this packet is handled in the fast path, TcpExtTCPHPHits will increase 1 | count | host,container | proc fs |
| network | netstat_TcpExt_TCPHystartDelayCwnd | The sum of CWND detected by packet delay. Dividing this value by TcpExtTCPHystartDelayDetect is the average CWND which detected by the packet delay | count | host,container | proc fs |
| network | netstat_TcpExt_TCPHystartDelayDetect | How many times the packet delay threshold is detected | count | host,container | proc fs |
| network | netstat_TcpExt_TCPHystartTrainCwnd | The sum of CWND detected by ACK train length. Dividing this value by TcpExtTCPHystartTrainDetect is the average CWND which detected by the ACK train length | count | host,container | proc fs |
| network | netstat_TcpExt_TCPHystartTrainDetect | How many times the ACK train length threshold is detected | count | host,container | proc fs |
| network | netstat_TcpExt_TCPKeepAlive | This counter indicates many keepalive packets were sent. The keepalive wont be enabled by default. A userspace program could enable it by setting the SO_KEEPALIVE socket option | count | host,container | proc fs |
| network | netstat_TcpExt_TCPLossFailures | Number of connections that enter the TCP_CA_Loss phase and then undergo RTO timeout | count | host,container | proc fs |
| network | netstat_TcpExt_TCPLossProbeRecovery | A packet loss is detected and recovered by TLP | count | host,container | proc fs |
| network | netstat_TcpExt_TCPLossProbes | A TLP probe packet is sent | count | host,container | proc fs |
| network | netstat_TcpExt_TCPLossUndo | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPLostRetransmit | A SACK points out that a retransmission packet is lost again | count | host,container | proc fs |
| network | netstat_TcpExt_TCPMD5Failure | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPMD5NotFound | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPMD5Unexpected | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPMTUPFail | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPMTUPSuccess | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPMemoryPressures | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPMemoryPressuresChrono | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPMinTTLDrop | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPOFODrop | The TCP layer receives an out of order packet but doesn't have enough memory, so drops it. Such packets won't be counted into TcpExtTCPOFOQueue | count | host,container | proc fs |
| network | netstat_TcpExt_TCPOFOMerge | The received out of order packet has an overlay with the previous packet. the overlay part will be dropped. All of TcpExtTCPOFOMerge packets will also be counted into TcpExtTCPOFOQueue | count | host,container | proc fs |
| network | netstat_TcpExt_TCPOFOQueue | The TCP layer receives an out of order packet and has enough memory to queue it | count | host,container | proc fs |
| network | netstat_TcpExt_TCPOrigDataSent | Number of outgoing packets with original data (excluding retransmission but including data-in-SYN). This counter is different from TcpOutSegs because TcpOutSegs also tracks pure ACKs. TCPOrigDataSent is more useful to track the TCP retransmission rate | count | host,container | proc fs |
| network | netstat_TcpExt_TCPPartialUndo | Detected some erroneous retransmits, a partial ACK arrived while we're fast retransmitting, so able to partially undo some of our CWND reduction | count | host,container | proc fs |
| network | netstat_TcpExt_TCPPureAcks | If a packet set ACK flag and has no data, it is a pure ACK packet, if kernel handles it in the fast path, TcpExtTCPHPAcks will increase 1, if kernel handles it in the slow path, TcpExtTCPPureAcks will increase 1 | count | host,container | proc fs |
| network | netstat_TcpExt_TCPRcvCoalesce | When packets are received by the TCP layer and are not read by the application, the TCP layer will try to merge them. This counter indicates how many packets are merged in such situations. If GRO is enabled, lots of packets would be merged by GRO, and these packets wouldn't be counted in TcpExtTCPRcvCoalesce | count | host,container | proc fs |
| network | netstat_TcpExt_TCPRcvCollapsed | This counter indicates how many skbs are freed during collapse | count | host,container | proc fs |
| network | netstat_TcpExt_TCPRenoFailures | Number of failures that enter the TCP_CA_Disorder phase and then undergo RTO | count | host,container | proc fs |
| network | netstat_TcpExt_TCPRenoRecovery | When the congestion control comes into Recovery state, if sack is used, TcpExtTCPSackRecovery increases 1, if sack is not used, TcpExtTCPRenoRecovery increases 1. These two counters mean the TCP stack begins to retransmit the lost packets | count | host,container | proc fs |
| network | netstat_TcpExt_TCPRenoRecoveryFail | Number of connections that enter the Recovery phase and then undergo RTO | count | host,container | proc fs |
| network | netstat_TcpExt_TCPRenoReorder | The reorder packet is detected by fast recovery. It would only be used if SACK is disabled | count | host,container | proc fs |
| network | netstat_TcpExt_TCPReqQFullDoCookies | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPReqQFullDrop | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPRetransFail | The TCP stack tries to deliver a retransmission packet to lower layers but the lower layers return an error | count | host,container | proc fs |
| network | netstat_TcpExt_TCPSACKDiscard | This counter indicates how many SACK blocks are invalid. If the invalid SACK block is caused by ACK recording, the TCP stack will only ignore it and won't update this counter | count | host,container | proc fs |
| network | netstat_TcpExt_TCPSACKReneging | A packet was acknowledged by SACK, but the receiver has dropped this packet, so the sender needs to retransmit this packet | count | host,container | proc fs |
| network | netstat_TcpExt_TCPSACKReorder | The reorder packet detected by SACK | count | host,container | proc fs |
| network | netstat_TcpExt_TCPSYNChallenge | The number of challenge acks sent in response to SYN packets | count | host,container | proc fs |
| network | netstat_TcpExt_TCPSackFailures | Number of failures that enter the TCP_CA_Disorder phase and then undergo RTO | count | host,container | proc fs |
| network | netstat_TcpExt_TCPSackMerged | A skb is merged | count | host,container | proc fs |
| network | netstat_TcpExt_TCPSackRecovery | When the congestion control comes into Recovery state, if sack is used, TcpExtTCPSackRecovery increases 1, if sack is not used, TcpExtTCPRenoRecovery increases 1. These two counters mean the TCP stack begins to retransmit the lost packets | count | host,container | proc fs |
| network | netstat_TcpExt_TCPSackRecoveryFail | When the congestion control comes into Recovery state, if sack is used, TcpExtTCPSackRecovery increases 1 | count | host,container | proc fs |
| network | netstat_TcpExt_TCPSackShiftFallback | A skb should be shifted or merged, but the TCP stack doesn't do it for some reasons | count | host,container | proc fs |
| network | netstat_TcpExt_TCPSackShifted | A skb is shifted | count | host,container | proc fs |
| network | netstat_TcpExt_TCPSlowStartRetrans | The TCP stack wants to retransmit a packet and the congestion control state is Loss | count | host,container | proc fs |
| network | netstat_TcpExt_TCPSpuriousRTOs | The spurious retransmission timeout detected by the F-RTO algorithm | count | host,container | proc fs |
| network | netstat_TcpExt_TCPSpuriousRtxHostQueues | When the TCP stack wants to retransmit a packet, and finds that packet is not lost in the network, but the packet is not sent yet, the TCP stack would give up the retransmission and update this counter. It might happen if a packet stays too long time in a qdisc or driver queue | count | host,container | proc fs |
| network | netstat_TcpExt_TCPSynRetrans | Number of SYN and SYN/ACK retransmits to break down retransmissions into SYN, fast-retransmits, timeout retransmits, etc | count | host,container | proc fs |
| network | netstat_TcpExt_TCPTSReorder | The reorder packet is detected when a hole is filled | count | host,container | proc fs |
| network | netstat_TcpExt_TCPTimeWaitOverflow | Number of TIME_WAIT sockets unable to be allocated due to limit exceeding | count | host,container | proc fs |
| network | netstat_TcpExt_TCPTimeouts | TCP timeout events | count | host,container | proc fs |
| network | netstat_TcpExt_TCPToZeroWindowAdv | The TCP receive window is set to zero from a no-zero value | count | host,container | proc fs |
| network | netstat_TcpExt_TCPWantZeroWindowAdv | Depending on current memory usage, the TCP stack tries to set receive window to zero. But the receive window might still be a no-zero value | count | host,container | proc fs |
| network | netstat_TcpExt_TCPWinProbe | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TCPWqueueTooBig | \- | count | host,container | proc fs |
| network | netstat_TcpExt_TW | TCP sockets finished time wait in fast timer | count | host,container | proc fs |
| network | netstat_TcpExt_TWKilled | TCP sockets finished time wait in slow timer | count | host,container | proc fs |
| network | netstat_TcpExt_TWRecycled | Time wait sockets recycled by time stamp | count | host,container | proc fs |
| network | netstat_Tcp_ActiveOpens | It means the TCP layer sends a SYN, and come into the SYN-SENT state. Every time TcpActiveOpens increases 1, TcpOutSegs should always increase 1 | count | host,container | proc fs |
| network | netstat_Tcp_AttemptFails | The number of times TCP connections have made a direct transition to the CLOSED state from either the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition to the LISTEN state from the SYN-RCVD state | count | host,container | proc fs |
| network | netstat_Tcp_CurrEstab | The number of TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT | count | host,container | proc fs |
| network | netstat_Tcp_EstabResets | The number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state | count | host,container | proc fs |
| network | netstat_Tcp_InCsumErrors | Incremented when a TCP checksum failure is detected | count | host,container | proc fs |
| network | netstat_Tcp_InErrs | The total number of segments received in error (e.g., bad TCP checksums) | count | host,container | proc fs |
| network | netstat_Tcp_InSegs | The number of packets received by the TCP layer. As mentioned in RFC1213, it includes the packets received in error, such as checksum error, invalid TCP header and so on | count | host,container | proc fs |
| network | netstat_Tcp_MaxConn | The limit on the total number of TCP connections the entity can support. In entities where the maximum number of connections is dynamic, this object should contain the value -1 | count | host,container | proc fs |
| network | netstat_Tcp_OutRsts | The number of TCP segments sent containing the RST flag | count | host,container | proc fs |
| network | netstat_Tcp_OutSegs | The total number of segments sent, including those on current connections but excluding those containing only retransmitted octets | count | host,container | proc fs |
| network | netstat_Tcp_PassiveOpens | The number of times TCP connections have made a direct transition to the SYN-RCVD state from the LISTEN state | count | host,container | proc fs |
| network | netstat_Tcp_RetransSegs | The total number of segments retransmitted - that is, the number of TCP segments transmitted containing one or more previously transmitted octets | count | host,container | proc fs |
| network | netstat_Tcp_RtoAlgorithm | The algorithm used to determine the timeout value used for retransmitting unacknowledged octets | count | host,container | proc fs |
| network | netstat_Tcp_RtoMax | The maximum value permitted by a TCP implementation for the retransmission timeout, measured in milliseconds. More refined semantics for objects of this type depend upon the algorithm used to determine the retransmission timeout | count | host,container | proc fs |
| network | netstat_Tcp_RtoMin | The minimum value permitted by a TCP implementation for the retransmission timeout, measured in milliseconds. More refined semantics for objects of this type depend upon the algorithm used to determine the retransmission timeout | count | host,container | proc fs |
| network | sockstat_FRAG_inuse | \- | count | host,container | proc fs |
| network | sockstat_FRAG_memory | \- | pages | host,container | proc fs |
| network | sockstat_RAW_inuse | Number of RAW socket used | count | host,container | proc fs |
| network | sockstat_TCP_alloc | The number of TCP sockets that have been allocated | count | host,container | proc fs |
| network | sockstat_TCP_inuse | Established TCP socket number | count | host,container | proc fs |
| network | sockstat_TCP_mem | The total size of TCP memory used by the system | pages | system | proc fs |
| network | sockstat_TCP_mem_bytes | The total size of TCP memory used by the system | bytes | system | sockstat_TCP_mem \* page_size |
| network | sockstat_TCP_orphan | Number of TCP connections waiting to be closed | count | host,container | proc fs |
| network | sockstat_TCP_tw | Number of TCP sockets to be terminated | count | host,container | proc fs |
| network | sockstat_UDPLITE_inuse | \- | count | host,container | proc fs |
| network | sockstat_UDP_inuse | Number of UDP socket used | count | host,container | proc fs |
| network | sockstat_UDP_mem | The total size of udp memory used by the system | pages | system | proc fs |
| network | sockstat_UDP_mem_bytes | The total number of bytes of udp memory used by the system | bytes | system | sockstat_UDP_mem \* page_size |
| network | sockstat_sockets_used | The number of sockets used by the system | count | system | proc fs |

193
go.mod Normal file
View File

@ -0,0 +1,193 @@
module huatuo-bamai
go 1.22.4
require (
git.xiaojukeji.com/kernel/huatuo v1.3.0
github.com/cilium/ebpf v0.16.0
github.com/containerd/cgroups/v3 v3.0.3
github.com/deckarep/golang-set v1.8.0
github.com/docker/docker v27.2.0+incompatible
github.com/elastic/go-elasticsearch/v7 v7.17.10
github.com/ema/qdisc v1.0.0
github.com/gin-contrib/pprof v1.5.1
github.com/gin-gonic/gin v1.10.0
github.com/go-playground/validator/v10 v10.22.1
github.com/google/cadvisor v0.50.0
github.com/gopacket/gopacket v1.2.0
github.com/grafana/grafana-plugin-sdk-go v0.251.0
github.com/grafana/pyroscope v1.7.1
github.com/grafana/pyroscope/api v0.4.0
github.com/jsimonetti/rtnetlink v1.4.2
github.com/mdlayher/netlink v1.7.2
github.com/opencontainers/runtime-spec v1.2.0
github.com/pelletier/go-toml v1.9.5
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.20.3
github.com/prometheus/procfs v0.15.1
github.com/safchain/ethtool v0.4.1
github.com/shirou/gopsutil v2.21.11+incompatible
github.com/sirupsen/logrus v1.9.3
github.com/tidwall/gjson v1.14.2
github.com/urfave/cli/v2 v2.27.4
github.com/vishvananda/netlink v1.3.0
github.com/vishvananda/netns v0.0.4
golang.org/x/net v0.31.0
golang.org/x/sys v0.27.0
golang.org/x/time v0.6.0
gopkg.in/natefinch/lumberjack.v2 v2.2.1
k8s.io/api v0.31.3
k8s.io/cri-client v0.31.3
)
require (
connectrpc.com/connect v1.16.2 // indirect
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 // indirect
github.com/apache/arrow/go/v15 v15.0.2 // indirect
github.com/armon/go-metrics v0.4.1 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/bytedance/sonic v1.12.4 // indirect
github.com/bytedance/sonic/loader v0.2.1 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cheekybits/genny v1.0.0 // indirect
github.com/cloudwego/base64x v0.1.4 // indirect
github.com/cloudwego/iasm v0.2.0 // indirect
github.com/coreos/go-semver v0.3.0 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
github.com/dennwc/varint v1.0.0 // indirect
github.com/distribution/reference v0.6.0 // indirect
github.com/docker/go-connections v0.5.0 // indirect
github.com/docker/go-units v0.5.0 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/fatih/color v1.15.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.6 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-kit/log v0.2.1 // indirect
github.com/go-logfmt/logfmt v0.6.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/goccy/go-json v0.10.3 // indirect
github.com/godbus/dbus/v5 v5.0.6 // indirect
github.com/gogo/googleapis v1.4.1 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/gogo/status v1.1.1 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/btree v1.1.2 // indirect
github.com/google/flatbuffers v23.5.26+incompatible // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/mux v1.8.1 // indirect
github.com/grafana/dskit v0.0.0-20231221015914-de83901bf4d6 // indirect
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
github.com/hashicorp/consul/api v1.28.2 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-hclog v1.6.3 // indirect
github.com/hashicorp/go-immutable-radix v1.3.1 // indirect
github.com/hashicorp/go-msgpack v1.1.5 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/go-rootcerts v1.0.2 // indirect
github.com/hashicorp/go-sockaddr v1.0.6 // indirect
github.com/hashicorp/golang-lru v0.6.0 // indirect
github.com/hashicorp/memberlist v0.5.0 // indirect
github.com/hashicorp/serf v0.10.1 // indirect
github.com/josharian/native v1.1.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/klauspost/cpuid/v2 v2.2.9 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/mattetti/filebuffer v1.0.1 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/mdlayher/socket v0.4.1 // indirect
github.com/miekg/dns v1.1.58 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.0 // indirect
github.com/opentracing-contrib/go-stdlib v1.0.0 // indirect
github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect
github.com/pelletier/go-toml/v2 v2.2.3 // indirect
github.com/pierrec/lz4/v4 v4.1.18 // indirect
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/prometheus v0.51.2 // indirect
github.com/rivo/uniseg v0.4.3 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/samber/lo v1.38.1 // indirect
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
github.com/tklauser/go-sysconf v0.3.12 // indirect
github.com/tklauser/numcpus v0.6.1 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/uber/jaeger-client-go v2.30.0+incompatible // indirect
github.com/uber/jaeger-lib v2.4.1+incompatible // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/x448/float16 v0.8.4 // indirect
github.com/xlab/treeprint v1.2.0 // indirect
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
github.com/zeebo/xxh3 v1.0.2 // indirect
go.etcd.io/etcd/api/v3 v3.5.7 // indirect
go.etcd.io/etcd/client/pkg/v3 v3.5.7 // indirect
go.etcd.io/etcd/client/v3 v3.5.7 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 // indirect
go.opentelemetry.io/otel v1.29.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0 // indirect
go.opentelemetry.io/otel/metric v1.29.0 // indirect
go.opentelemetry.io/otel/sdk v1.29.0 // indirect
go.opentelemetry.io/otel/trace v1.29.0 // indirect
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
go.uber.org/atomic v1.11.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.26.0 // indirect
golang.org/x/arch v0.12.0 // indirect
golang.org/x/crypto v0.29.0 // indirect
golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 // indirect
golang.org/x/mod v0.20.0 // indirect
golang.org/x/oauth2 v0.23.0 // indirect
golang.org/x/sync v0.9.0 // indirect
golang.org/x/text v0.20.0 // indirect
golang.org/x/tools v0.24.0 // indirect
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 // indirect
google.golang.org/grpc v1.66.0 // indirect
google.golang.org/protobuf v1.35.2 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/apimachinery v0.31.3 // indirect
k8s.io/client-go v0.31.3 // indirect
k8s.io/component-base v0.31.3 // indirect
k8s.io/cri-api v0.31.3 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
)

691
go.sum Normal file
View File

@ -0,0 +1,691 @@
connectrpc.com/connect v1.16.2 h1:ybd6y+ls7GOlb7Bh5C8+ghA6SvCBajHwxssO2CGFjqE=
connectrpc.com/connect v1.16.2/go.mod h1:n2kgwskMHXC+lVqb18wngEpF95ldBHXjZYJussz5FRc=
git.xiaojukeji.com/kernel/huatuo v1.3.0 h1:dCtjHnQg+2b2SEhXi3AuEWbdH3sC0j70xaDCvuOdFGs=
git.xiaojukeji.com/kernel/huatuo v1.3.0/go.mod h1:oMnjctv7Dp754Vz1cZm5/k/8Eke0I2DijQNzBul3bTc=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
github.com/HdrHistogram/hdrhistogram-go v1.1.2 h1:5IcZpTvzydCQeHzK4Ef/D5rrSqwxob0t8PQPMybUNFM=
github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 h1:ez/4by2iGztzR4L0zgAOR8lTQK9VlyBVVd7G4omaOQs=
github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE=
github.com/apache/arrow/go/v15 v15.0.2 h1:60IliRbiyTWCWjERBCkO1W4Qun9svcYoZrSLcyOsMLE=
github.com/apache/arrow/go/v15 v15.0.2/go.mod h1:DGXsR3ajT524njufqf95822i+KTh+yea1jass9YXgjA=
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA=
github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4=
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/bytedance/sonic v1.12.4 h1:9Csb3c9ZJhfUWeMtpCDCq6BUoH5ogfDFLUgQ/jG+R0k=
github.com/bytedance/sonic v1.12.4/go.mod h1:B8Gt/XvtZ3Fqj+iSKMypzymZxw/FVwgIGKzMzT9r/rk=
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
github.com/bytedance/sonic/loader v0.2.1 h1:1GgorWTqf12TA8mma4DDSbaQigE2wOgQo7iCjjJv3+E=
github.com/bytedance/sonic/loader v0.2.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cheekybits/genny v1.0.0 h1:uGGa4nei+j20rOSeDeP5Of12XVm7TGUd4dJA9RDitfE=
github.com/cheekybits/genny v1.0.0/go.mod h1:+tQajlRqAUrPI7DOSpB0XAqZYtQakVtB7wXkRAgjxjQ=
github.com/chromedp/cdproto v0.0.0-20220208224320-6efb837e6bc2 h1:XCdvHbz3LhewBHN7+mQPx0sg/Hxil/1USnBmxkjHcmY=
github.com/chromedp/cdproto v0.0.0-20220208224320-6efb837e6bc2/go.mod h1:At5TxYYdxkbQL0TSefRjhLE3Q0lgvqKKMSFUglJ7i1U=
github.com/cilium/ebpf v0.16.0 h1:+BiEnHL6Z7lXnlGUsXQPPAE7+kenAd4ES8MQ5min0Ok=
github.com/cilium/ebpf v0.16.0/go.mod h1:L7u2Blt2jMM/vLAVgjxluxtBKlz3/GWjB0dMOEngfwE=
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/containerd/cgroups/v3 v3.0.3 h1:S5ByHZ/h9PMe5IOQoN7E+nMc2UcLEM/V48DGDJ9kip0=
github.com/containerd/cgroups/v3 v3.0.3/go.mod h1:8HBe7V3aWGLFPd/k03swSIsGjZhHI2WzJmticMgVuz0=
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM=
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/deckarep/golang-set v1.8.0 h1:sk9/l/KqpunDwP7pSjUg0keiOOLEnOBHzykLrsPppp4=
github.com/deckarep/golang-set v1.8.0/go.mod h1:5nI87KwE7wgsBU1F4GKAw2Qod7p5kyS383rP6+o6qqo=
github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE=
github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA=
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
github.com/docker/docker v27.2.0+incompatible h1:Rk9nIVdfH3+Vz4cyI/uhbINhEZ/oLmc+CBXmH6fbNk4=
github.com/docker/docker v27.2.0+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/elastic/go-elasticsearch/v7 v7.17.10 h1:TCQ8i4PmIJuBunvBS6bwT2ybzVFxxUhhltAs3Gyu1yo=
github.com/elastic/go-elasticsearch/v7 v7.17.10/go.mod h1:OJ4wdbtDNk5g503kvlHLyErCgQwwzmDtaFC4XyOxXA4=
github.com/elazarl/goproxy v0.0.0-20230731152917-f99041a5c027 h1:1L0aalTpPz7YlMxETKpmQoWMBkeiuorElZIXoNmgiPE=
github.com/elazarl/goproxy v0.0.0-20230731152917-f99041a5c027/go.mod h1:Ro8st/ElPeALwNFlcTpWmkr6IoMFfkjXAvTHpevnDsM=
github.com/ema/qdisc v1.0.0 h1:EHLG08FVRbWLg8uRICa3xzC9Zm0m7HyMHfXobWFnXYg=
github.com/ema/qdisc v1.0.0/go.mod h1:FhIc0fLYi7f+lK5maMsesDqwYojIOh3VfRs8EVd5YJQ=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=
github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs=
github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw=
github.com/felixge/fgprof v0.9.4-0.20221116204635-ececf7638e93 h1:S8ZdFFDRXUKs3fHpMDPVh9oWd46hKqEEt/X3oxhtF5Q=
github.com/felixge/fgprof v0.9.4-0.20221116204635-ececf7638e93/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
github.com/gabriel-vasile/mimetype v1.4.6 h1:3+PzJTKLkvgjeTbts6msPJt4DixhT4YtFNf1gtGe3zc=
github.com/gabriel-vasile/mimetype v1.4.6/go.mod h1:JX1qVKqZd40hUPpAfiNTe0Sne7hdfKSbOqqmkq8GCXc=
github.com/getkin/kin-openapi v0.124.0 h1:VSFNMB9C9rTKBnQ/fpyDU8ytMTr4dWI9QovSKj9kz/M=
github.com/getkin/kin-openapi v0.124.0/go.mod h1:wb1aSZA/iWmorQP9KTAS/phLj/t17B5jT7+fS8ed9NM=
github.com/gin-contrib/pprof v1.5.1 h1:Mzy+3HHtHbtwr4VewBTXZp/hR7pS6ZuZkueBIrQiLL4=
github.com/gin-contrib/pprof v1.5.1/go.mod h1:uwzoF6FxdzJJGyMdcZB+VSuVjOBe1kSH+KMIvKGwvCQ=
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU=
github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0=
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4=
github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-openapi/jsonpointer v0.20.2 h1:mQc3nmndL8ZBzStEo3JYF8wzmeWffDH4VbXz58sAx6Q=
github.com/go-openapi/jsonpointer v0.20.2/go.mod h1:bHen+N0u1KEO3YlmqOjTT9Adn1RfD91Ar825/PuiRVs=
github.com/go-openapi/swag v0.22.9 h1:XX2DssF+mQKM2DHsbgZK74y/zj4mo9I99+89xUmuZCE=
github.com/go-openapi/swag v0.22.9/go.mod h1:3/OXnFfnMAwBD099SwYRk7GD3xOrr1iL7d/XNLXVVwE=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.22.1 h1:40JcKH+bBNGFczGuoBYgX4I6m/i27HYW8P9FDk5PbgA=
github.com/go-playground/validator/v10 v10.22.1/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
github.com/go-quicktest/qt v1.101.0 h1:O1K29Txy5P2OK0dGo59b7b0LR6wKfIhttaAhHUyn7eI=
github.com/go-quicktest/qt v1.101.0/go.mod h1:14Bz/f7NwaXPtdYEgzsx46kqSxVwTbzVZsDC26tQJow=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA=
github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/godbus/dbus/v5 v5.0.6 h1:mkgN1ofwASrYnJ5W6U/BxG15eXXXjirgZc7CLqkcaro=
github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gogo/googleapis v0.0.0-20180223154316-0cd9801be74a/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s=
github.com/gogo/googleapis v1.4.1 h1:1Yx4Myt7BxzvUr5ldGSbwYiZG6t9wGBZ+8/fX3Wvtq0=
github.com/gogo/googleapis v1.4.1/go.mod h1:2lpHqI5OcWCtVElxXnPt+s8oJvMpySlOyM6xDCrzib4=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/gogo/status v1.1.1 h1:DuHXlSFHNKqTQ+/ACf5Vs6r4X/dH2EgIzR9Vr+H65kg=
github.com/gogo/status v1.1.1/go.mod h1:jpG3dM5QPcqu19Hg8lkUhBFBa3TcLs1DG7+2Jqci7oU=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU=
github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
github.com/google/cadvisor v0.50.0 h1:7w/hKIbJKBWqQsRTy+Hpj2vj+fnxrLXcEXFy+LW0Bsg=
github.com/google/cadvisor v0.50.0/go.mod h1:VxCDwZalpFyENvmfabFqaIGsqNKLtDzE62a19rfVTB8=
github.com/google/flatbuffers v23.5.26+incompatible h1:M9dgRyhJemaM4Sw8+66GHBu8ioaQmyPLg1b8VwK5WJg=
github.com/google/flatbuffers v23.5.26+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af h1:kmjWCqn2qkEml422C2Rrd27c3VGxi6a/6HNq8QmHRKM=
github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gopacket/gopacket v1.2.0 h1:eXbzFad7f73P1n2EJHQlsKuvIMJjVXK5tXoSca78I3A=
github.com/gopacket/gopacket v1.2.0/go.mod h1:BrAKEy5EOGQ76LSqh7DMAr7z0NNPdczWm2GxCG7+I8M=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/grafana/dskit v0.0.0-20231221015914-de83901bf4d6 h1:Z78JZ7pa6InQ5BcMB27M+NMTZ7LV+MXgOd3dZPfEdG4=
github.com/grafana/dskit v0.0.0-20231221015914-de83901bf4d6/go.mod h1:kkWM4WUV230bNG3urVRWPBnSJHs64y/0RmWjftnnn0c=
github.com/grafana/grafana-plugin-sdk-go v0.251.0 h1:gnOtxrC/1rqFvpSbQYyoZqkr47oWDlz4Q2L6Ozmsi3w=
github.com/grafana/grafana-plugin-sdk-go v0.251.0/go.mod h1:gCGN9kHY3KeX4qyni3+Kead38Q+85pYOrsDcxZp6AIk=
github.com/grafana/otel-profiling-go v0.5.1 h1:stVPKAFZSa7eGiqbYuG25VcqYksR6iWvF3YH66t4qL8=
github.com/grafana/otel-profiling-go v0.5.1/go.mod h1:ftN/t5A/4gQI19/8MoWurBEtC6gFw8Dns1sJZ9W4Tls=
github.com/grafana/pyroscope v1.7.1 h1:aGXOVNwUGXK3dNSpc40/IOtOG/ACvaS2C/mJ7jUxMFg=
github.com/grafana/pyroscope v1.7.1/go.mod h1:RuSiNg8N9iufpHbScIFU4kU4LbWHaU7G1knyVDw/V5s=
github.com/grafana/pyroscope-go v1.0.3 h1:8WWmItzLfg4m8G+j//ElSjMeMr88Y6Lvblar6qeTyKk=
github.com/grafana/pyroscope-go/godeltaprof v0.1.8 h1:iwOtYXeeVSAeYefJNaxDytgjKtUuKQbJqgAIjlnicKg=
github.com/grafana/pyroscope-go/godeltaprof v0.1.8/go.mod h1:2+l7K7twW49Ct4wFluZD3tZ6e0SjanjcUUBPVD/UuGU=
github.com/grafana/pyroscope/api v0.4.0 h1:J86DxoNeLOvtJhB1Cn65JMZkXe682D+RqeoIUiYc/eo=
github.com/grafana/pyroscope/api v0.4.0/go.mod h1:MFnZNeUM4RDsDOnbgKW3GWoLSBpLzMMT9nkvhHHo81o=
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db h1:7aN5cccjIqCLTzedH7MZzRZt5/lsAHch6Z3L2ZGn5FA=
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db/go.mod h1:M5qHK+eWfAv8VR/265dIuEpL3fNfeC21tXXp9itM24A=
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 h1:qnpSQwGEnkcRpTqNOIR6bJbR0gAorgP9CSALpRcKoAA=
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1/go.mod h1:lXGCsh6c22WGtjr+qGHj1otzZpV/1kwTMAqkwZsnWRU=
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 h1:pRhl55Yx1eC7BZ1N+BBWwnKaMyD8uC+34TLdndZMAKk=
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0/go.mod h1:XKMd7iuf/RGPSMJ/U4HP0zS2Z9Fh8Ps9a+6X26m/tmI=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k=
github.com/hashicorp/consul/api v1.28.2 h1:mXfkRHrpHN4YY3RqL09nXU1eHKLNiuAN4kHvDQ16k/8=
github.com/hashicorp/consul/api v1.28.2/go.mod h1:KyzqzgMEya+IZPcD65YFoOVAgPpbfERu4I/tzG6/ueE=
github.com/hashicorp/consul/sdk v0.16.0 h1:SE9m0W6DEfgIVCJX7xU+iv/hUl4m/nxqMTnCdMxDpJ8=
github.com/hashicorp/consul/sdk v0.16.0/go.mod h1:7pxqqhqoaPqnBnzXD1StKed62LqJeClzVsUEy85Zr0A=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
github.com/hashicorp/go-immutable-radix v1.3.1 h1:DKHmCUm2hRBK510BaiZlwvpD40f8bJFeZnpfm2KLowc=
github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=
github.com/hashicorp/go-msgpack v1.1.5 h1:9byZdVjKTe5mce63pRVNP1L7UAmdHOTEMGehn6KvJWs=
github.com/hashicorp/go-msgpack v1.1.5/go.mod h1:gWVc3sv/wbDmR3rQsj1CAktEZzoz1YNK9NfGLXJ69/4=
github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk=
github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hashicorp/go-plugin v1.6.1 h1:P7MR2UP6gNKGPp+y7EZw2kOiq4IR9WiqLvp0XOsVdwI=
github.com/hashicorp/go-plugin v1.6.1/go.mod h1:XPHFku2tFo3o3QKFgSYo+cghcUhw1NA1hZyMK0PWAw0=
github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=
github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc=
github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8=
github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU=
github.com/hashicorp/go-sockaddr v1.0.6 h1:RSG8rKU28VTUTvEKghe5gIhIQpv8evvNpnDEyqO4u9I=
github.com/hashicorp/go-sockaddr v1.0.6/go.mod h1:uoUUmtwU7n9Dv3O4SNLeFvg0SxQ3lyjsj6+CCykpaxI=
github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4=
github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8=
github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-version v1.6.0 h1:feTTfFNnjP967rlCxM/I9g701jU+RN74YKx2mOkIeek=
github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.6.0 h1:uL2shRDx7RTrOrTCUZEGP/wJUFiUI8QT6E7z5o8jga4=
github.com/hashicorp/golang-lru v0.6.0/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=
github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc=
github.com/hashicorp/memberlist v0.5.0 h1:EtYPN8DpAURiapus508I4n9CzHs2W+8NZGbmmR/prTM=
github.com/hashicorp/memberlist v0.5.0/go.mod h1:yvyXLpo0QaGE59Y7hDTsTzDD25JYBZ4mHgHUZ8lrOI0=
github.com/hashicorp/serf v0.10.1 h1:Z1H2J60yRKvfDYAOZLd2MU0ND4AH/WDz7xYHDWQsIPY=
github.com/hashicorp/serf v0.10.1/go.mod h1:yL2t6BqATOLGc5HF7qbFkTfXoPIY0WZdWHfEvMqbG+4=
github.com/hashicorp/yamux v0.1.1 h1:yrQxtgseBDrq9Y652vSRDvsKCJKOUD+GzTS4Y0Y8pvE=
github.com/hashicorp/yamux v0.1.1/go.mod h1:CtWFDAQgb7dxtzFs4tWbplKIe2jSi3+5vKbgIO0SLnQ=
github.com/invopop/yaml v0.2.0 h1:7zky/qH+O0DwAyoobXUqvVBwgBFRxKoQ/3FjcVpjTMY=
github.com/invopop/yaml v0.2.0/go.mod h1:2XuRLgs/ouIrW3XNzuNj7J3Nvu/Dig5MXvbCEdiBN3Q=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA=
github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w=
github.com/jsimonetti/rtnetlink v1.4.2 h1:Df9w9TZ3npHTyDn0Ev9e1uzmN2odmXd0QX+J5GTEn90=
github.com/jsimonetti/rtnetlink v1.4.2/go.mod h1:92s6LJdE+1iOrw+F2/RO7LYI2Qd8pPpFNNUYW06gcoM=
github.com/jsimonetti/rtnetlink/v2 v2.0.1 h1:xda7qaHDSVOsADNouv7ukSuicKZO7GgVUCXxpaIEIlM=
github.com/jsimonetti/rtnetlink/v2 v2.0.1/go.mod h1:7MoNYNbb3UaDHtF8udiJo/RH6VsTKP1pqKLUTVCvToE=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY=
github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8=
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
github.com/magefile/mage v1.15.0 h1:BvGheCMAsG3bWUDbZ8AyXXpCNwU9u5CB6sM+HNb9HYg=
github.com/magefile/mage v1.15.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattetti/filebuffer v1.0.1 h1:gG7pyfnSIZCxdoKq+cPa8T0hhYtD9NxCdI4D7PTjRLM=
github.com/mattetti/filebuffer v1.0.1/go.mod h1:YdMURNDOttIiruleeVr6f56OrMc+MydEnTcXwtkxNVs=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE=
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g=
github.com/mdlayher/netlink v1.7.2/go.mod h1:xraEF7uJbxLhc5fpHL4cPe221LI2bdttWlU+ZGLfQSw=
github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U=
github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA=
github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI=
github.com/miekg/dns v1.1.58 h1:ca2Hdkz+cDg/7eNF6V56jjzuZ4aCAE+DbVkILdQWG/4=
github.com/miekg/dns v1.1.58/go.mod h1:Ypv+3b/KadlvW9vJfXOTf300O4UqaHFzFCuHz+rPkBY=
github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/go-testing-interface v1.14.1 h1:jrgshOhYAUVNMAJiKbEu7EqAwgJJ2JqpQmpLJOu07cU=
github.com/mitchellh/go-testing-interface v1.14.1/go.mod h1:gfgS7OtZj6MA4U1UrDRp04twqAjfvlZyCfX3sDjEym8=
github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw=
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/oklog/run v1.1.0 h1:GEenZ1cK0+q0+wsJew9qUg/DyD8k3JzYsZAi5gYi2mA=
github.com/oklog/run v1.1.0/go.mod h1:sVPdnTZT1zYwAJeCMu2Th4T21pA3FPOQRfWjQlk7DVU=
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA=
github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To=
github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk=
github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opentracing-contrib/go-stdlib v1.0.0 h1:TBS7YuVotp8myLon4Pv7BtCBzOTo1DeZCld0Z63mW2w=
github.com/opentracing-contrib/go-stdlib v1.0.0/go.mod h1:qtI1ogk+2JhVPIXVc6q+NHziSmy2W5GbdQZFUHADCBU=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b h1:FfH+VrHHk6Lxt9HdVS0PXzSXFyS2NbZKXv33FYPol0A=
github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b/go.mod h1:AC62GU6hc0BrNm+9RK9VSiwa/EUe1bkIeFORAMcHvJU=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY=
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M=
github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc=
github.com/perimeterx/marshmallow v1.1.5 h1:a2LALqQ1BlHM8PZblsDdidgv1mWi1DgC2UmX50IvK2s=
github.com/perimeterx/marshmallow v1.1.5/go.mod h1:dsXbUu8CRzfYP5a87xpp0xq9S3u0Vchtcl8we9tYaXw=
github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ=
github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
github.com/prometheus/client_golang v1.20.3 h1:oPksm4K8B+Vt35tUhw6GbSNSgVlVSBH0qELP/7u83l4=
github.com/prometheus/client_golang v1.20.3/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4=
github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/prometheus/prometheus v0.51.2 h1:U0faf1nT4CB9DkBW87XLJCBi2s8nwWXdTbyzRUAkX0w=
github.com/prometheus/prometheus v0.51.2/go.mod h1:yv4MwOn3yHMQ6MZGHPg/U7Fcyqf+rxqiZfSur6myVtc=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.3 h1:utMvzDsuh3suAEnhH0RdHmoPbU648o6CvXxTx4SBMOw=
github.com/rivo/uniseg v0.4.3/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
github.com/safchain/ethtool v0.4.1 h1:S6mEleTADqgynileXoiapt/nKnatyR6bmIHoF+h2ADo=
github.com/safchain/ethtool v0.4.1/go.mod h1:XLLnZmy4OCRTkksP/UiMjij96YmIsBfmBQcs7H6tA48=
github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM=
github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
github.com/shirou/gopsutil v2.21.11+incompatible h1:lOGOyCG67a5dv2hq5Z1BLDUqqKp3HkbjPcz5j6XMS0U=
github.com/shirou/gopsutil v2.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tidwall/gjson v1.14.2 h1:6BBkirS0rAHjumnjHF6qgy5d2YAJ1TLIaFE2lzfOLqo=
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaOOb6ThwMmTEbhRwtKR97o=
github.com/uber/jaeger-client-go v2.30.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk=
github.com/uber/jaeger-lib v2.4.1+incompatible h1:td4jdvLcExb4cBISKIpHuGoVXh+dVKhn2Um6rjCsSsg=
github.com/uber/jaeger-lib v2.4.1+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U=
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/unknwon/bra v0.0.0-20200517080246-1e3013ecaff8 h1:aVGB3YnaS/JNfOW3tiHIlmNmTDg618va+eT0mVomgyI=
github.com/unknwon/bra v0.0.0-20200517080246-1e3013ecaff8/go.mod h1:fVle4kNr08ydeohzYafr20oZzbAkhQT39gKK/pFQ5M4=
github.com/unknwon/com v1.0.1 h1:3d1LTxD+Lnf3soQiD4Cp/0BRB+Rsa/+RTvz8GMMzIXs=
github.com/unknwon/com v1.0.1/go.mod h1:tOOxU81rwgoCLoOVVPHb6T/wt8HZygqH5id+GNnlCXM=
github.com/unknwon/log v0.0.0-20150304194804-e617c87089d3 h1:4EYQaWAatQokdji3zqZloVIW/Ke1RQjYw2zHULyrHJg=
github.com/unknwon/log v0.0.0-20150304194804-e617c87089d3/go.mod h1:1xEUf2abjfP92w2GZTV+GgaRxXErwRXcClbUwrNJffU=
github.com/urfave/cli v1.22.15 h1:nuqt+pdC/KqswQKhETJjo7pvn/k4xMUxgW6liI7XpnM=
github.com/urfave/cli v1.22.15/go.mod h1:wSan1hmo5zeyLGBjRJbzRTNk8gwoYa2B9n4q9dmRIc0=
github.com/urfave/cli/v2 v2.27.4 h1:o1owoI+02Eb+K107p27wEX9Bb8eqIoZCfLXloLUSWJ8=
github.com/urfave/cli/v2 v2.27.4/go.mod h1:m4QzxcD2qpra4z7WhzEGn74WZLViBnMpb1ToCAKdGRQ=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk=
github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs=
github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8=
github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ=
github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
go.etcd.io/etcd/api/v3 v3.5.7 h1:sbcmosSVesNrWOJ58ZQFitHMdncusIifYcrBfwrlJSY=
go.etcd.io/etcd/api/v3 v3.5.7/go.mod h1:9qew1gCdDDLu+VwmeG+iFpL+QlpHTo7iubavdVDgCAA=
go.etcd.io/etcd/client/pkg/v3 v3.5.7 h1:y3kf5Gbp4e4q7egZdn5T7W9TSHUvkClN6u+Rq9mEOmg=
go.etcd.io/etcd/client/pkg/v3 v3.5.7/go.mod h1:o0Abi1MK86iad3YrWhgUsbGx1pmTS+hrORWc2CamuhY=
go.etcd.io/etcd/client/v3 v3.5.7 h1:u/OhpiuCgYY8awOHlhIhmGIGpxfBU/GZBUP3m/3/Iz4=
go.etcd.io/etcd/client/v3 v3.5.7/go.mod h1:sOWmj9DZUMyAngS7QQwCyAXXAL6WhgTOPLNS/NabQgw=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0 h1:9G6E0TXzGFVfTnawRzrPl83iHOAV7L8NJiR8RSGYV1g=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0/go.mod h1:azvtTADFQJA8mX80jIH/akaE7h+dbm/sVuaHqN13w74=
go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.53.0 h1:IVtyPth4Rs5P8wIf0mP2KVKFNTJ4paX9qQ4Hkh5gFdc=
go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.53.0/go.mod h1:ImRBLMJv177/pwiLZ7tU7HDGNdBv7rS0HQ99eN/zBl8=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 h1:4K4tsIXefpVJtvA/8srF4V4y0akAoPHkIslgAkjixJA=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0/go.mod h1:jjdQuTGVsXV4vSs+CJ2qYDeDPf9yIJV23qlIzBm73Vg=
go.opentelemetry.io/contrib/propagators/jaeger v1.29.0 h1:+YPiqF5rR6PqHBlmEFLPumbSP0gY0WmCGFayXRcCLvs=
go.opentelemetry.io/contrib/propagators/jaeger v1.29.0/go.mod h1:6PD7q7qquWSp3Z4HeM3e/2ipRubaY1rXZO8NIHVDZjs=
go.opentelemetry.io/contrib/samplers/jaegerremote v0.23.0 h1:qKi9ntCcronqWqfuKxqrxZlZd82jXJEgGiAWH1+phxo=
go.opentelemetry.io/contrib/samplers/jaegerremote v0.23.0/go.mod h1:1kbAgQa5lgYC3rC6cE3jSxQ/Q13l33wv/WI8U+htwag=
go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw=
go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 h1:dIIDULZJpgdiHz5tXrTgKIMLkus6jEFa7x5SOKcyR7E=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0/go.mod h1:jlRVBe7+Z1wyxFSUs48L6OBQZ5JwH2Hg/Vbl+t9rAgI=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0 h1:R3X6ZXmNPRR8ul6i3WgFURCHzaXjHdm0karRG/+dj3s=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0/go.mod h1:QWFXnDavXWwMx2EEcZsf3yxgEKAqsxQ+Syjp+seyInw=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.27.0 h1:QY7/0NeRPKlzusf40ZE4t1VlMKbqSNT7cJRYzWuja0s=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.27.0/go.mod h1:HVkSiDhTM9BoUJU8qE6j2eSWLLXvi1USXjyd2BXT8PY=
go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc=
go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8=
go.opentelemetry.io/otel/sdk v1.29.0 h1:vkqKjk7gwhS8VaWb0POZKmIEDimRCMsopNYnriHyryo=
go.opentelemetry.io/otel/sdk v1.29.0/go.mod h1:pM8Dx5WKnvxLCb+8lG1PRNIDxu9g9b9g59Qr7hfAAok=
go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4=
go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ=
go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0=
go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8=
go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo=
go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so=
golang.org/x/arch v0.12.0 h1:UsYJhbzPYGsT0HbEdmYcqtCv8UNGvnaL561NnIUvaKg=
golang.org/x/arch v0.12.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ=
golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg=
golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 h1:kx6Ds3MlpiUHKj7syVnbp57++8WpuKPcR5yjLBjvLEA=
golang.org/x/exp v0.0.0-20240823005443-9b4947da3948/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8=
golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo=
golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM=
golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs=
golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s=
golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug=
golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4=
golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U=
golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190424220101-1e8e1cfdf96b/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk=
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8=
gonum.org/v1/gonum v0.12.0 h1:xKuo6hzt+gMav00meVPUlXwSdoEJP46BR+wdxQEFK2o=
gonum.org/v1/gonum v0.12.0/go.mod h1:73TDxJfAAHeA8Mk9mf8NlIppyhQNo5GLTcYeqgo2lvY=
google.golang.org/genproto v0.0.0-20180518175338-11a468237815/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd h1:BBOTEWLuuEGQy9n1y9MhVJ9Qt0BDu21X8qZs71/uPZo=
google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd/go.mod h1:fO8wJzT2zbQbAjbIoos1285VfEIYKDDY+Dt+WpTkh6g=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 h1:e7S5W7MGGLaSu8j3YjdezkZ+m1/Nm0uRVRMEMGk26Xs=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
google.golang.org/grpc v1.12.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw=
google.golang.org/grpc v1.66.0 h1:DibZuoBznOxbDQxRINckZcUvnCEvrW9pcWIE2yF9r1c=
google.golang.org/grpc v1.66.0/go.mod h1:s3/l6xSSCURdVfAnL+TqCNMyTDAGN6+lZeVxnZR128Y=
google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io=
google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/fsnotify/fsnotify.v1 v1.4.7 h1:XNNYLJHt73EyYiCZi6+xjupS9CpvmiDgjPTAjrBlQbo=
gopkg.in/fsnotify/fsnotify.v1 v1.4.7/go.mod h1:Fyux9zXlo4rWoMSIzpn9fDAYjalPqJ/K1qJ27s+7ltE=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc=
gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.5.1 h1:EENdUnS3pdur5nybKYIh2Vfgc8IUNBjxDPSjtiJcOzU=
gotest.tools/v3 v3.5.1/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU=
k8s.io/api v0.31.3 h1:umzm5o8lFbdN/hIXbrK9oRpOproJO62CV1zqxXrLgk8=
k8s.io/api v0.31.3/go.mod h1:UJrkIp9pnMOI9K2nlL6vwpxRzzEX5sWgn8kGQe92kCE=
k8s.io/apimachinery v0.31.3 h1:6l0WhcYgasZ/wk9ktLq5vLaoXJJr5ts6lkaQzgeYPq4=
k8s.io/apimachinery v0.31.3/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo=
k8s.io/client-go v0.31.3 h1:CAlZuM+PH2cm+86LOBemaJI/lQ5linJ6UFxKX/SoG+4=
k8s.io/client-go v0.31.3/go.mod h1:2CgjPUTpv3fE5dNygAr2NcM8nhHzXvxB8KL5gYc3kJs=
k8s.io/component-base v0.31.3 h1:DMCXXVx546Rfvhj+3cOm2EUxhS+EyztH423j+8sOwhQ=
k8s.io/component-base v0.31.3/go.mod h1:xME6BHfUOafRgT0rGVBGl7TuSg8Z9/deT7qq6w7qjIU=
k8s.io/cri-api v0.31.3 h1:dsZXzrGrCEwHjsTDlAV7rutEplpMLY8bfNRMIqrtXjo=
k8s.io/cri-api v0.31.3/go.mod h1:Po3TMAYH/+KrZabi7QiwQI4a692oZcUOUThd/rqwxrI=
k8s.io/cri-client v0.31.3 h1:9ZwddaNJomqkTBYQqSmB+Ccns3beY4HyYDwmRtWTCJM=
k8s.io/cri-client v0.31.3/go.mod h1:klbWiYkOatOQOkXOYZMZMGSTM8q9eC/efsYGuXcgPes=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A=
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=

224
huatuo-bamai.conf Normal file
View File

@ -0,0 +1,224 @@
# log-level: Debug, Info, Warn, Error, Panic
LogLevel = "Info"
# logging filepath
# LogFile = ""
[APIServer]
# TCPAddr is the tcp monitoring information of the huatuo-bamai server
TCPAddr = ":19704"
[HuaTuoConf]
UserName = "huatuo-bamai"
PassWord = "huatuo-bamai"
UnixAddr = "/home/xiaoju/didicloud/huatuo/shared/huatuo.unix"
ServerIP = "127.0.0.1:12735"
KeepaliveTimeout = 300
APIVersion = "v1.3"
ReqTimeout = 15
[RuntimeCgroup]
LimitInitCPU = 0.5
LimitCPU = 2.0
# limit memory (MB)
LimitMem = 2048
# storage configurations
[Storage]
# ES & Kibana configurations
[Storage.ES]
# disable ES storage if one of Address, Username, Password empty.
Address = "http://10.88.128.149:30963"
Username = "20416"
Password = "E4haxbsIUPiUZES"
Index = "cn_huatuo_relay_cases"
# tracer's record data
# Path: all but the last element of path for per tracer
# RotationSize: the maximum size in Megabytes of a record file before it gets rotated for per subsystem
# MaxRotation: the maximum number of old log files to retain for per subsystem
[Storage.LocalFile]
Path = "./record"
RotationSize = 100
MaxRotation = 10
[TaskConfig]
MaxRunningTask = 10
[Tracing]
# blacklist
BlackList = ["softlockup"]
[Tracing.Cpuidle]
CgUserth = 75 #75%
CgDeltaUserth = 30 #30%
CgSysth = 45 #45%
CgDeltaSysth = 0 #0
CgUsageth = 90 #90%
CgDeltaUsageth = 30 #30%
CgStep = 10 #10s
CgGrace = 1800 #1800s
CgUsageToolduration = 10 #10s
[Tracing.Cpusys]
CPUSysth = 50 #50%
CPUSysDelta = 30 #30%
CPUSysStep = 1 #1s
CPUSysToolduration = 10 #10s
[Tracing.Waitrate]
[Tracing.Waitrate.SpikeThreshold]
"0" = 50.0
"101" = 80.0
"102" = 120.0
"103" = 170.0
"1" = 220.0
"2" = 270.0
"3" = 320.0
"4" = 370.0
[Tracing.Waitrate.SlopeThreshold]
"0" = 0.05
"101" = 0.1
"102" = 0.2
"103" = 0.3
"1" = 0.4
"2" = 0.5
"3" = 0.6
"4" = 0.7
[Tracing.Waitrate.SampleConfig]
# DataSetCapability * SampleInterval is time capability (in seconds)
DataSetCapability = 360
# Seconds
OnceCaptureTime = 15
SampleInterval = 5
[Tracing.Softirq]
ThresholdTime = 100000000
[Tracing.Dload]
ThresholdLoad = 5.0
MonitorGap = 180
[Tracing.IOTracing]
IOScheduleThreshold = 100 #100ms
ReadThreshold = 2000 #MB/s
WriteThreshold = 1500 #MB/s
IOutilThreshold = 90 #90%
IOwaitThreshold = 100 #100ms
PeriodSecond = 8
MaxStackNumber = 16
TopProcessCount = 15
TopFilesPerProcess = 10
[Tracing.MemoryReclaim]
Deltath = 900000000 #900ms
[Tracing.MemoryBurst]
HistoryWindowLength = 60
SampleInterval = 5 # seconds
SilencePeriod = 300 # seconds
TopNProcesses = 10
BurstRatio = 2.0
AnonThreshold = 70 # percent
# the latency threshold for package receive
[Tracing.NetRecvLat]
ToNetIf = 5 # ms, from driver to a core recv
ToTCPV4 = 10 # ms, from driver to TCP recv, contains ToNetIf
ToUserCopy = 115 # ms, from driver to user recv, contains ToNetIf + ToTCPV4
IgnoreHost = true # whether to ignore the host process
IgnoreContainerLevel = [103, 3, 4]
[Tracing.Dropwatch]
IgnoreNeighInvalidate = true # ignore the error of `neigh_invalidate`
[Tracing.Netdev]
Whitelist = ["eth0", "eth1", "bond4", "lo"]
[Tracing.Fastfork]
RedisInfoCollectionInterval = 3600 # interval (seconds) of redis process information collection
EnableForkProbe = 1 # enable fork kprobe and kretprobe
EnablePtsepProbe = 1
EnableWaitptsepProbe = 1
# Collector Configurations.
[MetricCollector]
# blacklist
BlackList = ["ethtool"]
# Netdev Configurations.
[MetricCollector.Netdev]
# Use `netlink` instead of `procfs net/dev` to get netdev statistic.
# Only support the host environment to use `netlink` now!
EnableNetlink = false
# IgnoredDevices: Ignore special devices in this netdev statistic.
# AcceptDevices: Accept special devices in this netdev statistic.
# These configurations use `Regexp`.
# 'IgnoredDevices' has higher priority than 'AcceptDevices'.
IgnoredDevices = "^(lo)|(docker\\w*)|(veth\\w*)$"
#AcceptDevices = ""
# Qdisc Configurations.
[MetricCollector.Qdisc]
# IgnoredDevices: Ignore special devices in this qdisc statistic.
# AcceptDevices: Accept special devices in this qdisc statistic.
# These configurations use `Regexp`.
# 'IgnoredDevices' has higher priority than 'AcceptDevices'.
IgnoredDevices = "^(lo)|(docker\\w*)|(veth\\w*)$"
#AcceptDevices = ""
[MetricCollector.Vmstat]
IncludedMetrics = "allocstall|nr_active_anon|nr_active_file|nr_boost_pages|nr_dirty|nr_free_pages|nr_inactive_anon|nr_inactive_file|nr_kswapd_boost|nr_mlock|nr_shmem|nr_slab_reclaimable|nr_slab_unreclaimable|nr_unevictable|nr_writeback|numa_pages_migrated|pgdeactivate|pgrefill|pgscan_direct|pgscan_kswapd|pgsteal_direct|pgsteal_kswapd"
ExcludedMetrics = "total"
[MetricCollector.MemoryStat]
IncludedMetrics = "active_anon|active_file|dirty|inactive_anon|inactive_file|pgdeactivate|pgrefill|pgscan_direct|pgscan_kswapd|pgsteal_direct|pgsteal_kswapd|shmem|unevictable|writeback|pgscan_globaldirect|pgscan_globalkswapd|pgscan_cswapd|pgsteal_cswapd|pgsteal_globaldirect|pgsteal_globalkswapd"
ExcludedMetrics = "total"
[MetricCollector.MemoryEvents]
IncludedMetrics = "watermark_inc|watermark_dec"
# ExcludedMetrics = ""
# Netstat Configurations.
[MetricCollector.Netstat]
# ExcludedMetrics: Ignore keys in this netstat statistic.
# IncludedMetrics: Accept keys in this netstat statistic.
# The 'key' format: protocol + '_' + netstat_name. eg: TcpExt_TCPSynRetrans.
# These configurations use `Regexp`.
# 'ExcludedMetrics' has higher priority than 'IncludedMetrics'.
#ExcludedMetrics = ""
#IncludedMetrics = ""
[MetricCollector.MountPointStat]
IncludedMountPoints = "(^/home$)|(^/$)|(^/boot$)"
# Known warning pattern filter
#
# array[0] - the name of the known issue
# array[1] - regex of pattern which help identify the known issues
# array[2] & array[3] - regex of known clusters or containers hit the issue
#
# Example:
# ["ep_poll", "ep_scan_ready_list.constprop.21\\+0x217", "athena-predict", ""]
# issue name: ep_poll
# ep_scan_ready_list.constprop.21\\+0x217 is used to identify the issue base on
# the stack backtrace
# athena-predict is part of name of cluster which are very known hit this
# issue frequently
[WarningFilter]
PatternList = [
[
"coredump",
"do_exit\\+0x1c9",
"",
""
],
[
"ep_poll",
"ep_scan_ready_list.constprop.21\\+0x217",
"athena-predict",
""
],
[
"php_parallel_exit",
"unlink_anon_vmas\\+0x76"
],
[
"futex",
"futex_wait_queue_me\\+0xc1"
],
[
"netrecvlat",
"comm=gundam_client:"
],
[
"", "", "", ""
]
]
[Pod]
KubeletPodListURL = "http://127.0.0.1:10255/pods"
DockerAPIVersion = "1.24"

129
internal/bpf/bpf.go Normal file
View File

@ -0,0 +1,129 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bpf
import (
"context"
)
// The BPF APIs
//
// The bpf manager has the following APIs:
//
// // InitBpfManager initializes the bpf manager.
// InitBpfManager() error
//
// // CloseBpfManager closes the bpf manager.
// CloseBpfManager()
//
// // LoadBpf the bpf and return the bpf.
// LoadBpf(objName string, consts map[string]any) (BPF, error)
// AttachOption is an option for attaching a single program.
type AttachOption struct {
	// ProgramName selects the program (by name) that this option applies to.
	ProgramName string
	// Symbol is the attach target for kprobe/kretprobe/tracepoint/raw_tracepoint
	// programs; its exact format depends on the program type (see
	// AttachWithOptions for the per-type formats).
	Symbol string
	// PerfEvent carries sampling parameters for BPF_PROG_TYPE_PERF_EVENT
	// programs. Exactly one of SamplePeriod / SampleFreq is typically used.
	PerfEvent struct {
		SamplePeriod, SampleFreq uint64
	}
}
// Info is the runtime information of a loaded bpf object: the maps and
// programs that were created in the kernel for it.
type Info struct {
	MapsInfo     []MapInfo
	ProgramsInfo []ProgramInfo
}

// MapInfo is the info of a single loaded map.
type MapInfo struct {
	// ID is the kernel-assigned map ID.
	ID   uint32
	Name string
}

// ProgramInfo is the info of a single loaded program.
type ProgramInfo struct {
	// ID is the kernel-assigned program ID.
	ID          uint32
	Name        string
	SectionName string
}

// MapItem describes a map element as a raw key-value pair; both sides are
// the kernel's byte representation and must be converted by the caller.
type MapItem struct {
	Key   []byte
	Value []byte
}
// BPF is the common interface implemented by every loaded bpf object.
type BPF interface {
	// Name returns the bpf name (the object file name it was loaded from).
	Name() string
	// MapIDByName gets the kernel map ID by its name.
	MapIDByName(name string) uint32
	// ProgIDByName gets the kernel program ID by its name.
	ProgIDByName(name string) uint32
	// String returns a short human-readable description of the bpf.
	String() string
	// Info gets the bpf map/program information.
	Info() (*Info, error)
	// Close releases all maps, programs and attach links of the bpf.
	Close() error
	// AttachWithOptions attaches selected programs with per-program options.
	AttachWithOptions(opts []AttachOption) error
	// Attach attaches the default programs (targets derived from sections).
	Attach() error
	// Detach detaches all currently attached programs.
	Detach() error
	// Loaded checks whether the bpf is still loaded in the kernel.
	Loaded() (bool, error)
	// EventPipe gets an event-pipe on the map and returns a PerfEventReader.
	EventPipe(ctx context.Context, mapID, perCPUBuffer uint32) (PerfEventReader, error)
	// EventPipeByName gets an event-pipe by map name and returns a PerfEventReader.
	EventPipeByName(ctx context.Context, mapName string, perCPUBuffer uint32) (PerfEventReader, error)
	// AttachAndEventPipe creates the event-pipe first, then attaches, and
	// returns a PerfEventReader (so no events are lost between the two steps).
	AttachAndEventPipe(ctx context.Context, mapName string, perCPUBuffer uint32) (PerfEventReader, error)
	// ReadMap read the value content corresponding to a key from a map
	//
	// NOTICE: The content of the key needs to be converted to byte type, and the
	// obtained value is of byte type, which also needs to be converted to the
	// corresponding type.
	ReadMap(mapID uint32, key []byte) ([]byte, error)
	// WriteMapItems writes the value content corresponding to a key to a map.
	WriteMapItems(mapID uint32, items []MapItem) error
	// DeleteMapItems deletes multiple items from a BPF map by keys.
	DeleteMapItems(mapID uint32, keys [][]byte) error
	// DumpMap dumps all the contents of the map identified by mapID.
	DumpMap(mapID uint32) ([]MapItem, error)
	// DumpMapByName dumps all the contents of the map identified by name.
	DumpMapByName(mapName string) ([]MapItem, error)
	// WaitDetachByBreaker watches the bpf's status and cancels the context
	// when the breaker decides the bpf must be detached.
	WaitDetachByBreaker(ctx context.Context, cancel context.CancelFunc)
}

595
internal/bpf/bpf_default.go Normal file
View File

@ -0,0 +1,595 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !didi
package bpf
import (
"bytes"
"context"
"fmt"
"io"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"huatuo-bamai/internal/log"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/link"
"golang.org/x/sys/unix"
)
const (
	// bpfFileDirectory is the directory searched by LoadBpf for compiled
	// bpf object files, relative to the process working directory.
	bpfFileDirectory = "./bpf"
)
// InitBpfManager initializes the bpf manager.
//
// It lifts RLIMIT_MEMLOCK to infinity so that eBPF maps and programs can be
// locked into memory without hitting the default per-process cap.
func InitBpfManager() error {
	unlimited := unix.Rlimit{
		Cur: unix.RLIM_INFINITY,
		Max: unix.RLIM_INFINITY,
	}
	return unix.Setrlimit(unix.RLIMIT_MEMLOCK, &unlimited)
}
// CloseBpfManager closes the bpf manager.
//
// The default manager holds no global resources, so this is a no-op; it
// exists to mirror the API of other build-tag variants.
func CloseBpfManager() {}
// mapSpec pairs a map's spec name with its loaded (cloned) kernel handle.
type mapSpec struct {
	name string
	bMap *ebpf.Map
}

// programSpec pairs a program's spec metadata with its loaded (cloned)
// kernel handle and its currently open attach links.
type programSpec struct {
	name          string
	specType      ebpf.ProgramType
	sectionName   string
	// sectionPrefix is the section name up to the first '/'
	// (e.g. "kprobe", "kretprobe"), used to tell kprobe from kretprobe.
	sectionPrefix string
	bProg         *ebpf.Program
	// links maps an attach key (symbol, possibly with offset) to its link.
	links map[string]link.Link
}

// defaultBPF is the default BPF implementation backed by cilium/ebpf.
// Maps and programs are indexed by their kernel-assigned IDs.
type defaultBPF struct {
	name            string
	mapSpecs        map[uint32]mapSpec
	programSpecs    map[uint32]programSpec
	mapName2IDs     map[string]uint32
	programName2IDs map[string]uint32
}

// compile-time check that defaultBPF satisfies the BPF interface.
var _ BPF = (*defaultBPF)(nil)
// LoadBpfFromBytes loads the bpf object from an in-memory byte slice.
func LoadBpfFromBytes(bpfName string, bpfBytes []byte, consts map[string]any) (BPF, error) {
	reader := bytes.NewReader(bpfBytes)
	return loadBpfFromReader(bpfName, reader, consts)
}
// LoadBpf loads the named bpf object file from bpfFileDirectory and returns
// the loaded bpf.
func LoadBpf(bpfName string, consts map[string]any) (BPF, error) {
	path := filepath.Join(bpfFileDirectory, bpfName)
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	return loadBpfFromReader(bpfName, file, consts)
}
// loadBpfFromReader parses a bpf ELF from rd, optionally rewrites its
// constants, loads it into the kernel, and wraps the result in a defaultBPF.
//
// The collection itself is closed before returning; the defaultBPF keeps
// per-map/per-program clones, so its lifetime is independent of coll.
func loadBpfFromReader(bpfName string, rd io.ReaderAt, consts map[string]any) (BPF, error) {
	specs, err := ebpf.LoadCollectionSpecFromReader(rd)
	if err != nil {
		return nil, fmt.Errorf("can't parse the bpf file %s: %w", bpfName, err)
	}
	// RewriteConstants: patch compile-time constants before loading.
	if consts != nil {
		if err := specs.RewriteConstants(consts); err != nil {
			return nil, fmt.Errorf("can't rewrite constants: %w", err)
		}
	}
	// loads Maps and Programs into the kernel.
	coll, err := ebpf.NewCollection(specs)
	if err != nil {
		return nil, fmt.Errorf("can't new the bpf collection: %w", err)
	}
	// The clones taken below keep the kernel objects alive after this close.
	defer coll.Close()
	b := &defaultBPF{
		name:         bpfName,
		mapSpecs:     make(map[uint32]mapSpec),
		programSpecs: make(map[uint32]programSpec),
	}
	// maps: index every loaded map by its kernel-assigned ID.
	for name, spec := range specs.Maps {
		m, ok := coll.Maps[name]
		if !ok {
			// spec'd but not materialized in the collection; skip.
			continue
		}
		info, err := m.Info()
		if err != nil {
			return nil, fmt.Errorf("can't get map info: %w", err)
		}
		id, ok := info.ID()
		if !ok {
			return nil, fmt.Errorf("invalid map ID: %v", id)
		}
		// Clone so the handle survives coll.Close() above.
		bMap, err := m.Clone()
		if err != nil {
			return nil, fmt.Errorf("can't clone map: %w", err)
		}
		b.mapSpecs[uint32(id)] = mapSpec{
			name: spec.Name,
			bMap: bMap,
		}
	}
	// programs: same indexing scheme as maps.
	// NOTE(review): clones taken before a later error path are not closed
	// explicitly here; the finalizer set below eventually reclaims them.
	for name, spec := range specs.Programs {
		p, ok := coll.Programs[name]
		if !ok {
			continue
		}
		info, err := p.Info()
		if err != nil {
			return nil, fmt.Errorf("can't get program info: %w", err)
		}
		id, ok := info.ID()
		if !ok {
			return nil, fmt.Errorf("invalid program ID: %v", id)
		}
		bProg, err := p.Clone()
		if err != nil {
			return nil, fmt.Errorf("can't clone program: %w", err)
		}
		b.programSpecs[uint32(id)] = programSpec{
			name:          spec.Name,
			specType:      spec.Type,
			sectionName:   spec.SectionName,
			// first path element, e.g. "kprobe" of "kprobe/sys_open".
			sectionPrefix: strings.SplitN(spec.SectionName, "/", 2)[0],
			bProg:         bProg,
			links:         make(map[string]link.Link),
		}
	}
	// mapName2IDs: reverse index name -> kernel ID.
	b.mapName2IDs = make(map[string]uint32, len(b.mapSpecs))
	for id, m := range b.mapSpecs {
		b.mapName2IDs[m.name] = id
	}
	// programName2IDs: reverse index name -> kernel ID.
	b.programName2IDs = make(map[string]uint32, len(b.programSpecs))
	for id, p := range b.programSpecs {
		b.programName2IDs[p.name] = id
	}
	log.Infof("loaded bpf: %s", b)
	// auto clean: release kernel objects if the caller forgets Close.
	runtime.SetFinalizer(b, (*defaultBPF).Close)
	return b, nil
}
// Name returns the name of the bpf (the object file name it was loaded from).
func (b *defaultBPF) Name() string {
	return b.name
}

// MapIDByName gets the kernel map ID by name.
// Returns 0 (the zero value) when the name is unknown.
func (b *defaultBPF) MapIDByName(name string) uint32 {
	return b.mapName2IDs[name]
}

// ProgIDByName gets the kernel program ID by name.
// Returns 0 (the zero value) when the name is unknown.
func (b *defaultBPF) ProgIDByName(name string) uint32 {
	return b.programName2IDs[name]
}

// String returns "<name>#<map count>#<program count>".
func (b *defaultBPF) String() string {
	return fmt.Sprintf("%s#%d#%d", b.name, len(b.mapSpecs), len(b.programSpecs))
}
// Info gets the defaultBPF information: one entry per loaded map and one
// per loaded program, each carrying the kernel-assigned ID.
func (b *defaultBPF) Info() (*Info, error) {
	mapsInfo := make([]MapInfo, 0, len(b.mapSpecs))
	for id, m := range b.mapSpecs {
		mapsInfo = append(mapsInfo, MapInfo{ID: id, Name: m.name})
	}

	programsInfo := make([]ProgramInfo, 0, len(b.programSpecs))
	for id, p := range b.programSpecs {
		programsInfo = append(programsInfo, ProgramInfo{
			ID:          id,
			Name:        p.name,
			SectionName: p.sectionName,
		})
	}

	return &Info{MapsInfo: mapsInfo, ProgramsInfo: programsInfo}, nil
}
// Close releases every map, program and attach link owned by the bpf.
//
// It also unregisters the finalizer installed at load time, so that the GC
// does not call Close a second time on an already-closed object.
// Individual Close errors on kernel handles are intentionally ignored
// (best-effort teardown); the method always returns nil.
func (b *defaultBPF) Close() error {
	// Prevent the load-time finalizer from re-running this method.
	runtime.SetFinalizer(b, nil)
	for _, m := range b.mapSpecs {
		m.bMap.Close()
	}
	for _, p := range b.programSpecs {
		for _, l := range p.links {
			l.Close()
		}
		p.bProg.Close()
	}
	return nil
}
// AttachWithOptions attaches programs with per-program options.
//
// Supported program types are TracePoint, Kprobe/Kretprobe, RawTracepoint
// and PerfEvent; the meaning of opt.Symbol depends on the type (documented
// inline below).
//
// The error is a named return value on purpose: the deferred cleanup reads
// it, so every failing return path — including the validation errors that
// previously returned directly without assigning `err` and therefore
// skipped the cleanup — now detaches the programs that were already
// attached instead of leaking them.
func (b *defaultBPF) AttachWithOptions(opts []AttachOption) (err error) {
	defer func() {
		if err != nil { // detach all programs when error.
			_ = b.Detach()
		}
	}()
	for _, opt := range opts {
		progID := b.ProgIDByName(opt.ProgramName)
		spec := b.programSpecs[progID]
		switch spec.specType {
		case ebpf.TracePoint:
			// opt.Symbol: <system>/<symbol>
			symbols := strings.SplitN(opt.Symbol, "/", 2)
			if len(symbols) != 2 {
				return fmt.Errorf("bpf %s: invalid symbol: %s", b, opt.Symbol)
			}
			if err = b.attachTracepoint(progID, symbols[0], symbols[1]); err != nil {
				return fmt.Errorf("attach tracepoint with options %v: %w", opt, err)
			}
		case ebpf.Kprobe:
			// opt.Symbol: <symbol>[+<offset>] for kprobe,
			// opt.Symbol: <symbol> for kretprobe
			if err = b.attachKprobe(progID, opt.Symbol, spec.sectionPrefix == "kretprobe"); err != nil {
				return fmt.Errorf("attach kprobe with options %v: %w", opt, err)
			}
		case ebpf.RawTracepoint:
			// opt.Symbol: <symbol>
			if err = b.attachRawTracepoint(progID, opt.Symbol); err != nil {
				return fmt.Errorf("attach raw tracepoint with options %v: %w", opt, err)
			}
		case ebpf.PerfEvent:
			// SamplePeriod/SampleFreq
			if err = b.attachPerfEvent(progID, opt.PerfEvent.SamplePeriod, opt.PerfEvent.SampleFreq); err != nil {
				return fmt.Errorf("attach perf event with options %v: %w", opt, err)
			}
		default:
			return fmt.Errorf("bpf %s: unsupported program type: %s", b, spec.specType)
		}
	}
	return nil
}
// Attach attaches the default programs; the attach target of each program
// is derived from its ELF section name.
//
// The error is a named return value on purpose: the deferred cleanup reads
// it, so the invalid-section-name paths — which previously returned without
// assigning `err` and therefore skipped the cleanup — now detach the
// programs that were already attached instead of leaking them.
func (b *defaultBPF) Attach() (err error) {
	defer func() {
		if err != nil { // detach all programs when error.
			_ = b.Detach()
		}
	}()
	for progID, spec := range b.programSpecs {
		switch spec.specType {
		case ebpf.TracePoint:
			// section: tracepoint/<system>/<symbol>
			symbols := strings.SplitN(spec.sectionName, "/", 3)
			if len(symbols) != 3 {
				return fmt.Errorf("bpf %s: invalid section name: %s", b, spec.sectionName)
			}
			if err = b.attachTracepoint(progID, symbols[1], symbols[2]); err != nil {
				return fmt.Errorf("attach tracepoint: %w", err)
			}
		case ebpf.Kprobe:
			// section: kprobe/<symbol>[+<offset>]
			// section: kretprobe/<symbol>
			symbols := strings.SplitN(spec.sectionName, "/", 2)
			if len(symbols) != 2 {
				return fmt.Errorf("bpf %s: invalid section name: %s", b, spec.sectionName)
			}
			if err = b.attachKprobe(progID, symbols[1], symbols[0] == "kretprobe"); err != nil {
				return fmt.Errorf("attach kprobe: %w", err)
			}
		case ebpf.RawTracepoint:
			// section: raw_tracepoint/<symbol>
			symbols := strings.SplitN(spec.sectionName, "/", 2)
			if len(symbols) != 2 {
				return fmt.Errorf("bpf %s: invalid section name: %s", b, spec.sectionName)
			}
			if err = b.attachRawTracepoint(progID, symbols[1]); err != nil {
				return fmt.Errorf("attach raw tracepoint: %w", err)
			}
		default:
			return fmt.Errorf("bpf %s: unsupported program type: %s", b, spec.specType)
		}
	}
	return nil
}
// attachKprobe attaches the program identified by progID as a kprobe (with
// an optional "+<offset>" suffix in symbol) or as a kretprobe.
//
// The attach link is recorded in spec.links under a key of the form
// "<symbol>+<offset>" (kprobe) or "<symbol>" (kretprobe); attaching the
// same key twice is rejected as a duplicate.
//
// NOTE(review): spec is a struct copy, but spec.links is a map, so
// insertions are visible through b.programSpecs as well.
func (b *defaultBPF) attachKprobe(progID uint32, symbol string, isRetprobe bool) error {
	spec := b.programSpecs[progID]
	if !isRetprobe { // kprobe
		// : <symbol>[+<offset>]
		// : <symbol>
		var (
			err    error
			offset uint64
		)
		// Split an optional decimal offset off the symbol name.
		symOffsets := strings.Split(symbol, "+")
		if len(symOffsets) > 2 {
			return fmt.Errorf("bpf %s: invalid symbol: %s", b, symbol)
		} else if len(symOffsets) == 2 {
			offset, err = strconv.ParseUint(symOffsets[1], 10, 64)
			if err != nil {
				return fmt.Errorf("bpf %s: invalid symbol: %s", b, symbol)
			}
		}
		// Canonical key: "<symbol>+<offset>" (offset 0 when none given).
		linkKey := fmt.Sprintf("%s+%d", symOffsets[0], offset)
		if _, ok := spec.links[linkKey]; ok {
			return fmt.Errorf("bpf %s: duplicate symbol: %s", b, symbol)
		}
		opts := link.KprobeOptions{
			Offset: offset,
		}
		l, err := link.Kprobe(symOffsets[0], spec.bProg, &opts)
		if err != nil {
			return fmt.Errorf("can't attach kprobe %s in %v: %w", symbol, spec.bProg, err)
		}
		spec.links[linkKey] = l
		log.Infof("attach kprobe %s in %v, links: %v", symbol, spec.bProg, spec.links)
	} else { // kretprobe
		linkKey := symbol
		if _, ok := spec.links[linkKey]; ok {
			return fmt.Errorf("bpf %s: duplicate symbol: %s", b, symbol)
		}
		l, err := link.Kretprobe(symbol, spec.bProg, nil)
		if err != nil {
			return fmt.Errorf("can't attach kretprobe %s in %v: %w", symbol, spec.bProg, err)
		}
		spec.links[linkKey] = l
		log.Infof("attach kretprobe %s in %v, links: %v", symbol, spec.bProg, spec.links)
	}
	return nil
}
// attachTracepoint attaches the program identified by progID to the
// tracepoint <system>/<symbol> and records the link under that key.
// Attaching the same system/symbol pair twice is rejected as a duplicate.
func (b *defaultBPF) attachTracepoint(progID uint32, system, symbol string) error {
	spec := b.programSpecs[progID]

	key := fmt.Sprintf("%s/%s", system, symbol)
	if _, exists := spec.links[key]; exists {
		return fmt.Errorf("bpf %s: duplicate symbol: %s", b, symbol)
	}

	tp, err := link.Tracepoint(system, symbol, spec.bProg, nil)
	if err != nil {
		return fmt.Errorf("can't attach tracepoint %s/%s in %v: %w", system, symbol, spec.bProg, err)
	}

	spec.links[key] = tp
	log.Infof("attach tracepoint %s/%s in %v, links: %v", system, symbol, spec.bProg, spec.links)
	return nil
}
// attachRawTracepoint attaches the program identified by progID to the raw
// tracepoint named symbol and records the link under that symbol.
// Attaching the same symbol twice is rejected as a duplicate.
func (b *defaultBPF) attachRawTracepoint(progID uint32, symbol string) error {
	spec := b.programSpecs[progID]

	if _, exists := spec.links[symbol]; exists {
		return fmt.Errorf("bpf %s: duplicate symbol: %s", b, symbol)
	}

	opts := link.RawTracepointOptions{
		Name:    symbol,
		Program: spec.bProg,
	}
	rt, err := link.AttachRawTracepoint(opts)
	if err != nil {
		return fmt.Errorf("can't attach raw tracepoint %s in %v: %w", symbol, spec.bProg, err)
	}

	spec.links[symbol] = rt
	log.Infof("attach raw tracepoint %s in %v, links: %v", symbol, spec.bProg, spec.links)
	return nil
}
// attachPerfEvent attaches a BPF_PROG_TYPE_PERF_EVENT program identified by
// progID, sampling every samplePeriod events or at sampleFrequency Hz.
//
// NOTE(review): not implemented yet — all parameters are currently ignored
// and the call always fails.
func (b *defaultBPF) attachPerfEvent(progID uint32, samplePeriod, sampleFrequency uint64) error {
	// TODO implement
	return fmt.Errorf("not implemented")
}
// Detach closes every active attach link of every program, stopping the
// programs while keeping the maps and programs loaded.
//
// Closed links are removed from the per-program link table; without the
// delete, a later re-Attach of the same symbol would fail with
// "duplicate symbol" even though the old link is already dead.
func (b *defaultBPF) Detach() error {
	for _, spec := range b.programSpecs {
		for key, l := range spec.links {
			err := l.Close()
			log.Infof("detach %s in %v: %v", spec.sectionName, spec.bProg, err)
			// Deleting entries while ranging over a map is safe in Go.
			delete(spec.links, key)
		}
	}
	return nil
}
// Loaded checks whether the bpf is still loaded in the kernel.
//
// NOTE(review): this is currently a stub that always reports true; no
// kernel-side liveness check is performed.
func (b *defaultBPF) Loaded() (bool, error) {
	return true, nil
}
// EventPipe creates a perf-event pipe on the map identified by mapID and
// returns a PerfEventReader for it.
func (b *defaultBPF) EventPipe(ctx context.Context, mapID, perCPUBuffer uint32) (PerfEventReader, error) {
	perfMap := b.mapSpecs[mapID].bMap

	reader, err := newPerfEventReader(ctx, perfMap, int(perCPUBuffer))
	if err != nil {
		return nil, err
	}

	log.Infof("event-pipe %d, perCPUBuffer %d", mapID, perCPUBuffer)
	return reader, nil
}
// EventPipeByName creates a perf-event pipe on the map identified by
// mapName and returns a PerfEventReader (see EventPipe).
func (b *defaultBPF) EventPipeByName(ctx context.Context, mapName string, perCPUBuffer uint32) (PerfEventReader, error) {
	return b.EventPipe(ctx, b.MapIDByName(mapName), perCPUBuffer)
}
// AttachAndEventPipe creates the event pipe first and only then attaches
// the default programs, so events produced right after attach are not lost.
// On attach failure the freshly created reader is closed again.
func (b *defaultBPF) AttachAndEventPipe(ctx context.Context, mapName string, perCPUBuffer uint32) (PerfEventReader, error) {
	reader, err := b.EventPipeByName(ctx, mapName, perCPUBuffer)
	if err != nil {
		return nil, err
	}

	if attachErr := b.Attach(); attachErr != nil {
		reader.Close()
		return nil, attachErr
	}

	log.Infof("attach and event-pipe %s, perCPUBuffer %d", mapName, perCPUBuffer)
	return reader, nil
}
// ReadMap read the value content corresponding to a key from a map
//
// NOTICE: The content of the key needs to be converted to byte type, and the
// obtained value is of byte type, which also needs to be converted to the
// corresponding type.
func (b *defaultBPF) ReadMap(mapID uint32, key []byte) ([]byte, error) {
	m := b.mapSpecs[mapID].bMap

	value, err := m.LookupBytes(key)
	if err != nil {
		return nil, err
	}

	log.Debugf("read map %d, key %v, value %v", mapID, key, value)
	return value, nil
}
// WriteMapItems writes each key/value item into the map identified by
// mapID, creating or overwriting entries (ebpf.UpdateAny). It stops and
// returns on the first failing update.
func (b *defaultBPF) WriteMapItems(mapID uint32, items []MapItem) error {
	bpfMap := b.mapSpecs[mapID].bMap

	for i := range items {
		key, value := items[i].Key, items[i].Value
		if err := bpfMap.Update(key, value, ebpf.UpdateAny); err != nil {
			return fmt.Errorf("map %d, key %v: update: %w", mapID, key, err)
		}
		log.Infof("write map %d, key %v, value %v", mapID, key, value)
	}

	return nil
}
// DeleteMapItems deletes multiple items from a BPF map by their keys,
// stopping and returning on the first failing delete.
func (b *defaultBPF) DeleteMapItems(mapID uint32, keys [][]byte) error {
	bpfMap := b.mapSpecs[mapID].bMap

	for _, key := range keys {
		if err := bpfMap.Delete(key); err != nil {
			return fmt.Errorf("map %d, key %v: delete: %w", mapID, key, err)
		}
		log.Infof("delete map %d, key %v", mapID, key)
	}

	return nil
}
// DumpMap dumps all the contents of the map identified by mapID by walking
// the key space with NextKeyBytes, up to MaxEntries iterations.
//
// Entries whose value can no longer be looked up (deleted between the
// NextKeyBytes and LookupBytes calls) are skipped. The iteration cursor is
// advanced *before* the value lookup: the previous version left prevKey
// unchanged on the nil-value `continue`, so it re-fetched the same key on
// every remaining iteration instead of moving on.
func (b *defaultBPF) DumpMap(mapID uint32) ([]MapItem, error) {
	m := b.mapSpecs[mapID].bMap
	var prevKey any
	items := []MapItem{}
	for i := 0; i < int(m.MaxEntries()); i++ {
		nextKey, err := m.NextKeyBytes(prevKey)
		if err != nil {
			return nil, fmt.Errorf("map %d, prevKey %v: next key: %w", mapID, prevKey, err)
		}
		// last key
		if len(nextKey) == 0 {
			break
		}
		// Advance the cursor now so a skipped (deleted) entry does not
		// stall the walk on the same key.
		prevKey = nextKey
		value, err := m.LookupBytes(nextKey)
		if err != nil {
			return nil, fmt.Errorf("map %d, key %v: value: %w", mapID, nextKey, err)
		}
		if value == nil {
			// Entry vanished between NextKeyBytes and LookupBytes.
			continue
		}
		items = append(items, MapItem{
			Key:   nextKey,
			Value: value,
		})
	}
	log.Debugf("dump map %d, items %v", mapID, items)
	return items, nil
}
// DumpMapByName dumps all the contents of the map identified by mapName
// (see DumpMap).
func (b *defaultBPF) DumpMapByName(mapName string) ([]MapItem, error) {
	return b.DumpMap(b.MapIDByName(mapName))
}
// WaitDetachByBreaker watches the bpf's status and is expected to cancel
// the context when a breaker decides the bpf must be detached.
//
// NOTE(review): currently a stub — both parameters are ignored and no
// watching takes place.
func (b *defaultBPF) WaitDetachByBreaker(ctx context.Context, cancel context.CancelFunc) {
	// TODO: implement
}

View File

@ -0,0 +1,24 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bpf
// PerfEventReader reads records from an eBPF perf_event array.
type PerfEventReader interface {
	// ReadInto reads the next eBPF perf_event record into pdata; pdata must
	// be a pointer to a fixed-size value suitable for binary decoding.
	ReadInto(pdata any) error
	// Close stops the reader and releases its resources.
	Close() error
}

View File

@ -0,0 +1,96 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !didi
package bpf
import (
"bytes"
"context"
"encoding/binary"
"fmt"
"os"
"time"
"huatuo-bamai/pkg/types"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/perf"
"github.com/pkg/errors"
)
// perfEventReader reads records from an eBPF perf_event_array via a
// cilium/ebpf perf.Reader, with cancellation through a derived context.
type perfEventReader struct {
	// ctx is the derived context observed by ReadInto for shutdown.
	ctx context.Context
	// rd is the underlying perf ring-buffer reader.
	rd *perf.Reader
	// cancelCtx cancels ctx; invoked by Close.
	cancelCtx context.CancelFunc
}

// compile-time check that perfEventReader satisfies PerfEventReader.
var _ PerfEventReader = (*perfEventReader)(nil)
// newPerfEventReader creates a PerfEventReader over the given
// perf_event_array map with the requested per-CPU buffer size. The returned
// reader derives its own cancellable context from ctx.
func newPerfEventReader(ctx context.Context, array *ebpf.Map, perCPUBuffer int) (PerfEventReader, error) {
	reader, err := perf.NewReader(array, perCPUBuffer)
	if err != nil {
		return nil, fmt.Errorf("can't create the perf event reader: %w", err)
	}

	childCtx, cancel := context.WithCancel(ctx)
	r := &perfEventReader{
		ctx:       childCtx,
		rd:        reader,
		cancelCtx: cancel,
	}
	return r, nil
}
// Close cancels the reader's context — so any blocked ReadInto loop
// observes the shutdown — and then closes the underlying perf reader.
//
// The close error is now propagated to the caller instead of being
// silently discarded (the previous version always returned nil).
func (r *perfEventReader) Close() error {
	r.cancelCtx()
	return r.rd.Close()
}
// ReadInto reads the next eBPF perf_event record into pdata and returns.
//
// The loop polls with a 100ms deadline so that context cancellation is
// observed promptly; deadline expirations simply retry. Records flagged
// with lost samples are skipped silently. pdata must be a pointer to a
// fixed-size value decodable by encoding/binary in native byte order.
func (r *perfEventReader) ReadInto(pdata any) error {
	for {
		select {
		case <-r.ctx.Done():
			return types.ErrExitByCancelCtx
		default:
			// set the poll deadline 100ms so the ctx.Done check above
			// runs at least every 100ms while no events arrive.
			r.rd.SetDeadline(time.Now().Add(100 * time.Millisecond))
			// read the event
			record, err := r.rd.Read()
			if err != nil {
				if errors.Is(err, perf.ErrClosed) { // Close
					return fmt.Errorf("perfEventReader is closed: %w", types.ErrExitByCancelCtx)
				} else if errors.Is(err, os.ErrDeadlineExceeded) { // poll deadline
					continue
				}
				return fmt.Errorf("failed to read the event: %w", err)
			}
			// Kernel dropped samples for this record; skip it rather than
			// decode a partial batch.
			if record.LostSamples != 0 {
				continue
			}
			// parse the event into the caller-supplied structure.
			if err := binary.Read(bytes.NewBuffer(record.RawSample), binary.NativeEndian, pdata); err != nil {
				return fmt.Errorf("failed to parse the event: %w", err)
			}
			return nil
		}
	}
}

340
internal/conf/config.go Normal file
View File

@ -0,0 +1,340 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package conf
import (
"fmt"
"os"
"reflect"
"regexp"
"strings"
"sync"
"huatuo-bamai/internal/log"
"github.com/pelletier/go-toml"
)
// CommonConf is the global common configuration, decoded from the TOML
// configuration file (see LoadConfig). Field layout mirrors the file's
// table structure; `default` tags document intended defaults.
type CommonConf struct {
	// LogLevel: Debug, Info, Warn, Error, Panic.
	LogLevel string `default:"Info"`
	// LogFile is the logging file path (empty means default destination).
	LogFile string

	// APIServer addr
	APIServer struct {
		TCPAddr string `default:":19704"`
	}

	// HuaTuo config
	HuaTuoConf struct {
		UserName         string
		PassWord         string
		UnixAddr         string
		ServerIP         string
		APIVersion       string
		ReqTimeout       int
		OnlyOneSession   bool `default:"true"`
		KeepaliveEnable  bool `default:"true"`
		KeepaliveTimeout int
	}

	// RuntimeCgroup for huatuo-bamai resource
	RuntimeCgroup struct {
		// limit cpu num 0.5 2.0
		LimitInitCPU float64 `default:"0.5"`
		LimitCPU     float64 `default:"2.0"`
		// limit memory (MB in the file; converted to bytes by LoadConfig)
		LimitMem int64 `default:"2048"`
	}

	// Storage for huatuo-bamai tracer storage
	Storage struct {
		// ES configurations
		ES struct {
			Address, Username, Password, Index string
		}
		// LocalFile record file configuration
		LocalFile struct {
			Path         string `default:"record"`
			RotationSize int    `default:"100"`
			MaxRotation  int    `default:"10"`
		}
	}

	TaskConfig struct {
		MaxRunningTask int `default:"10"`
	}

	Tracing struct {
		// blacklist of tracers that must not run
		BlackList []string
		// Cpuidle for cpuidle configuration
		Cpuidle struct {
			CgUserth            uint64
			CgDeltaUserth       int64
			CgSysth             uint64
			CgDeltaSysth        int64
			CgUsageth           uint64
			CgDeltaUsageth      int64
			CgStep              int64
			CgGrace             int64
			CgUsageToolduration int64
		}
		// Cpusys for cpusys configuration
		Cpusys struct {
			CPUSysth           uint64
			CPUSysDelta        int64
			CPUSysStep         int64
			CPUSysToolduration int64
		}
		// Waitrate for waitrate.go
		Waitrate struct {
			SpikeThreshold map[string]float64
			SlopeThreshold map[string]float64
			SampleConfig   map[string]int
		}
		// Softirq for softirq thresh configuration
		Softirq struct {
			ThresholdTime uint64
		}
		// Dload for dload thresh configuration
		Dload struct {
			ThresholdLoad float64
			MonitorGap    int
		}
		// IOTracing for iotracer thresh configuration
		IOTracing struct {
			IOScheduleThreshold uint64
			ReadThreshold       uint64
			WriteThreshold      uint64
			IOutilThreshold     uint64
			IOwaitThreshold     uint64
			PeriodSecond        uint64
			MaxStackNumber      int
			TopProcessCount     int
			TopFilesPerProcess  int
		}
		// MemoryReclaim for MemoryReclaim configuration
		MemoryReclaim struct {
			Deltath uint64
		}
		// MemoryBurst configuration
		MemoryBurst struct {
			HistoryWindowLength int
			SampleInterval      int
			SilencePeriod       int
			TopNProcesses       int
			BurstRatio          float64
			AnonThreshold       int
		}
		// NetRecvLat configuration (latency thresholds for packet receive)
		NetRecvLat struct {
			ToNetIf              uint64
			ToTCPV4              uint64
			ToUserCopy           uint64
			IgnoreHost           bool
			IgnoreContainerLevel []int
		}
		// Dropwatch configuration
		Dropwatch struct {
			IgnoreNeighInvalidate bool
		}
		// Netdev configuration
		Netdev struct {
			Whitelist []string
		}
		Fastfork struct {
			RedisInfoCollectionInterval uint32 `default:"3600"`
			EnableForkProbe             uint32 `default:"1"`
			EnablePtsepProbe            uint32 `default:"1"`
			EnableWaitptsepProbe        uint32 `default:"1"`
		}
	}

	MetricCollector struct {
		// blacklist of collectors that must not run
		BlackList []string
		Netdev struct {
			// Use `netlink` instead of `procfs net/dev` to get netdev statistic.
			// Only support the host environment to use `netlink` now!
			EnableNetlink bool
			// IgnoredDevices: Ignore special devices in this netdev statistic.
			// AcceptDevices: Accept special devices in this netdev statistic.
			// These configurations use `Regexp`.
			// 'IgnoredDevices' has higher priority than 'AcceptDevices'.
			IgnoredDevices, AcceptDevices string
		}
		Qdisc struct {
			// IgnoredDevices: Ignore special devices in this qdisc statistic.
			// AcceptDevices: Accept special devices in this qdisc statistic.
			// These configurations use `Regexp`.
			// 'IgnoredDevices' has higher priority than 'AcceptDevices'.
			IgnoredDevices, AcceptDevices string
		}
		Vmstat struct {
			IncludedMetrics, ExcludedMetrics string
		}
		MemoryStat struct {
			IncludedMetrics, ExcludedMetrics string
		}
		MemoryEvents struct {
			IncludedMetrics, ExcludedMetrics string
		}
		Netstat struct {
			// ExcludedMetrics: Ignore keys in this netstat statistic.
			// IncludedMetrics: Accept keys in this netstat statistic.
			// The 'key' format: protocol + '_' + netstat_name. eg: TcpExt_TCPSynRetrans.
			// These configurations use `Regexp`.
			// 'ExcludedMetrics' has higher priority than 'IncludedMetrics'.
			ExcludedMetrics, IncludedMetrics string
		}
		MountPointStat struct {
			IncludedMountPoints string
		}
	}

	// WarningFilter for filtering the known issues
	WarningFilter struct {
		// PatternList entries are 2-4 element string arrays:
		// [issue name, stack regex, known cluster regex, known container regex].
		PatternList [][]string
	}

	// Pod configuration
	Pod struct {
		KubeletPodListURL string `default:"http://127.0.0.1:10255/pods"`
		DockerAPIVersion  string `default:"1.24"`
	}
}
// Package-level configuration state.
var (
	// lock serializes mutations performed through Set.
	lock = sync.Mutex{}
	// configFile is the path of the last successfully loaded config file;
	// Sync writes the current configuration back to it.
	configFile = ""
	// config is the process-wide configuration object returned by Get.
	config = &CommonConf{}
	// Region is the region the host and its containers belong to.
	Region string
)
// LoadConfig loads the TOML configuration file at path into the global
// configuration object and remembers the path for later Sync calls.
//
// Note: RuntimeCgroup.LimitMem is converted from MB to bytes here, so
// LoadConfig must be called at most once per process; re-loading would
// scale an already-scaled value again.
func LoadConfig(path string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	// Strict mode: unknown keys in the file are an error, which catches
	// typos in the configuration early.
	d := toml.NewDecoder(f)
	if err := d.Strict(true).Decode(config); err != nil {
		return fmt.Errorf("decode config %q: %w", path, err)
	}

	// Convert MB to bytes.
	config.RuntimeCgroup.LimitMem *= 1024 * 1024

	configFile = path
	log.Infof("Loadconfig:\n%+v\n", config)
	return nil
}
// Get returns the process-wide configuration object.
//
// The returned pointer aliases the global state; callers should not mutate
// it directly — use Set, which takes the package lock.
func Get() *CommonConf {
	return config
}
// Set modifies one field of the global configuration object under the
// package lock. It panics when the key does not resolve to an addressable
// field or when val cannot be assigned to that field.
//
// @key: supported keys
//   - "Key1"
//   - "Key1.Key2"
func Set(key string, val any) {
	lock.Lock()
	defer lock.Unlock()

	// Walk the dotted key down the config struct.
	c := reflect.ValueOf(config)
	for _, k := range strings.Split(key, ".") {
		elem := c.Elem().FieldByName(k)
		if !elem.IsValid() || !elem.CanAddr() {
			panic(fmt.Errorf("invalid elem %s: %v", key, elem))
		}
		c = elem.Addr()
	}

	// Assign. Check real type assignability rather than only comparing
	// reflect.Kind: two types can share a Kind without being assignable
	// (reflect.Value.Set would then panic with a less helpful message),
	// and interface-typed fields legitimately differ in Kind from their
	// concrete values.
	rc := reflect.Indirect(c)
	rval := reflect.ValueOf(val)
	if !rval.Type().AssignableTo(rc.Type()) {
		panic(fmt.Errorf("%s type %s is not assignable to type %s", key, rval.Type(), rc.Type()))
	}
	rc.Set(rval)
	log.Infof("Config: set %s = %v", key, val)
}
// Sync persists the current configuration back to the file it was loaded
// from, truncating any previous content.
func Sync() error {
	f, err := os.Create(configFile)
	if err != nil {
		return err
	}
	defer f.Close()
	return toml.NewEncoder(f).Encode(config)
}
// KnownIssueSearch searches the configured known-issue pattern list for a
// match against srcPattern, with optional secondary matches against
// srcMatching1 and srcMatching2.
//
// Each configured entry p is [name, pattern, matching1?, matching2?].
// It returns (name, 1) when the primary pattern and one of the secondary
// matchers match, (name, 0) when only the primary pattern matches, and
// ("", 0) when nothing matches.
func KnownIssueSearch(srcPattern, srcMatching1, srcMatching2 string) (issueName string, inKnownList uint64) {
	for _, p := range config.WarningFilter.PatternList {
		// Skip a malformed entry instead of aborting the whole scan.
		if len(p) < 2 {
			log.Infof("Invalid configuration, please check the config file!")
			continue
		}
		// Compile, not MustCompile: a bad user-supplied pattern must not
		// panic the process.
		rePattern, err := regexp.Compile(p[1])
		if err != nil {
			log.Infof("Invalid pattern %q: %v", p[1], err)
			continue
		}
		if !rePattern.MatchString(srcPattern) {
			continue
		}
		if srcMatching1 != "" && len(p) >= 3 && p[2] != "" {
			if re1, err := regexp.Compile(p[2]); err == nil && re1.MatchString(srcMatching1) {
				return p[0], 1
			}
		}
		if srcMatching2 != "" && len(p) >= 4 && p[3] != "" {
			if re2, err := regexp.Compile(p[3]); err == nil && re2.MatchString(srcMatching2) {
				return p[0], 1
			}
		}
		return p[0], 0
	}
	return "", 0
}

View File

@ -0,0 +1,235 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package flamegraph
import (
"github.com/grafana/grafana-plugin-sdk-go/data"
)
// Level is one depth row of flame-graph data. Values is a flat array of
// bars, ItemOffest (4) int64s per bar: start offset relative to the
// previous sibling, value (width), self value, and name index.
type Level struct {
	Values []int64
}
// Flamebearer is pyroscope flame graph data.
type Flamebearer struct {
	// Names holds the frame names referenced by the name indexes in
	// Level.Values.
	Names []string
	// Levels has one entry per stack depth, root level first.
	Levels []*Level
	// Total is the total value of the profile.
	Total int64
	// MaxSelf is the largest self value of any bar.
	MaxSelf int64
}
// Offsets into each 4-int64 bar of a Level.Values flat array.
// (The identifiers are misspelled "Offest", but they are exported, so they
// are kept as-is for backward compatibility.)

// StartOffest is offset of the bar relative to previous sibling
const StartOffest = 0

// ValueOffest is value or width of the bar
const ValueOffest = 1

// SelfOffest is self value of the bar
const SelfOffest = 2

// NameOffest is index into the names array
const NameOffest = 3

// ItemOffest Next bar. Each bar of the profile is represented by 4 numbers in a flat array.
const ItemOffest = 4
// ProfileTree is the grafana tree representation of a profile: one node per
// bar, with absolute start offsets and children nested in Nodes.
type ProfileTree struct {
	Start int64 // absolute start offset of the bar
	Value int64 // total value (width) of the bar
	Self  int64 // self value of the bar
	Level int   // depth in the flame graph, root is 0
	Name  string
	Nodes []*ProfileTree // children, left to right
}
// LevelsToTree converts flamebearer format into a tree. This is needed to
// then convert it into nested set format. Returns nil for empty input.
func LevelsToTree(levels []*Level, names []string) *ProfileTree {
	if len(levels) == 0 {
		return nil
	}

	// Root bar layout is [start, value, self, nameIndex]. Fix: the name
	// index lives at NameOffest (3); indexing names with the start slot
	// (Values[0]) only worked by coincidence when both were 0.
	tree := &ProfileTree{
		Start: 0,
		Value: levels[0].Values[ValueOffest],
		Self:  levels[0].Values[SelfOffest],
		Level: 0,
		Name:  names[levels[0].Values[NameOffest]],
	}

	parentsStack := []*ProfileTree{tree}
	currentLevel := 1

	// Cycle through each level; parentsStack holds the previous level's
	// nodes in left-to-right order.
	for currentLevel < len(levels) {
		// If we still have levels to go, this should not happen. Something
		// is probably wrong with the flamebearer data.
		if len(parentsStack) == 0 {
			break
		}

		var nextParentsStack []*ProfileTree
		currentParent := parentsStack[0]
		parentsStack = parentsStack[1:]

		itemIndex := 0
		// Bars store their start relative to the previous sibling; offset
		// accumulates to turn them into absolute positions.
		offset := int64(0)

		// Cycle through bars in a level.
		for itemIndex < len(levels[currentLevel].Values) {
			itemStart := levels[currentLevel].Values[itemIndex+StartOffest] + offset
			itemValue := levels[currentLevel].Values[itemIndex+ValueOffest]
			selfValue := levels[currentLevel].Values[itemIndex+SelfOffest]
			itemEnd := itemStart + itemValue
			parentEnd := currentParent.Start + currentParent.Value

			if itemStart >= currentParent.Start && itemEnd <= parentEnd {
				// The bar is within the bounds of the current parent, so it
				// is that parent's child.
				treeItem := &ProfileTree{
					Start: itemStart,
					Value: itemValue,
					Self:  selfValue,
					Level: currentLevel,
					Name:  names[levels[currentLevel].Values[itemIndex+NameOffest]],
				}
				currentParent.Nodes = append(currentParent.Nodes, treeItem)
				// This node becomes a candidate parent for the next level.
				nextParentsStack = append(nextParentsStack, treeItem)
				itemIndex += ItemOffest
				// Make the next sibling's relative start absolute.
				offset = itemEnd
			} else {
				// We went out of the parent's bounds, so move to the next
				// parent and re-evaluate the same bar against it.
				if len(parentsStack) == 0 {
					break
				}
				currentParent = parentsStack[0]
				parentsStack = parentsStack[1:]
			}
		}

		parentsStack = nextParentsStack
		currentLevel++
	}

	return tree
}
// TreeToNestedSetDataFrame walks the tree depth first and appends each node
// to a grafana data frame in nested set format. A nil tree (empty profile)
// still yields a valid, empty frame.
func TreeToNestedSetDataFrame(tree *ProfileTree, unit string) (*data.Frame, *EnumField) {
	levelField := data.NewField("level", nil, []int64{})
	valueField := data.NewField("value", nil, []int64{})
	selfField := data.NewField("self", nil, []int64{})
	// profileTypeID should encode the type of the profile with unit being
	// the 3rd part.
	valueField.Config = &data.FieldConfig{Unit: unit}
	selfField.Config = &data.FieldConfig{Unit: unit}

	frame := data.NewFrame("response")
	frame.Meta = &data.FrameMeta{PreferredVisualization: "flamegraph"}
	frame.Fields = data.Fields{levelField, valueField, selfField}

	labelField := NewEnumField("label", nil)
	if tree != nil {
		walkTree(tree, func(node *ProfileTree) {
			levelField.Append(int64(node.Level))
			valueField.Append(node.Value)
			selfField.Append(node.Self)
			labelField.Append(node.Name)
		})
	}
	frame.Fields = append(frame.Fields, labelField.GetField())
	return frame, labelField
}
// EnumField accumulates string labels as an enum-typed data field: each
// distinct string gets a sequential EnumItemIndex and the field stores
// indexes instead of repeated strings.
type EnumField struct {
	field *data.Field
	// valuesMap maps each distinct label string to its enum index.
	valuesMap map[string]data.EnumItemIndex
	// counter is the next enum index to hand out.
	counter data.EnumItemIndex
}
// NewEnumField creates an EnumField with the given name and labels and an
// empty value set.
func NewEnumField(name string, labels data.Labels) *EnumField {
	e := &EnumField{
		valuesMap: make(map[string]data.EnumItemIndex),
	}
	e.field = data.NewField(name, labels, []data.EnumItemIndex{})
	return e
}
// GetValuesMap returns the internal label -> enum-index map.
// The map is not copied; callers must not mutate it.
func (e *EnumField) GetValuesMap() map[string]data.EnumItemIndex {
	return e.valuesMap
}
// Append records one label value, interning it: a previously seen string
// reuses its enum index, a new one is assigned the next index.
func (e *EnumField) Append(value string) {
	index, seen := e.valuesMap[value]
	if !seen {
		index = e.counter
		e.valuesMap[value] = index
		e.counter++
	}
	e.field.Append(index)
}
// GetField finalizes and returns the enum-typed field: the enum text table
// is built from the interned values, ordered by their enum index.
func (e *EnumField) GetField() *data.Field {
	text := make([]string, len(e.valuesMap))
	for value, index := range e.valuesMap {
		text[index] = value
	}
	cfg := &data.FieldConfig{
		TypeConfig: &data.FieldTypeConfig{
			Enum: &data.EnumFieldConfig{Text: text},
		},
	}
	e.field.SetConfig(cfg)
	return e.field
}
// walkTree visits every node of tree in depth-first pre-order (a node
// before its children, children in declaration order) and calls fn on each.
//
// The previous iterative version rebuilt the stack with
// `append(stack[0].Nodes, stack[1:]...)` on every node that had children,
// which allocates per visit and appends into the backing array of the
// node's own Nodes slice. Plain recursion yields the same visit order
// without either problem.
func walkTree(tree *ProfileTree, fn func(tree *ProfileTree)) {
	fn(tree)
	for _, child := range tree.Nodes {
		walkTree(child, fn)
	}
}

View File

@ -0,0 +1,23 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package flamegraph
// FrameData is one flame-graph bar in the JSON API response: its depth,
// total value, self value, and display label.
type FrameData struct {
	Level int64  `json:"level"`
	Value int64  `json:"value"`
	Self  int64  `json:"self"`
	Label string `json:"label"`
}

184
internal/log/log.go Normal file
View File

@ -0,0 +1,184 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package log
import (
"fmt"
"io"
"os"
"path/filepath"
"runtime"
"github.com/sirupsen/logrus"
)
var logger *logrus.Logger
const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
// init configures the package-level logrus logger: quoted plain-text output
// to stdout with nanosecond RFC3339 timestamps, Info level by default.
// Caller reporting is deliberately disabled here; newLogrusEntry attaches
// the caller fields itself so the skip depth can be controlled per wrapper.
func init() {
	logger = logrus.New()
	logger.SetFormatter(&logrus.TextFormatter{
		DisableColors:   true,
		ForceQuote:      true,
		FullTimestamp:   true,
		TimestampFormat: rfc3339NanoFixed,
		DisableSorting:  true,
	})
	logger.SetOutput(os.Stdout)
	logger.SetLevel(logrus.InfoLevel)
	logger.SetReportCaller(false)
}
// newLogrusEntry returns a logrus entry annotated with the function name
// and file:line of the frame callerSkip levels above this call. When the
// caller cannot be resolved, placeholder values are used.
func newLogrusEntry(callerSkip int) *logrus.Entry {
	file, function := "<???>", "<???>"
	line := 1
	if pc, f, l, ok := runtime.Caller(callerSkip); ok {
		file = filepath.Base(f)
		function = runtime.FuncForPC(pc).Name()
		line = l
	}
	return logger.WithFields(logrus.Fields{
		logrus.FieldKeyFunc: function,
		logrus.FieldKeyFile: fmt.Sprintf("%s:%d", file, line),
	})
}
// SetLevel sets the logging level from its string name (e.g. "debug").
// An unparsable name is reported and leaves the current level unchanged.
func SetLevel(lvl string) {
	if level, err := logrus.ParseLevel(lvl); err != nil {
		Errorf("invalid lvl: %v", err)
	} else {
		logger.SetLevel(level)
	}
}
// GetLevel returns the standard logger level.
func GetLevel() logrus.Level {
	return logger.GetLevel()
}

// SetOutput sets the standard logger output.
func SetOutput(out io.Writer) {
	logger.SetOutput(out)
}

// AddHook adds a hook to the standard logger hooks.
func AddHook(hook logrus.Hook) {
	logger.AddHook(hook)
}

// WithError creates an entry from the standard logger and adds an error to
// it, using the value defined in ErrorKey as key. The caller fields refer
// to the caller of WithError (skip depth 2).
func WithError(err error) *logrus.Entry {
	return newLogrusEntry(2).WithError(err)
}
// The leveled wrappers below all follow the same pattern: IsLevelEnabled is
// checked first so the caller lookup in newLogrusEntry (runtime.Caller) is
// skipped entirely when the level is disabled. The skip depth of 2 makes
// the reported caller the caller of the wrapper, not the wrapper itself.

// Debug logs a message at level Debug on the standard logger.
func Debug(args ...any) {
	if logger.IsLevelEnabled(logrus.DebugLevel) {
		newLogrusEntry(2).Debug(args...)
	}
}

// Info logs a message at level Info on the standard logger.
func Info(args ...any) {
	if logger.IsLevelEnabled(logrus.InfoLevel) {
		newLogrusEntry(2).Info(args...)
	}
}

// Warn logs a message at level Warn on the standard logger.
func Warn(args ...any) {
	if logger.IsLevelEnabled(logrus.WarnLevel) {
		newLogrusEntry(2).Warn(args...)
	}
}

// Error logs a message at level Error on the standard logger.
func Error(args ...any) {
	if logger.IsLevelEnabled(logrus.ErrorLevel) {
		newLogrusEntry(2).Error(args...)
	}
}

// Panic logs a message at level Panic on the standard logger.
func Panic(args ...any) {
	if logger.IsLevelEnabled(logrus.PanicLevel) {
		newLogrusEntry(2).Panic(args...)
	}
}

// Fatal logs a message at level Fatal on the standard logger then the process will exit with status set to 1.
func Fatal(args ...any) {
	if logger.IsLevelEnabled(logrus.FatalLevel) {
		newLogrusEntry(2).Fatal(args...)
	}
}

// Debugf logs a message at level Debug on the standard logger.
func Debugf(format string, args ...any) {
	if logger.IsLevelEnabled(logrus.DebugLevel) {
		newLogrusEntry(2).Debugf(format, args...)
	}
}

// Infof logs a message at level Info on the standard logger.
func Infof(format string, args ...any) {
	if logger.IsLevelEnabled(logrus.InfoLevel) {
		newLogrusEntry(2).Infof(format, args...)
	}
}

// Warnf logs a message at level Warn on the standard logger.
func Warnf(format string, args ...any) {
	if logger.IsLevelEnabled(logrus.WarnLevel) {
		newLogrusEntry(2).Warnf(format, args...)
	}
}

// Errorf logs a message at level Error on the standard logger.
func Errorf(format string, args ...any) {
	if logger.IsLevelEnabled(logrus.ErrorLevel) {
		newLogrusEntry(2).Errorf(format, args...)
	}
}

// Panicf logs a message at level Panic on the standard logger.
func Panicf(format string, args ...any) {
	if logger.IsLevelEnabled(logrus.PanicLevel) {
		newLogrusEntry(2).Panicf(format, args...)
	}
}

// Fatalf logs a message at level Fatal on the standard logger then the process will exit with status set to 1.
func Fatalf(format string, args ...any) {
	if logger.IsLevelEnabled(logrus.FatalLevel) {
		newLogrusEntry(2).Fatalf(format, args...)
	}
}

// WithCallerSkip creates an entry whose reported caller is skip frames
// above the caller of this function.
func WithCallerSkip(skip int) *logrus.Entry {
	return newLogrusEntry(2 + skip)
}

207
internal/pod/container.go Normal file
View File

@ -0,0 +1,207 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pod
import (
"errors"
"fmt"
"sync"
"syscall"
"time"
"huatuo-bamai/internal/log"
)
var (
	// containers is the cache of all known containers, map: ContainerID -> *Container.
	containers = map[string]*Container{}

	// lastUpdatedAt is when the cache was last synced from kubelet;
	// updatedStep is the minimum interval between re-syncs, and
	// updatedLock serializes cache access in getContainers.
	lastUpdatedAt = time.Now()
	updatedStep   = 5 * time.Second
	updatedLock   sync.Mutex
)
// Container describes one container known to the agent, as synced from
// kubelet.
type Container struct {
	ID                string            `json:"id"`
	Name              string            `json:"name"`
	Hostname          string            `json:"hostname"`
	Type              ContainerType     `json:"type"`
	Qos               ContainerQos      `json:"qos"`
	IPAddress         string            `json:"ip_address"`
	NetNamespaceInode uint64            `json:"net_namespace_inode"`
	InitPid           int               `json:"init_pid"` // the pid-1 of container
	CgroupSuffix      string            `json:"cgroup_suffix"`
	CSS               map[string]uint64 `json:"css"`        // map: Name -> Address
	StartedAt         time.Time         `json:"started_at"` // started time
	SyncedAt          time.Time         `json:"synced_at"`  // synced time

	// lifeResouces (sic — exported accessor keeps the spelling) holds
	// per-container resources keyed by name; read via LifeResouces.
	lifeResouces map[string]any

	Labels map[string]any `json:"labels"` // custom labels
}
// String implements fmt.Stringer, rendering the container as
// "ID:Hostname/Name/Type:Qos/IPAddress".
func (c *Container) String() string {
	return fmt.Sprintf("%s:%s/%s/%s:%s/%s", c.ID, c.Hostname, c.Name, c.Type, c.Qos, c.IPAddress)
}
// LifeResouces returns the life resource registered under key, or nil when
// the key is absent (reading a nil map is safe).
func (c *Container) LifeResouces(key string) any {
	return c.lifeResouces[key]
}
// LabelHostNamespace returns the host-namespace label, or "" when the label
// is absent or not a string. (The previous unchecked type assertion would
// panic in those cases.)
func (c *Container) LabelHostNamespace() string {
	if v, ok := c.Labels[labelHostNamespace].(string); ok {
		return v
	}
	return ""
}
// getContainers returns the cached containers filtered by type mask and
// minimum QoS level, re-syncing the cache from kubelet when it is older
// than updatedStep.
//
// The whole operation runs under updatedLock, which serializes both the
// kubelet sync and the read of the shared `containers` map.
func getContainers(typeMask ContainerType, minLevel ContainerQos) (map[string]*Container, error) {
	updatedLock.Lock()
	defer updatedLock.Unlock()

	res := make(map[string]*Container)
	if time.Since(lastUpdatedAt) > updatedStep {
		if err := kubeletSyncContainers(); err != nil {
			// Best effort: a refused connection (e.g. kubelet not up yet)
			// yields an empty result, not a hard failure.
			if errors.Is(err, syscall.ECONNREFUSED) { // ignore error of no connections
				log.Debugf("failed to sync containers by ECONNREFUSED, err: %v", err)
				return res, nil
			}
			return res, err
		}
		lastUpdatedAt = time.Now()
	}
	log.Debugf("sync latest containers: %+v", containers)

	for _, c := range containers {
		// check Type: the container's type must be within the mask.
		if c.Type&typeMask == 0 {
			continue
		}
		// check Level: the QoS must be at least minLevel.
		if c.Qos < minLevel {
			continue
		}
		res[c.ID] = c
	}
	return res, nil
}
// GetContainersByType returns the containers matching typeMask, at any QoS
// level.
func GetContainersByType(typeMask ContainerType) (map[string]*Container, error) {
	return getContainers(typeMask, ContainerQosLevelMin)
}

// GetNormalContainers returns only the normal containers.
func GetNormalContainers() (map[string]*Container, error) {
	return GetContainersByType(ContainerTypeNormal)
}

// GetNormalAndSidecarContainers returns the normal and sidecar containers.
func GetNormalAndSidecarContainers() (map[string]*Container, error) {
	return GetContainersByType(ContainerTypeNormal | ContainerTypeSidecar)
}

// GetAllContainers returns all containers regardless of type or QoS level.
func GetAllContainers() (map[string]*Container, error) {
	return getContainers(ContainerTypeAll, ContainerQosLevelMin)
}
// GetContainerByID returns the container with the given id, or (nil, nil)
// when no such container is known.
func GetContainerByID(id string) (*Container, error) {
	all, err := GetAllContainers()
	if err != nil {
		return nil, err
	}
	// A missing key yields the zero value nil, matching the documented
	// (nil, nil) not-found contract.
	return all[id], nil
}
// GetContainerByIPAddress returns the normal container owning the given ip
// address, or (nil, nil) when none matches. Only normal containers are
// considered.
func GetContainerByIPAddress(ip string) (*Container, error) {
	normal, err := GetNormalContainers()
	if err != nil {
		return nil, err
	}
	for _, container := range normal {
		if container.IPAddress == ip {
			return container, nil
		}
	}
	return nil, nil
}
// GetContainerByNetNamespaceInode returns the normal container whose network
// namespace has the given inode, or (nil, nil) when none matches. Only
// normal containers are considered.
func GetContainerByNetNamespaceInode(inode uint64) (*Container, error) {
	normal, err := GetNormalContainers()
	if err != nil {
		return nil, err
	}
	for _, container := range normal {
		if container.NetNamespaceInode == inode {
			return container, nil
		}
	}
	return nil, nil
}
// GetContainerByCSS returns the container whose cgroup_subsys_state address
// for the given subsystem equals css, or (nil, nil) when none matches.
func GetContainerByCSS(css uint64, subsys string) (*Container, error) {
	all, err := GetAllContainers()
	if err != nil {
		return nil, err
	}
	for _, container := range all {
		if addr, ok := container.CSS[subsys]; ok && addr == css {
			return container, nil
		}
	}
	return nil, nil
}
// GetCSSToContainerID builds the mapping from css address to container id
// for one subsystem.
// Usage: return_val = GetCSSToContainerID('cpu')
//
//	container_id = return_val[0xffffffffc0601000]
func GetCSSToContainerID(subsys string) (map[uint64]string, error) {
	all, err := GetAllContainers()
	if err != nil {
		return nil, err
	}
	cssToID := make(map[uint64]string, len(all))
	for _, c := range all {
		if addr, ok := c.CSS[subsys]; ok {
			cssToID[addr] = c.ID
		}
	}
	return cssToID, nil
}

View File

@ -0,0 +1,313 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !didi
package pod
import (
"bufio"
"context"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"regexp"
"strings"
"sync"
"syscall"
"time"
"huatuo-bamai/internal/bpf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/utils/cgrouputil"
"huatuo-bamai/pkg/types"
mapset "github.com/deckarep/golang-set"
)
// XXX go:generate go run -mod=mod github.com/cilium/ebpf/cmd/bpf2go -target amd64 cgroupCssGather $BPF_DIR/cgroup_css_gather.c -- $BPF_INCLUDE
// use the huatuo bpf framework:
//
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/cgroup_css_gather.c -o $BPF_DIR/cgroup_css_gather.o
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/cgroup_css_events.c -o $BPF_DIR/cgroup_css_events.o
// parseContainerCSS returns the subsystem-name -> css-address map for the
// given container, built from the gathered css metadata. It never fails;
// the error return exists for interface symmetry.
func parseContainerCSS(containerID string) (map[string]uint64, error) {
	cssList := cgroupListCssDataByKnode(containerID)
	msg := make(map[string]uint64, len(cssList))
	for _, css := range cssList {
		msg[css.SubSys] = css.CSS
	}
	return msg, nil
}
const (
	// cgroupSubsysCount is the number of css slots carried per perf event.
	// NOTE(review): presumably must match the array size in the BPF
	// programs (cgroup_css_*.c) — confirm when changing.
	cgroupSubsysCount = 13
	// kubeletContainerIDKnodeMaxlen is the expected length of a
	// container-ID cgroup directory name.
	kubeletContainerIDKnodeMaxlen = 64
)
var (
	// FIXME:
	// 1. cgroup v1 supported only
	// 2. cgroup dir name is containerID
	//
	// kubeletContainerIDRegexp matches any non-alphanumeric character;
	// a name containing one is not a container ID.
	kubeletContainerIDRegexp = regexp.MustCompile(`[^a-zA-Z0-9]+`)
	// cgroupv1SubSysName lists the cgroup v1 hierarchies walked by
	// cgroupCssNotify.
	cgroupv1SubSysName = []string{"cpu", "cpuacct", "cpuset", "memory", "blkio"}
	// cgroupv1NotifyCgroupFile is the per-cgroup file read (for its side
	// effect only) to trigger gather events.
	cgroupv1NotifyCgroupFile = "cgroup.clone_children"
	// cgroupCssID2SubSysNameMap maps subsystem index -> name, filled once
	// by cgroupInitSubSysIDs.
	cgroupCssID2SubSysNameMap = map[int]string{}
	// cgroupCssMetaDataMap maps css address -> *containerCssMetaData.
	cgroupCssMetaDataMap sync.Map

	// avoid GC
	_cgroupCssBpfInternal *bpf.BPF
)
// isValidKnodeName reports whether name looks like a container ID: it must
// contain only alphanumeric characters. Note that the empty string passes
// this check.
func isValidKnodeName(name string) bool {
	return !kubeletContainerIDRegexp.MatchString(name)
}
// containerCssMetaData is one subsystem's cgroup_subsys_state record for a
// container, stored in cgroupCssMetaDataMap keyed by the css address.
type containerCssMetaData struct {
	CSS         uint64 // cgroup_subsys_state kernel address
	SubSys      string // subsystem name, e.g. "cpu"
	Cgroup      uint64
	CgroupRoot  int32
	CgroupLevel int32
	ContainerID string
}
// containerCssPerfEvent is the wire format of one cgroup mkdir/rmdir perf
// event read via bpf.PerfEventReader.ReadInto.
// NOTE(review): field order and sizes presumably mirror the struct emitted
// by cgroup_css_events.c / cgroup_css_gather.c — keep them in sync.
type containerCssPerfEvent struct {
	Cgroup      uint64
	OpsType     uint64 // 0 = mkdir cgroup, 1 = rmdir cgroup (see cgroupCssEventSync)
	CgroupRoot  int32
	CgroupLevel int32
	// CSS holds one cgroup_subsys_state address per subsystem slot;
	// zero means the slot is unused.
	CSS [cgroupSubsysCount]uint64
	// KnodeName is the NUL-terminated cgroup directory (container ID) name.
	KnodeName [kubeletContainerIDKnodeMaxlen + 2]byte
}
// cgroupListCssDataByKnode collects every stored css metadata entry that
// belongs to the given container ID. The result is never nil.
func cgroupListCssDataByKnode(containerID string) []*containerCssMetaData {
	res := []*containerCssMetaData{}
	cgroupCssMetaDataMap.Range(func(_, v any) bool {
		meta, ok := v.(*containerCssMetaData)
		if ok && meta.ContainerID == containerID {
			res = append(res, meta)
		}
		return true
	})
	return res
}
// cgroupUpdateOrCreateCssData stores one css metadata entry per populated
// subsystem slot of a cgroup-mkdir event, keyed by css address.
//
// The returned error only signals that the event's knode name does not look
// like a container ID (e.g. a non-container cgroup directory).
func cgroupUpdateOrCreateCssData(data *containerCssPerfEvent) error {
	// KnodeName is a NUL-terminated C string: cut at the first NUL instead
	// of only trimming trailing NULs, so stale bytes after the terminator
	// cannot leak into the ID.
	knodeName := string(data.KnodeName[:])
	if i := strings.IndexByte(knodeName, 0); i >= 0 {
		knodeName = knodeName[:i]
	}
	if !isValidKnodeName(knodeName) {
		return fmt.Errorf("knode name is not containerID")
	}

	for index, css := range data.CSS {
		// A zero address means the subsystem slot is unused.
		if css == 0 {
			continue
		}
		if sysName, ok := cgroupCssID2SubSysNameMap[index]; ok {
			m := &containerCssMetaData{
				CSS:         css,
				Cgroup:      data.Cgroup,
				CgroupRoot:  data.CgroupRoot,
				CgroupLevel: data.CgroupLevel,
				ContainerID: knodeName,
				SubSys:      sysName,
			}
			log.Debugf("update container css data: %+v", m)
			cgroupCssMetaDataMap.Store(css, m)
		}
	}
	return nil
}
// cgroupDeleteCssData removes the stored css metadata for every populated
// subsystem slot of a cgroup-rmdir event.
//
// The returned error only signals that the event's knode name does not look
// like a container ID.
func cgroupDeleteCssData(data *containerCssPerfEvent) error {
	// KnodeName is a NUL-terminated C string: cut at the first NUL instead
	// of only trimming trailing NULs, so stale bytes after the terminator
	// cannot leak into the ID.
	knodeName := string(data.KnodeName[:])
	if i := strings.IndexByte(knodeName, 0); i >= 0 {
		knodeName = knodeName[:i]
	}
	if !isValidKnodeName(knodeName) {
		return fmt.Errorf("knode name is not containerID")
	}

	for index, css := range data.CSS {
		// A zero address means the subsystem slot is unused.
		if css == 0 {
			continue
		}
		if _, ok := cgroupCssID2SubSysNameMap[index]; ok {
			if m, loaded := cgroupCssMetaDataMap.LoadAndDelete(css); loaded {
				log.Debugf("delete container css data: %+v", m)
			}
		}
	}
	return nil
}
// cgroupCssEventSync starts a goroutine that consumes cgroup css perf
// events from reader and applies them to cgroupCssMetaDataMap. The
// goroutine exits when ctx is cancelled or the reader fails.
func cgroupCssEventSync(ctx context.Context, reader bpf.PerfEventReader) {
	go func() {
		for {
			select {
			case <-ctx.Done():
				return
			default:
				var data containerCssPerfEvent
				if err := reader.ReadInto(&data); err != nil {
					// Cancellation surfaces as ErrExitByCancelCtx; anything
					// else is a real read failure worth logging.
					if !errors.Is(err, types.ErrExitByCancelCtx) {
						log.Errorf("cgroup css sync read events: %v", err)
					}
					return
				}
				log.Debugf("sync container css data: %+v", data)
				// Update/delete errors (non-container cgroups) are
				// intentionally ignored.
				switch data.OpsType {
				case 0: // mkdir cgroup
					_ = cgroupUpdateOrCreateCssData(&data)
				case 1: // rmdir cgroup
					_ = cgroupDeleteCssData(&data)
				default:
					log.Errorf("css event opstype not supported: %+v", data)
				}
			}
		}
	}()
}
// cgroupCssNotify walks the cgroup v1 hierarchies and reads the
// cgroup.clone_children file of every container-level directory (a
// directory whose name is exactly 64 characters). The reads are purely for
// their side effect — presumably to trigger kernel activity that the
// css-gather BPF program traces; confirm against cgroup_css_gather.c.
//
// rootSet de-duplicates hierarchies whose mount points resolve (via
// symlinks) to the same directory, e.g. cpu and cpuacct.
func cgroupCssNotify() {
	rootSet := mapset.NewSet()
	for _, subsys := range cgroupv1SubSysName {
		root := cgrouputil.CgroupRootFsFilePath(subsys)
		realRoot, err := filepath.EvalSymlinks(root)
		if err != nil {
			continue
		}
		if rootSet.Contains(realRoot) {
			continue
		}
		rootSet.Add(realRoot)
		if err := filepath.WalkDir(realRoot, func(path string, d fs.DirEntry, err error) error {
			if err != nil {
				return err
			}
			// Only container-ID directories (64-char names) are of interest.
			if !d.IsDir() || len(d.Name()) != kubeletContainerIDKnodeMaxlen {
				return nil
			}
			// Content is discarded; the read itself is the point.
			notifyPath := filepath.Join(path, cgroupv1NotifyCgroupFile)
			_, _ = os.ReadFile(notifyPath)
			log.Debugf("read cgroup path: %s", notifyPath)
			// No need to descend into the container's sub-cgroups.
			return filepath.SkipDir
		}); err != nil {
			// A directory vanishing mid-walk (ENOENT) is expected churn.
			// NOTE(review): any other error silently aborts the remaining
			// subsystems — confirm this is intended.
			var e *os.PathError
			if errors.As(err, &e) && errors.Is(e.Err, syscall.ENOENT) {
				continue
			}
			return
		}
	}
}
// cgroupInitSubSysIDs fills cgroupCssID2SubSysNameMap with the subsystem
// names from /proc/cgroups, indexed by their order of appearance (assumed
// to match the kernel's subsystem id order — TODO confirm).
func cgroupInitSubSysIDs() error {
	file, err := os.Open("/proc/cgroups")
	if err != nil {
		return err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	// Skip the first (header) line.
	scanner.Scan()

	ssid := 0
	for scanner.Scan() {
		// Each line is "<name>\t<hierarchy>\t<num_cgroups>\t<enabled>";
		// only the name is needed.
		arr := strings.SplitN(scanner.Text(), "\t", 2)
		cgroupCssID2SubSysNameMap[ssid] = arr[0]
		ssid++
	}
	// Report read errors instead of silently truncating the id map.
	return scanner.Err()
}
// cgroupInitEventCssWithoutCleanup loads the long-lived BPF program that
// reports cgroup mkdir/rmdir events and starts the event-sync goroutine.
// As the name says, nothing is cleaned up: the BPF object is parked in
// _cgroupCssBpfInternal so it is never garbage-collected, and the context
// is a never-cancelled Background context — the tracking runs for the
// lifetime of the process.
func cgroupInitEventCssWithoutCleanup() error {
	cssBpf, err := bpf.LoadBpf("cgroup_css_events.o", nil)
	if err != nil {
		return fmt.Errorf("LoadBpf: %w", err)
	}
	_cgroupCssBpfInternal = &cssBpf

	childCtx := context.Background()
	reader, err := cssBpf.AttachAndEventPipe(childCtx, "cgroup_perf_events", 8192)
	if err != nil {
		log.Infof("AttachAndEventPipe: %v", err)
		return err
	}
	cgroupCssEventSync(childCtx, reader)
	return nil
}
// cgroupInitGatherCss performs a one-shot gather of css addresses for the
// cgroups that already exist: it loads the gather BPF program, starts the
// event-sync goroutine, then walks the cgroup hierarchies (cgroupCssNotify)
// to make every existing container cgroup produce an event. All resources
// are released before returning.
//
// NOTE(review): the fixed sleeps assume the pipe is ready within 100ms and
// all events are drained within 1s — TODO confirm these windows suffice
// under load.
func cgroupInitGatherCss() error {
	cssBpf, err := bpf.LoadBpf("cgroup_css_gather.o", nil)
	if err != nil {
		return fmt.Errorf("LoadBpf: %w", err)
	}
	defer cssBpf.Close()

	childCtx, cancel := context.WithCancel(context.Background())
	defer cancel()
	reader, err := cssBpf.AttachAndEventPipe(childCtx, "cgroup_perf_events", 8192)
	if err != nil {
		log.Infof("AttachAndEventPipe: %v", err)
		return err
	}
	defer reader.Close()

	cgroupCssEventSync(childCtx, reader)
	// Give the event-sync goroutine a moment to start reading.
	time.Sleep(100 * time.Millisecond)
	cgroupCssNotify()
	// wait sync: let the generated events drain before tearing down.
	time.Sleep(1 * time.Second)
	return nil
}
// ContainerCgroupCssInit initializes container css tracking: it loads the
// subsystem-id table from /proc/cgroups, gathers css addresses for the
// cgroups that already exist, then installs the long-lived BPF program that
// tracks cgroup mkdir/rmdir from now on.
//
// It panics when /proc/cgroups cannot be parsed (only cgroup v1 is
// supported); the panic now carries the underlying error instead of
// discarding it.
func ContainerCgroupCssInit() error {
	if err := cgroupInitSubSysIDs(); err != nil {
		panic(fmt.Errorf("only support cgroupv1 now: %w", err))
	}
	if err := cgroupInitGatherCss(); err != nil {
		return err
	}
	return cgroupInitEventCssWithoutCleanup()
}

Some files were not shown because too many files have changed in this diff Show More