Compare commits
1 Commits
Author | SHA1 | Date |
---|---|---|
|
334c032be0 |
|
@ -0,0 +1,5 @@
|
|||
*.o
|
||||
|
||||
bpf/include/vmlinux.h
|
||||
_output/
|
||||
cmd/**/bin/
|
|
@ -0,0 +1,128 @@
|
|||
---
|
||||
linters:
|
||||
disable-all: true
|
||||
enable:
|
||||
- goimports
|
||||
- gosimple
|
||||
- ineffassign # Detects when assignments to existing variables are not used
|
||||
- unconvert # Remove unnecessary type conversions
|
||||
- exportloopref # Checks for pointers to enclosing loop variables
|
||||
- tenv # Detects using os.Setenv instead of t.Setenv since Go 1.17
|
||||
- dupword # Checks for duplicate words in the source code
|
||||
- gofmt # Gofmt checks whether code was gofmt-ed
|
||||
- bodyclose # checks whether HTTP response body is closed successfully
|
||||
- misspell
|
||||
- staticcheck
|
||||
- typecheck
|
||||
- unused
|
||||
- loggercheck
|
||||
- nakedret
|
||||
- gofumpt
|
||||
- musttag
|
||||
- whitespace
|
||||
- dupword
|
||||
- gocritic
|
||||
- usestdlibvars
|
||||
- gosec
|
||||
- govet
|
||||
- nolintlint
|
||||
- unused
|
||||
- errcheck
|
||||
- errname
|
||||
- errorlint
|
||||
- fatcontext
|
||||
- gocheckcompilerdirectives
|
||||
- inamedparam
|
||||
|
||||
# Could be enabled later:
|
||||
# - gocyclo
|
||||
# - prealloc
|
||||
# - maligned
|
||||
|
||||
linters-settings:
|
||||
unused:
|
||||
# Mark all struct fields that have been written to as used.
|
||||
# Default: true
|
||||
field-writes-are-uses: false
|
||||
# Mark all local variables as used.
|
||||
# default: true
|
||||
local-variables-are-used: false
|
||||
misspell:
|
||||
# Correct spellings using locale preferences for US or UK.
|
||||
# Setting locale to US will correct the British spelling of 'colour' to 'color'.
|
||||
# Default is to use a neutral variety of English.
|
||||
locale: US
|
||||
gofumpt:
|
||||
# Choose whether to use the extra rules.
|
||||
# Default: false
|
||||
extra-rules: true
|
||||
# Module path which contains the source code being formatted.
|
||||
module-path: huatuo-bamai
|
||||
gocritic:
|
||||
enabled-tags:
|
||||
- diagnostic
|
||||
- style
|
||||
- performance
|
||||
- experimental
|
||||
- opinionated
|
||||
disabled-checks:
|
||||
- commentedOutCode
|
||||
- deferInLoop
|
||||
- evalOrder
|
||||
- exitAfterDefer
|
||||
- exposedSyncMutex
|
||||
- ifElseChain
|
||||
- importShadow
|
||||
- sloppyReassign
|
||||
- unnamedResult
|
||||
- whyNoLint
|
||||
- filepathJoin
|
||||
nolintlint:
|
||||
allow-unused: true
|
||||
gosec:
|
||||
# https://github.com/securego/gosec#available-rules
|
||||
#
|
||||
# The following issues surfaced when `gosec` linter
|
||||
# was enabled.
|
||||
# Disable G115:
|
||||
# "G115: integer overflow conversion int8 -> uint64 (gosec)"
|
||||
excludes:
|
||||
- G107
|
||||
- G115
|
||||
- G204
|
||||
- G401
|
||||
- G501
|
||||
exclude-dirs:
|
||||
- pkg/tracing
|
||||
- vendor
|
||||
issues:
|
||||
# List of regexps of issue texts to exclude.
|
||||
#
|
||||
# But independently of this option we use default exclude patterns,
|
||||
# it can be disabled by `exclude-use-default: false`.
|
||||
# To list all excluded by default patterns execute `golangci-lint run --help`
|
||||
#
|
||||
# Default: https://golangci-lint.run/usage/false-positives/#default-exclusions
|
||||
#
|
||||
# _xxx as used var.
|
||||
exclude:
|
||||
- "^(var|field) `_.*` is unused$"
|
||||
exclude-rules:
|
||||
- linters:
|
||||
- revive
|
||||
text: "if-return"
|
||||
- linters:
|
||||
- revive
|
||||
text: "empty-block"
|
||||
- linters:
|
||||
- revive
|
||||
text: "superfluous-else"
|
||||
- linters:
|
||||
- revive
|
||||
text: "unused-parameter"
|
||||
- linters:
|
||||
- revive
|
||||
text: "unreachable-code"
|
||||
- linters:
|
||||
- revive
|
||||
text: "redefines-builtin-id"
|
|
@ -0,0 +1,202 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright {yyyy} Authors of Cilium
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
GO ?= go
|
||||
|
||||
# the root directory
|
||||
ROOT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
|
||||
|
||||
# bpf source code files
|
||||
BPF_DIR := $(ROOT_DIR)/bpf
|
||||
|
||||
# used for go generate to compile eBPF
|
||||
BPF_COMPILE := $(ROOT_DIR)/build/clang.sh
|
||||
BPF_INCLUDE := "-I$(BPF_DIR)/include"
|
||||
|
||||
APP_COMMIT ?= $(shell git describe --dirty --long --always)
|
||||
APP_BUILD_TIME=$(shell date "+%Y%m%d%H%M%S")
|
||||
APP_VERSION="1.0"
|
||||
|
||||
GO_BUILD_STATIC := CGO_ENABLED=1 $(GO) build -tags "netgo osusergo $(GO_TAGS)" -gcflags=all="-N -l" \
|
||||
-ldflags "-extldflags -static
|
||||
GO_BUILD_STATIC_WITH_VERSION := $(GO_BUILD_STATIC) \
|
||||
-X main.AppVersion=$(APP_VERSION) \
|
||||
-X main.AppGitCommit=$(APP_COMMIT) \
|
||||
-X main.AppBuildTime=$(APP_BUILD_TIME)"
|
||||
|
||||
# export
|
||||
export GO_BUILD_STATIC
|
||||
|
||||
all: gen-deps gen build tracer
|
||||
|
||||
gen-deps:
|
||||
# maybe need to install libbpf-devel
|
||||
|
||||
gen:
|
||||
@BPF_DIR=$(BPF_DIR) \
|
||||
BPF_COMPILE=$(BPF_COMPILE) \
|
||||
BPF_INCLUDE=$(BPF_INCLUDE) \
|
||||
$(GO) generate -x ./...
|
||||
|
||||
build:
|
||||
$(GO_BUILD_STATIC_WITH_VERSION) -o _output/bin/huatuo-bamai ./cmd/huatuo-bamai
|
||||
|
||||
TRACER_DIR := cmd
|
||||
BIN_DIR := bin
|
||||
|
||||
SUBDIRS := $(shell find $(TRACER_DIR) -mindepth 1 -maxdepth 1 -type d -not -path "$(BIN_DIR)" | grep -v 'depend\|huatuo-bamai')
|
||||
TARGETS := $(patsubst %,$(BIN_DIR)/%,$(notdir $(SUBDIRS)))
|
||||
COMBINED := $(foreach dir,$(SUBDIRS),$(dir)/$(BIN_DIR)/*.bin)
|
||||
|
||||
tracer: $(TARGETS)
|
||||
$(BIN_DIR)/%: $(TRACER_DIR)/%
|
||||
cd $< && make
|
||||
|
||||
check: imports fmt golangci-lint
|
||||
|
||||
imports:
|
||||
@echo "imports"
|
||||
@goimports -w -local huatuo-bamai $(shell find . -type f -name '*.go' -not -path "./vendor/*")
|
||||
|
||||
fmt: fmt-rewrite-rules
|
||||
@echo "gofumpt"
|
||||
gofumpt -l -w $(shell find . -type f -name '*.go' -not -path "./vendor/*")
|
||||
|
||||
fmt-rewrite-rules:
|
||||
@echo "fmt-rewrite-rules"
|
||||
gofmt -w -r 'interface{} -> any' $(shell find . -type f -name '*.go' -not -path "./vendor/*")
|
||||
|
||||
golangci-lint:
|
||||
@echo "golangci-lint"
|
||||
golangci-lint run --build-tags=$(GO_TAGS) -v ./... --timeout=5m --config .golangci.yaml
|
||||
|
||||
vendor:
|
||||
$(GO) mod tidy
|
||||
$(GO) mod verify
|
||||
$(GO) mod vendor
|
||||
|
||||
clean:
|
||||
rm -rf _output $(shell find . -type f -name "*.o") $(COMBINED)
|
||||
|
||||
.PHONY: all gen-deps gen build tracer check imports golint fmt golangci-lint vendor clean
|
91
README.md
|
@ -1,2 +1,91 @@
|
|||
# huatuo-bamai
|
||||
简体中文 | [English](./README_EN.md)
|
||||
|
||||
# 什么是 HUATUO
|
||||
**HUATUO(华佗)**是由**滴滴**开源并依托 **CCF 开源发展委员会**孵化的云原生操作系统可观测性项目,专注于为复杂云原生环境提供操作系统内核级深度观测能力。该项目基于 [eBPF](https://docs.kernel.org/userspace-api/ebpf/syscall.html) 技术,通过整合 [kprobe](https://www.kernel.org/doc/html/latest/trace/kprobes.html)、 [tracepoint](https://www.kernel.org/doc/html/latest/trace/tracepoints.html)、 [ftrace](https://www.kernel.org/doc/html/latest/trace/ftrace.html) 等内核动态追踪技术,实现了多维度的内核观测能力:**1.** 更精细化的内核子系统埋点指标 Metric **2.** 异常事件驱动的内核运行时上下文捕获 Events **3.** 针对系统突发毛刺的自动追踪 AutoTracing、AutoProfiling。该项目逐步构建了完整的 Linux 内核深度可观测体系架构。目前,HUATUO 已在滴滴生产环境中实现规模化部署,在诸多故障场景中发挥关键作用,有效保障了云原生操作系统的高可用性和性能优化。通过持续的技术演进,希望 HUATUO 能够推动 eBPF 技术在云原生可观测领域向更细粒度、更低开销、更高时效性的方向发展。
|
||||
|
||||
|
||||
# 核心特性
|
||||
- **低损耗内核全景观测**:基于 BPF 技术,保持性能损耗小于1%的基准水位,实现对内存管理、CPU 调度、网络及块 IO 子系统等核心模块的精细化、全维度、全景观测与性能剖析。通过自适应采样机制,实现系统资源损耗与观测精度的动态平衡。
|
||||
- **异常事件驱动诊断**:构建基于异常事件驱动的运行时上下文捕获机制,聚焦内核异常与慢速路径的精准埋点。当发生缺页异常、调度延迟、锁竞争等关键事件时,自动触发调用链追踪,生成包含寄存器状态、堆栈轨迹及资源占用的图谱诊断信息。
|
||||
- **全自动化追踪 AutoTracing**:AutoTracing 模块采用启发式追踪算法,解决云原生复杂场景下的典型性能毛刺故障。针对 CPU idle 掉底,CPU sys 突增,IO 突增,loadavg 突增等棘手问题,实现自动化快照留存机制和根因分析。
|
||||
- **持续性能剖析 Profiling**:持续对操作系统内核,应用程序进行全方位性能剖析,涉及系统 CPU、内存、I/O、 锁、以及各种解释性编程语言,力助业务持续的优化迭代更新。该功能在哨兵压测,防火演练,重要节假日护堤等场景发挥关键作用。
|
||||
- **分布式链路追踪 Tracing**:以网络为中心的面向服务请求的分布式链路追踪,能够清晰的划分系统调用层级关系,节点关联关系,耗时记账等,支持在大规模分布式系统中的跨节点追踪,提供微服务调用的全景视图,保障系统在复杂场景下的稳定性。
|
||||
- **开源技术生态融合**:无缝对接主流开源可观测技术栈,如 Prometheus、Grafana、Pyroscope、Elasticsearch等。支持独立物理机和云原生部署,自动感知 K8S 容器资源/标签/注解,自动关联操作系统内核事件指标,消除数据孤岛。通过零侵扰、内核可编程方式兼容主流硬件平台和内核版本,确保其适应性、应用性。
|
||||
|
||||
# 快速上手
|
||||
为用户开发者快速体验 HUATUO, 我们提供容器编译镜像的便捷方式,一键运行 docker compose 即可启动。该命令会启动 elasticsearch, prometheus, grafana 以及编译的 huatuo-bamai 组件。上述命令执行成功后,打开浏览器访问 http://localhost:3000 即可浏览监控大盘。
|
||||
|
||||
```bash
|
||||
$ docker compose --project-directory ./build/docker up
|
||||
```
|
||||
|
||||
# 软件架构
|
||||

|
||||
|
||||
|
||||
# 功能列表
|
||||
## Autotracing
|
||||
| 追踪名称 | 核心功能 | 场景 |
|
||||
| ---------------| --------------------- |-------------------------------------- |
|
||||
| cpu sys | 宿主 sys 增高检测 | 由于系统负载异常导致业务毛刺问题 |
|
||||
| cpu idle | 容器 cpu idle 掉底检测,提供调用栈,火焰图,进程上下文信息等 | 容器 cpu 使用异常,帮助业务判断进程热点是否异常 |
|
||||
| dload | 跟踪 D 状态进程,提供容器运行情况、D 状态进程调用栈信息等 | 由于系统 D 或 R 状态进程数量突增导致负载升高的问题。系统 D 状态突增通常和资源不可用或者锁被长期持有相关,R 状态进程数量突增往往是业务代码设计不合理导致 |
|
||||
| waitrate | 容器 cpu 外部争抢检测,提供发生争抢时的容器信息等 | 容器 cpu 争抢可能会引起业务毛刺,已存在争抢指标缺乏具体争抢容器信息,通过 waitrate 追踪可以获取参与争抢的容器信息,给混部资源隔离提供参考 |
|
||||
| memburst | 记录突发内存分配上下文 | 宿主机短时间内大量分配内存时,检测宿主机上短时间内大量分配内存的事件,突发性内存分配可能引发直接回收或者 oom 等 |
|
||||
| iotracer | 检测宿主磁盘满、IO 延迟异常时,输出异常时 IO 访问的文件名和路径、磁盘设备、inode 号、容器等上下文信息 | 频繁出现磁盘 IO 带宽打满、磁盘访问突增,进而导致应用请求延迟或者系统性能抖动 |
|
||||
|
||||
## Events
|
||||
| 事件名称 | 核心功能 | 场景 |
|
||||
| ---------------| --------------------- |----------------------------------------|
|
||||
| softirq | 宿主软中断延迟响应或长期关闭,输出长时间关闭软中断的调用栈,进程信息等 | 该类问题会严重影响网络收发,进而导致业务毛刺或者超时等其他问题 |
|
||||
| dropwatch | TCP 数据包丢包检测,输出发生丢包时主机、网络上下文信息等 | 该类问题主要会引起业务毛刺和延迟 |
|
||||
| netrecvlat | 在网络收方向获取数据包从驱动、协议栈、到用户主动收过程的延迟事件 | 网络延迟问题中有一类是数据传输阶段收方向存在延迟,但不清楚是延迟位置,netrecvlat case 根据 skb 入网卡时间戳依次在驱动、协议栈和用户 copy 数据的路径计算延迟,通过预先设定的阈值过滤超时的数据包,已定位延迟位置 |
|
||||
| oom | 检测宿主或容器内 oom 事件 | 当宿主机层面或者容器维度发生 oom 事件时,能够获取触发 oom 的进程信息、被 kill 的进程信息以及容器信息,便于定位进程内存泄漏、异常退出等问题 |
|
||||
| softlockup | 当系统上发生 softlockup 时,收集目标进程信息以及 cpu 信息,同时获取各个 cpu 上的内核栈信息 | 系统发生 softlockup |
|
||||
| hungtask | 提供系统内所有 D 状态进程数量、内核栈信息 | 用于定位瞬时出现 D 进程的场景,能及时保留现场便于后期问题跟踪 |
|
||||
| memreclaim | 进程进入直接回收的耗时,超过时间阈值,记录进程信息 | 内存压力过大时,如果此时进程申请内存,有可能进入直接回收,此时处于同步回收阶段,可能会造成业务进程的卡顿,此时记录进程进入直接回收的时间,有助于我们判断此进程被直接回收影响的剧烈程度 |
|
||||
|
||||
## Metrics
|
||||
metrics 采集包括各子系统的众多指标,包括 cpu, memory, io, network 等,metrics 主要来源 procfs, eBPF, 计算聚合等,以下为部分 Metrics 的简介。[详细参考](docs/metrics.md)
|
||||
|
||||
| 子系统 | Metric | 描述 | 维度 |
|
||||
| ----------- | --------------- | ----------------------------------- | ------------------ |
|
||||
| cpu | sys, usr, util | cpu 占用百分比 | 宿主、容器 |
|
||||
| cpu | burst, throttled | cpu burst 时长, throttled/limited 的次数 | 容器 |
|
||||
| cpu | inner, exter_wait_rate | 容器内外部争抢指数 | 容器 |
|
||||
| cpu | nr_running, nr_uninterruptible | 对应状态的任务数 | 容器 |
|
||||
| cpu | load 1, 5, 15 | 宿主的 1、5、15 分钟平滑负载值 | 宿主 |
|
||||
| cpu | softirq_latency | NET_RX/NET_TX 中断延迟在指定区间内的次数 | 宿主 |
|
||||
| cpu | runqlat_nlat | 调度延迟在指定时间段内的出现的次数 | 宿主、容器 |
|
||||
| cpu | reschedipi_oversell_probability | VM 宿主机可能发生 cpu 超卖 | 宿主 |
|
||||
| memory | direct_reclaim | 内存直接回收相关指标 | 容器 |
|
||||
| memory | asyncreclaim | 内存异步回收相关指标 | 容器 |
|
||||
| memory | vmstat, memory_stat | 其他内存状态指标 | 宿主、容器 |
|
||||
| memory | hungtask, oom, softlockup | 事件计数统计 | 宿主、容器 |
|
||||
| IO | d2c | 统计 IO 的延迟,只包括驱动和磁盘硬件处理部分 | 宿主、容器 |
|
||||
| IO | q2c | 统计 IO 的延迟,包括整个 IO 生命周期 | 宿主、容器 |
|
||||
| IO | disk_freeze | 统计磁盘 freeze 的事件次数 | 宿主 |
|
||||
| IO | disk_flush | 统计 RAI 设备的 flush 操作延迟 | 宿主、容器 |
|
||||
| network | arp | ARP 缓存数量 | 系统、宿主、容器 |
|
||||
| network | tcp, udp mem | Socket 使用、socket 内存使用等 | 系统、宿主、容器 |
|
||||
| network | qdisc | 网络出向队列状态统计 | 宿主 |
|
||||
| network | netdev | 设备指标统计 | 宿主、容器 |
|
||||
| network | netstat | 网络指标统计 | 宿主、容器 |
|
||||
| network | sockstat | Socket 指标统计 | 宿主、容器 |
|
||||
|
||||
|
||||
# 前端展示
|
||||
## 机房内核事件总览
|
||||

|
||||

|
||||
## AutoProfiling
|
||||

|
||||
## Dropwatch
|
||||

|
||||
## net_rx_latency
|
||||

|
||||
|
||||
|
||||
# 联系我们
|
||||
@[hao022](https://github.com/hao022)
|
||||
@[nashuiliang](https://github.com/nashuiliang)
|
|
@ -0,0 +1,98 @@
|
|||
[简体中文](./README_CN.md) | English
|
||||
|
||||
# Abstract
|
||||
**HuaTuo (华佗)** aims to provide in-depth observability for the OS Linux kernel in complex **cloud-native** scenarios. The project is based on [eBPF](https://docs.kernel.org/userspace-api/ebpf/syscall.html) technology and has built a set of deep observation service components for the Linux kernel. By leveraging kernel dynamic tracing technologies such as [kprobe](https://www.kernel.org/doc/html/latest/trace/kprobes.html), [tracepoint](https://www.kernel.org/doc/html/latest/trace/tracepoints.html), and [ftrace](https://www.kernel.org/doc/html/latest/trace/ftrace.html), HuaTuo provides more observation perspectives for the Linux kernel, including kernel runtime context capture driven by anomalous events and more granular, accurate kernel per subsystem metrics.
|
||||
|
||||
HuaTuo also integrates core technologies such as automated tracing, profiling, and distributed tracing for system performance spikes. HuaTuo has been successfully applied on a large scale within Didi (DiDi Global Inc.), solidly guaranteeing the stability and performance optimization of cloud-native operating systems and showcasing the distinct advantages of eBPF technology in cloud-native scenarios.
|
||||
|
||||
# Key Features
|
||||
- **Continuous** Kernel Observability: Achieves in-depth, low-overhead (less than 1% performance impact) instrumentation of various kernel subsystems, providing comprehensive metrics on memory, CPU scheduling, network stack, and disk I/O.
|
||||
- Kernel **Anomaly-Driven** Observability: Instruments the kernel's exception paths and slow paths to capture rich runtime context triggered by anomalous events, enabling more insightful observability data.
|
||||
- **Automated** Tracing (AutoTracing): Implements automated tracing capabilities to address system resource spikes and performance jitters (e.g., CPU idle drop, raising CPU sys utilization, I/O bursts, and Loadavg raising).
|
||||
- **Smooth Transition** to Popular Observability Stacks: Provides standard data sources for Prometheus and Pyroscope, integrates with Kubernetes container resources, and automatically correlates Kubernetes labels/annotations with kernel event metrics, eliminating data silos, ensuring seamless integration and analysis across various data sources for comprehensive system monitoring.
|
||||
|
||||
# Getting Started
|
||||
## run
|
||||
HuaTuo provides a convenient way for quick getting started, all in one command as below:
|
||||
```bash
|
||||
$ docker compose --project-directory ./build/docker up
|
||||
```
|
||||
Run it in the project root directory, then open [http://localhost:3000](http://localhost:3000) to view the panels on your browser.
|
||||
|
||||
The upper command starts three dependencies containers: [elasticsearch](https://www.elastic.co), [prometheus](https://prometheus.io), [grafana](https://grafana.com), then compiles and starts huatuo-bamai.
|
||||
- Data related to event-driven operations, such as Autotracing and Events, are stored in elasticsearch
|
||||
- Metrics-related data is actively collected and stored by prometheus
|
||||
- elasticsearch data reporting port: 9200
|
||||
- prometheus data source port: 9090
|
||||
- grafana port: 3000
|
||||
|
||||
## User-Defined Collection
|
||||
The built-in modules cover most monitoring needs. Additionally, HuaTuo supports custom data collection with easy integration. [How to Add Custom Collection](./docs/CUSTOM.md)
|
||||
|
||||
# Architectures
|
||||

|
||||
|
||||
# Observability Overview
|
||||
## Exception Totals
|
||||

|
||||

|
||||
## Profiling
|
||||

|
||||
## SKB dropwatch
|
||||

|
||||
## Net Latency
|
||||

|
||||
|
||||
# Functionality Overview
|
||||
## Autotracing
|
||||
| Tracing Name | Core Functionality | Scenarios |
|
||||
| ------------ | ----------------------- | ------------------------------------ |
|
||||
| cpu sys | Detects rising host cpu.sys utilization | Issues caused by abnormal cpu.sys load leading to jitters |
|
||||
| cpu idle | Detects low CPU idle in containers, provides call stack, flame graphs, process context info, etc. | Abnormal container CPU usage, helps identify process hotspots |
|
||||
| dload | Tracks processes in the D (uninterruptible) state, provides container runtime info, D-state process call stack, etc. | Issues caused by a sudden increase in the number of system D or R (runnable) state processes, leading to higher load. A spike in D-state processes is often related to unavailable resources or long-held locks, while R-state process spikes may indicate unreasonable user logic design |
|
||||
| waitrate | Detects CPU contention in containers, provides information about the contending containers | CPU contention in containers can cause jitters, and the existing contention metrics lack specific container info. Waitrate tracking can provide the info about the containers involved in the contention, which can be used as a reference for resource isolation in hybrid deployment scenarios |
|
||||
| mmburst | Records burst memory allocation context | Detects events where the host allocates a large amount of memory in a short time, which can lead to direct reclaim or OOM |
|
||||
| iotracer | When the host disk is full or I/O latency is abnormal, provides the file name, path, device, inode, and container context info for the abnormal I/O access | Frequent disk I/O bandwidth saturation or sudden I/O spikes can lead to application request latency or system performance jitters |
|
||||
|
||||
## Events
|
||||
| Event Name | Core Functionality | Scenarios |
|
||||
| -------------- | --------------------- | ------------------------------------ |
|
||||
| softirq | When the kernel delayed response in soft interrupts or prolonged shutdown, supports the call stack and process information of the soft interrupts that have been shut down for an extended period of time. | This type of issue can severely impact network receive/transmit, leading to jitters or latency |
|
||||
| dropwatch | Detects TCP packet drops, provides host and network context info when drops occur | This type of issue can cause jitters and latency |
|
||||
| netrecvlat | Captures latency events along the data packet receive path from the driver, TCP/IP stack, to user-level | For network latency issues, there is a class where the receive-side exhibits latency, but the location is unclear. The netrecvlat case calculates latency by timestamping the skb at the interface, driver, TCP/IP stack, and user-level copy, and filters timed-out packets to point the latency location |
|
||||
| oom | Detects OOM events in the host or containers | When OOM events occur at the host or container level, it can obtain information about the triggering process, the killed process, and container details, which is helpful for diagnosing process memory leaks, abnormal exits, etc. |
|
||||
| softlockup | When the system encounters a softlockup, it collects information about the target process, CPU, and kernel stack for per CPU | Used for investigating system softlockup incidents |
|
||||
| hungtask | Provides the number of processes in the D (uninterruptible) state and their kernel stack info | Used to identify and save the context of processes that suddenly enter the D state, for later investigation |
|
||||
| memreclaim | Records the latency when a process enters direct reclaim, if it exceeds a time threshold | When under memory pressure, if a process requests memory, it may enter direct reclaim, a synchronous reclaim phase that can cause process jitters. This records the time a process spends in direct reclaim, helping assess the impact on the affected process |
|
||||
|
||||
## Metrics
|
||||
Metrics collection involves various indicators from per subsystem, including CPU, memory, IO, network, etc. The primary sources of these metrics are procfs, eBPF, and computational aggregation, as follows is a summary. [for details](docs/metrics.md)
|
||||
|
||||
| Subsystem | Metric | Description | Dimension |
|
||||
| ------------| --------------- |------------------------------------ | ----------------------- |
|
||||
| cpu | sys, usr, util | Percentage | host, container |
|
||||
| cpu | burst, throttled | Number of periods burst occurs, times the group has been throttled/limited | container |
|
||||
| cpu | inner, exter_wait_rate | Wait rate caused by processes inside/outside the container | container |
|
||||
| cpu | nr_running, nr_uninterruptible | The number of running/uninterruptible tasks in the container | container |
|
||||
| cpu | load 1, 5, 15 | System load avg over the last x minute | container |
|
||||
| cpu | softirq_latency | The number of NET_RX/NET_TX irq latency happened | host |
|
||||
| cpu | runqlat_nlat | The number of times when schedule latency of processes in host/container is within x~xms | host, container |
|
||||
| cpu | reschedipi_oversell_probability | The possibility of cpu overselling exists on the host where the vm is located | host |
|
||||
| memory | direct_reclaim | Time speed in page allocation in memory cgroup | container |
|
||||
| memory | asyncreclaim | Memory cgroup's direct reclaim time in cgroup async memory reclaim | container |
|
||||
| memory | vmstat, memory_stat | Memory statistics | host, container |
|
||||
| memory | hungtask, oom, softlockup | Count of event happened | host, container |
|
||||
| IO | d2c | Statistics of io latency when accessing the disk, including the time consumed by the driver and hardware components | host, container |
|
||||
| IO | q2c | Statistics of io latency for the entire io lifecycle when accessing the disk | host, container |
|
||||
| IO | disk_freeze | Statistics of disk freeze events | host |
|
||||
| IO | disk_flush | Statistics of delay for flush operations on disk raid device | host, container |
|
||||
| network | arp | ARP entries | system, host, container |
|
||||
| network | tcp, udp mem | Socket memory | system |
|
||||
| network | qdisc | Qdisc statistics | host |
|
||||
| network | netdev | Network device metrics | host, container |
|
||||
| network | netstat | Network statistics | host, container |
|
||||
| network | sockstat | Socket statistics | host, container |
|
||||
|
||||
|
||||
# Contact Us
|
||||
You can report bugs, provide suggestions, or engage in discussions via Github Issues and Github Discussions. Alternatively, you can contact us using the following ways:
|
|
@ -0,0 +1,66 @@
|
|||
#include "vmlinux.h"
|
||||
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
#include "bpf_common.h"
|
||||
|
||||
#define CGROUP_KNODE_NAME_MAXLEN 64
|
||||
|
||||
struct cgroup_perf_event_t {
|
||||
u64 cgroup;
|
||||
u64 ops_type;
|
||||
s32 cgroup_root;
|
||||
s32 cgroup_level;
|
||||
u64 css[CGROUP_SUBSYS_COUNT];
|
||||
char knode_name[CGROUP_KNODE_NAME_MAXLEN + 2];
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} cgroup_perf_events SEC(".maps");
|
||||
|
||||
char __license[] SEC("license") = "GPL";
|
||||
|
||||
/* TP_PROTO(struct cgroup *cgrp, const char *path) */
|
||||
static int
|
||||
bpf_cgroup_event_class_prog(struct bpf_raw_tracepoint_args *ctx, u64 type)
|
||||
{
|
||||
struct cgroup *cgrp = (void *)ctx->args[0];
|
||||
struct cgroup_perf_event_t data = {};
|
||||
int knode_len;
|
||||
|
||||
/* knode name */
|
||||
knode_len =
|
||||
bpf_probe_read_str(&data.knode_name, sizeof(data.knode_name),
|
||||
BPF_CORE_READ(cgrp, kn, name));
|
||||
if (knode_len != CGROUP_KNODE_NAME_MAXLEN + 1)
|
||||
return 0;
|
||||
|
||||
data.ops_type = type;
|
||||
data.cgroup = (u64)cgrp;
|
||||
data.cgroup_root = BPF_CORE_READ(cgrp, root, hierarchy_id);
|
||||
data.cgroup_level = BPF_CORE_READ(cgrp, level);
|
||||
|
||||
bpf_probe_read(&data.css, sizeof(u64) * CGROUP_SUBSYS_COUNT,
|
||||
BPF_CORE_READ(cgrp, subsys));
|
||||
|
||||
bpf_perf_event_output(ctx, &cgroup_perf_events, BPF_F_CURRENT_CPU,
|
||||
&data, sizeof(data));
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("raw_tracepoint/cgroup_mkdir")
|
||||
int bpf_cgroup_mkdir_prog(struct bpf_raw_tracepoint_args *ctx)
|
||||
{
|
||||
return bpf_cgroup_event_class_prog(ctx, 0);
|
||||
}
|
||||
|
||||
SEC("raw_tracepoint/cgroup_rmdir")
|
||||
int bpf_cgroup_rmdir_prog(struct bpf_raw_tracepoint_args *ctx)
|
||||
{
|
||||
return bpf_cgroup_event_class_prog(ctx, 1);
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
#include "vmlinux.h"
|
||||
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
#include "bpf_common.h"
|
||||
|
||||
#define CGROUP_KNODE_NAME_MAXLEN 64
|
||||
|
||||
struct cgroup_perf_event_t {
|
||||
u64 cgroup;
|
||||
u64 ops_type;
|
||||
s32 cgroup_root;
|
||||
s32 cgroup_level;
|
||||
u64 css[CGROUP_SUBSYS_COUNT];
|
||||
char knode_name[CGROUP_KNODE_NAME_MAXLEN + 2];
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} cgroup_perf_events SEC(".maps");
|
||||
|
||||
char __license[] SEC("license") = "GPL";
|
||||
|
||||
SEC("kprobe/cgroup_clone_children_read")
|
||||
int bpf_cgroup_clone_children_read_prog(struct pt_regs *ctx)
|
||||
{
|
||||
struct cgroup_subsys_state *css = (void *)PT_REGS_PARM1(ctx);
|
||||
struct cgroup *cgrp = BPF_CORE_READ(css, cgroup);
|
||||
struct cgroup_perf_event_t data = {};
|
||||
int knode_len;
|
||||
|
||||
/* knode name */
|
||||
knode_len =
|
||||
bpf_probe_read_str(&data.knode_name, sizeof(data.knode_name),
|
||||
BPF_CORE_READ(cgrp, kn, name));
|
||||
if (knode_len != CGROUP_KNODE_NAME_MAXLEN + 1)
|
||||
return 0;
|
||||
|
||||
data.cgroup = (u64)cgrp;
|
||||
data.ops_type = 0;
|
||||
data.cgroup_root = BPF_CORE_READ(cgrp, root, hierarchy_id);
|
||||
data.cgroup_level = BPF_CORE_READ(cgrp, level);
|
||||
|
||||
/* css */
|
||||
bpf_probe_read(&data.css, sizeof(u64) * CGROUP_SUBSYS_COUNT,
|
||||
BPF_CORE_READ(cgrp, subsys));
|
||||
|
||||
/* output */
|
||||
bpf_perf_event_output(ctx, &cgroup_perf_events, BPF_F_CURRENT_CPU,
|
||||
&data, sizeof(data));
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,287 @@
|
|||
#include "vmlinux.h"
|
||||
#include "vmlinux_net.h"
|
||||
#include "bpf_common.h"
|
||||
#include "bpf_ratelimit.h"
|
||||
|
||||
#define TYPE_TCP_COMMON_DROP 1
|
||||
#define TYPE_TCP_SYN_FLOOD 2
|
||||
#define TYPE_TCP_LISTEN_OVERFLOW_HANDSHAKE1 3
|
||||
#define TYPE_TCP_LISTEN_OVERFLOW_HANDSHAKE3 4
|
||||
|
||||
#define SK_FL_PROTO_SHIFT 8
|
||||
#define SK_FL_PROTO_MASK 0x0000ff00
|
||||
#define SK_FL_TYPE_SHIFT 16
|
||||
#define SK_FL_TYPE_MASK 0xffff0000
|
||||
|
||||
struct perf_event_t {
|
||||
u64 tgid_pid;
|
||||
u32 saddr;
|
||||
u32 daddr;
|
||||
u16 sport;
|
||||
u16 dport;
|
||||
u32 seq;
|
||||
u32 ack_seq;
|
||||
u32 queue_mapping;
|
||||
u64 pkt_len;
|
||||
s64 stack_size;
|
||||
u64 stack[PERF_MAX_STACK_DEPTH];
|
||||
u32 sk_max_ack_backlog;
|
||||
u8 state;
|
||||
u8 type;
|
||||
char comm[TASK_COMM_LEN];
|
||||
};
|
||||
|
||||
/* format: /sys/kernel/debug/tracing/events/skb/kfree_skb/format */
|
||||
struct kfree_skb_args {
|
||||
unsigned long long pad;
|
||||
|
||||
void *skbaddr;
|
||||
void *location;
|
||||
u16 protocol;
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} perf_events SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__uint(max_entries, 1);
|
||||
__uint(key_size, sizeof(u32));
|
||||
__uint(value_size, sizeof(struct perf_event_t));
|
||||
} dropwatch_stackmap SEC(".maps");
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
static const struct perf_event_t zero_data = {};
|
||||
static const u32 stackmap_key = 0;
|
||||
|
||||
BPF_RATELIMIT(rate, 1, 100); // 100/s
|
||||
|
||||
struct sock___5_10 {
|
||||
u16 sk_type;
|
||||
u16 sk_protocol;
|
||||
}__attribute__((preserve_access_index));
|
||||
|
||||
static void sk_get_type_and_protocol(struct sock *sk, u16 *protocol, u16 *type)
|
||||
{
|
||||
// kernel version <= 4.18
|
||||
//
|
||||
// struct sock {
|
||||
// unsigned int __sk_flags_offset[0];
|
||||
// #ifdef __BIG_ENDIAN_BITFIELD
|
||||
// #define SK_FL_PROTO_SHIFT 16
|
||||
// #define SK_FL_PROTO_MASK 0x00ff0000
|
||||
// #
|
||||
// #define SK_FL_TYPE_SHIFT 0
|
||||
// #define SK_FL_TYPE_MASK 0x0000ffff
|
||||
// #else
|
||||
// #define SK_FL_PROTO_SHIFT 8
|
||||
// #define SK_FL_PROTO_MASK 0x0000ff00
|
||||
// #
|
||||
// #define SK_FL_TYPE_SHIFT 16
|
||||
// #define SK_FL_TYPE_MASK 0xffff0000
|
||||
// #endif
|
||||
//
|
||||
// unsigned int sk_padding : 1,
|
||||
// sk_kern_sock : 1,
|
||||
// sk_no_check_tx : 1,
|
||||
// sk_no_check_rx : 1,
|
||||
// sk_userlocks : 4,
|
||||
// sk_protocol : 8,
|
||||
// sk_type : 16;
|
||||
// }
|
||||
if (bpf_core_field_exists(sk->__sk_flags_offset)) {
|
||||
u32 sk_flags;
|
||||
|
||||
bpf_probe_read(&sk_flags, sizeof(sk_flags), &sk->__sk_flags_offset);
|
||||
*protocol = sk_flags >> SK_FL_PROTO_SHIFT;
|
||||
*type = sk_flags >> SK_FL_TYPE_SHIFT;
|
||||
return;
|
||||
}
|
||||
|
||||
// kernel version >= 5.10
|
||||
//
|
||||
// struct sock {
|
||||
// u16 sk_type;
|
||||
// u16 sk_protocol;
|
||||
// }
|
||||
struct sock___5_10 *sk_new = (struct sock___5_10 *)sk;
|
||||
|
||||
*protocol = BPF_CORE_READ(sk_new, sk_protocol);
|
||||
*type = BPF_CORE_READ(sk_new, sk_type);
|
||||
return;
|
||||
}
|
||||
|
||||
SEC("tracepoint/skb/kfree_skb")
|
||||
int bpf_kfree_skb_prog(struct kfree_skb_args *ctx)
|
||||
{
|
||||
struct sk_buff *skb = ctx->skbaddr;
|
||||
struct perf_event_t *data = NULL;
|
||||
struct sock_common *sk_common;
|
||||
struct tcphdr tcphdr;
|
||||
struct iphdr iphdr;
|
||||
struct sock *sk;
|
||||
u16 protocol = 0;
|
||||
u16 type = 0;
|
||||
u8 state = 0;
|
||||
|
||||
/* only for IP && TCP */
|
||||
if (ctx->protocol != ETH_P_IP)
|
||||
return 0;
|
||||
|
||||
bpf_probe_read(&iphdr, sizeof(iphdr), skb_network_header(skb));
|
||||
if (iphdr.protocol != IPPROTO_TCP)
|
||||
return 0;
|
||||
|
||||
sk = BPF_CORE_READ(skb, sk);
|
||||
if (!sk)
|
||||
return 0;
|
||||
sk_common = (struct sock_common *)sk;
|
||||
|
||||
// filter the sock by AF_INET, SOCK_STREAM, IPPROTO_TCP
|
||||
if (BPF_CORE_READ(sk_common, skc_family) != AF_INET)
|
||||
return 0;
|
||||
|
||||
sk_get_type_and_protocol(sk, &protocol, &type);
|
||||
if ((u8)protocol != IPPROTO_TCP || type != SOCK_STREAM)
|
||||
return 0;
|
||||
|
||||
// filter not CLOSE
|
||||
state = BPF_CORE_READ(sk_common, skc_state);
|
||||
if (state == TCP_CLOSE || state == 0)
|
||||
return 0;
|
||||
|
||||
// ratelimit
|
||||
if (bpf_ratelimited(&rate))
|
||||
return 0;
|
||||
|
||||
data = bpf_map_lookup_elem(&dropwatch_stackmap, &stackmap_key);
|
||||
if (!data) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
bpf_probe_read(&tcphdr, sizeof(tcphdr), skb_transport_header(skb));
|
||||
|
||||
/* event */
|
||||
data->tgid_pid = bpf_get_current_pid_tgid();
|
||||
bpf_get_current_comm(&data->comm, sizeof(data->comm));
|
||||
data->type = TYPE_TCP_COMMON_DROP;
|
||||
data->state = state;
|
||||
data->saddr = iphdr.saddr;
|
||||
data->daddr = iphdr.daddr;
|
||||
data->sport = tcphdr.source;
|
||||
data->dport = tcphdr.dest;
|
||||
data->seq = tcphdr.seq;
|
||||
data->ack_seq = tcphdr.ack_seq;
|
||||
data->pkt_len = BPF_CORE_READ(skb, len);
|
||||
data->queue_mapping = BPF_CORE_READ(skb, queue_mapping);
|
||||
data->stack_size = bpf_get_stack(ctx, data->stack, sizeof(data->stack), 0);
|
||||
data->sk_max_ack_backlog = 0; // ignore sk_max_ack_backlog in dropwatch case.
|
||||
|
||||
// output
|
||||
bpf_perf_event_output(ctx, &perf_events, BPF_F_CURRENT_CPU, data, sizeof(*data));
|
||||
|
||||
// clean
|
||||
bpf_map_update_elem(&dropwatch_stackmap, &stackmap_key, &zero_data, BPF_EXIST);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// The current kernel does not support kprobe+offset very well, waiting for kpatch to come online.
|
||||
#if 0
|
||||
static int fill_overflow_event(void *ctx, u8 type, struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
|
||||
struct perf_event_t *data = NULL;
|
||||
struct iphdr iphdr;
|
||||
struct tcphdr tcphdr;
|
||||
|
||||
data = bpf_map_lookup_elem(&dropwatch_stackmap, &stackmap_key);
|
||||
if (!data) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
bpf_probe_read(&iphdr, sizeof(iphdr), skb_network_header(skb));
|
||||
bpf_probe_read(&tcphdr, sizeof(tcphdr), skb_transport_header(skb));
|
||||
|
||||
/* event */
|
||||
data->tgid_pid = bpf_get_current_pid_tgid();
|
||||
bpf_get_current_comm(&data->comm, sizeof(data->comm));
|
||||
data->type = type;
|
||||
data->state = 0;
|
||||
data->saddr = iphdr.saddr;
|
||||
data->daddr = iphdr.daddr;
|
||||
data->sport = tcphdr.source;
|
||||
data->dport = tcphdr.dest;
|
||||
data->seq = tcphdr.seq;
|
||||
data->ack_seq = tcphdr.ack_seq;
|
||||
data->pkt_len = BPF_CORE_READ(skb, len);
|
||||
data->queue_mapping = BPF_CORE_READ(skb, queue_mapping);
|
||||
data->stack_size = 0; // ignore stack in not-overflow.
|
||||
data->sk_max_ack_backlog = BPF_CORE_READ(sk, sk_max_ack_backlog);
|
||||
|
||||
// output
|
||||
bpf_perf_event_output(ctx, &perf_events, BPF_F_CURRENT_CPU, data, sizeof(*data));
|
||||
|
||||
// clean
|
||||
bpf_map_update_elem(&dropwatch_stackmap, &stackmap_key, &zero_data, BPF_EXIST);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// the dropwatch case: syn_flood.
|
||||
SEC("kprobe/tcp_conn_request+1290")
|
||||
int bpf_tcp_syn_flood_action_prog(struct pt_regs *ctx)
|
||||
{
|
||||
// the function of `tcp_syn_flood_action` arguments:
|
||||
// %r15: struct sock *sk
|
||||
// %r13: struct sk_buff *skb
|
||||
struct sock *sk = (void *)ctx->r15;
|
||||
struct sk_buff *skb= (void *)ctx->r13;
|
||||
|
||||
// ratelimit
|
||||
if (bpf_ratelimited(ctx, rate))
|
||||
return 0;
|
||||
|
||||
// fill
|
||||
return fill_overflow_event(ctx, TYPE_TCP_SYN_FLOOD, sk, skb);
|
||||
}
|
||||
|
||||
// the dropwatch case: listen-overflow in the TCP_CLOSE state(client: TCP_SYN_SENT).
|
||||
SEC("kprobe/tcp_conn_request+167")
|
||||
int bpf_tcp_listen_overflow_handshake1_prog(struct pt_regs *ctx)
|
||||
{
|
||||
// this position has registers as follows:
|
||||
// %r15: struct sock *sk
|
||||
// %r13: struct sk_buff *skb
|
||||
struct sock *sk = (void *)ctx->r15;
|
||||
struct sk_buff *skb= (void *)ctx->r13;
|
||||
|
||||
// ratelimit
|
||||
if (bpf_ratelimited(ctx, rate))
|
||||
return 0;
|
||||
|
||||
// fill
|
||||
return fill_overflow_event(ctx, TYPE_TCP_LISTEN_OVERFLOW_HANDSHAKE1, sk, skb);
|
||||
}
|
||||
|
||||
// the dropwatch case: listen-overflow in the TCP_NEW_SYN_RECV state(client: TCP_ESTABLISHED).
|
||||
SEC("kprobe/tcp_v4_syn_recv_sock+700")
|
||||
int bpf_tcp_listen_overflow_handshake3_prog(struct pt_regs *ctx)
|
||||
{
|
||||
// this position has registers as follows:
|
||||
// %rdi: struct sock *sk
|
||||
// %rsi: struct sk_buff *skb
|
||||
// %r15: struct request_sock *req
|
||||
struct sock *sk = (void *)ctx->di;
|
||||
struct sk_buff *skb= (void *)ctx->si;
|
||||
|
||||
// ratelimit
|
||||
if (bpf_ratelimited(ctx, rate))
|
||||
return 0;
|
||||
|
||||
// fill
|
||||
return fill_overflow_event(ctx, TYPE_TCP_LISTEN_OVERFLOW_HANDSHAKE3, sk, skb);
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,43 @@
|
|||
#include "vmlinux.h"
|
||||
#include "bpf_common.h"
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include "bpf_ratelimit.h"
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
#define CPU_NUM 128
|
||||
BPF_RATELIMIT_IN_MAP(rate, 1, CPU_NUM * 10000, 0);
|
||||
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} hungtask_perf_events SEC(".maps");
|
||||
|
||||
struct hungtask_info {
|
||||
int32_t pid;
|
||||
char comm[TASK_COMM_LEN];
|
||||
};
|
||||
|
||||
struct tracepoint_args {
|
||||
unsigned long pad;
|
||||
char comm[TASK_COMM_LEN];
|
||||
int pid;
|
||||
};
|
||||
|
||||
SEC("tracepoint/sched/sched_process_hang")
|
||||
int tracepoint_sched_process_hang(struct tracepoint_args *ctx)
|
||||
{
|
||||
struct hungtask_info info = {};
|
||||
|
||||
if (bpf_ratelimited_in_map(ctx, rate))
|
||||
return 0;
|
||||
info.pid = ctx->pid;
|
||||
// custom defined struct can't use BPF_CORE_READ_STR_INTO()
|
||||
bpf_probe_read_str(&info.comm, TASK_COMM_LEN, ctx->comm);
|
||||
bpf_perf_event_output(ctx, &hungtask_perf_events, BPF_F_CURRENT_CPU, &info, sizeof(info));
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
#ifndef __BPF_COMMON_H__
|
||||
#define __BPF_COMMON_H__
|
||||
|
||||
#ifndef NULL
|
||||
#define NULL ((void *)0)
|
||||
#endif
|
||||
|
||||
/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
|
||||
* BPF_FUNC_perf_event_read_value flags.
|
||||
*/
|
||||
enum {
|
||||
BPF_F_INDEX_MASK = 0xffffffffULL,
|
||||
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
|
||||
/* BPF_FUNC_perf_event_output for sk_buff input context. */
|
||||
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
|
||||
};
|
||||
|
||||
/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
|
||||
enum {
|
||||
BPF_F_SKIP_FIELD_MASK = 0xffULL,
|
||||
BPF_F_USER_STACK = (1ULL << 8),
|
||||
/* flags used by BPF_FUNC_get_stackid only. */
|
||||
BPF_F_FAST_STACK_CMP = (1ULL << 9),
|
||||
BPF_F_REUSE_STACKID = (1ULL << 10),
|
||||
/* flags used by BPF_FUNC_get_stack only. */
|
||||
BPF_F_USER_BUILD_ID = (1ULL << 11),
|
||||
};
|
||||
|
||||
#define TASK_COMM_LEN 16
|
||||
#define PATH_MAX 4096 /* # chars in a path name including nul */
|
||||
|
||||
/* include/uapi/linux/perf_event.h */
|
||||
#define PERF_MAX_STACK_DEPTH 127
|
||||
#define PERF_MIN_STACK_DEPTH 16
|
||||
|
||||
/* flags for BPF_MAP_UPDATE_ELEM command */
|
||||
#define BPF_ANY 0 /* create new element or update existing */
|
||||
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
|
||||
#define BPF_EXIST 2 /* update existing element */
|
||||
#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */
|
||||
|
||||
#endif /* __BPF_COMMON_H__ */
|
|
@ -0,0 +1,48 @@
|
|||
#ifndef __BPF_FUNC_TRACE_H__
|
||||
#define __BPF_FUNC_TRACE_H__
|
||||
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
struct trace_entry_ctx {
|
||||
u64 id;
|
||||
u64 start_ns;
|
||||
u64 delta_ns;
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__type(key, u64);
|
||||
__type(value, struct trace_entry_ctx);
|
||||
__uint(max_entries, 10240);
|
||||
} func_trace_map SEC(".maps");
|
||||
|
||||
static __always_inline void func_trace_begain(u64 id)
|
||||
{
|
||||
struct trace_entry_ctx entry = {
|
||||
.start_ns = bpf_ktime_get_ns(),
|
||||
.id = id,
|
||||
};
|
||||
|
||||
bpf_map_update_elem(&func_trace_map, &id, &entry, BPF_ANY);
|
||||
}
|
||||
|
||||
static __always_inline struct trace_entry_ctx *func_trace_end(u64 id)
|
||||
{
|
||||
struct trace_entry_ctx *entry;
|
||||
|
||||
entry = bpf_map_lookup_elem(&func_trace_map, &id);
|
||||
if (!entry) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// update any elem you need!
|
||||
entry->delta_ns = bpf_ktime_get_ns() - entry->start_ns;
|
||||
return entry;
|
||||
}
|
||||
|
||||
static __always_inline void func_trace_destroy(u64 id)
|
||||
{
|
||||
bpf_map_delete_elem(&func_trace_map, &id);
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,113 @@
|
|||
#ifndef __BPF_RATELIMIT_H__
|
||||
#define __BPF_RATELIMIT_H__
|
||||
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
struct bpf_ratelimit {
|
||||
uint64_t interval; // unit: second
|
||||
uint64_t begin;
|
||||
uint64_t burst; // max events/interval
|
||||
uint64_t max_burst; // max burst
|
||||
uint64_t events; // current events/interval
|
||||
uint64_t nmissed; // missed events/interval
|
||||
|
||||
uint64_t total_events; // total events
|
||||
uint64_t total_nmissed; // total missed events
|
||||
uint64_t total_interval; // total interval
|
||||
};
|
||||
|
||||
#define BPF_RATELIMIT(name, interval, burst) \
|
||||
struct bpf_ratelimit name = {interval, 0, burst, 0, 0, 0, 0, 0, 0}
|
||||
|
||||
// bpf_ratelimited: whether the threshold is exceeded
|
||||
//
|
||||
// @rate: struct bpf_ratelimit *
|
||||
// @return:
|
||||
// true: the threshold is exceeded
|
||||
// false: the threshold is not exceeded
|
||||
static __always_inline bool bpf_ratelimited(struct bpf_ratelimit *rate)
|
||||
{
|
||||
// validate
|
||||
if (rate == NULL || rate->interval == 0)
|
||||
return false;
|
||||
|
||||
u64 curr = bpf_ktime_get_ns() / 1000000000;
|
||||
|
||||
if (rate->begin == 0)
|
||||
rate->begin = curr;
|
||||
|
||||
if (curr > rate->begin + rate->interval) {
|
||||
__sync_fetch_and_add(&rate->total_interval, curr - rate->begin);
|
||||
rate->begin = curr;
|
||||
rate->events = rate->nmissed = 0;
|
||||
}
|
||||
|
||||
if (rate->burst && rate->burst > rate->events) {
|
||||
__sync_fetch_and_add(&rate->events, 1);
|
||||
__sync_fetch_and_add(&rate->total_events, 1);
|
||||
return false;
|
||||
}
|
||||
|
||||
__sync_fetch_and_add(&rate->nmissed, 1);
|
||||
__sync_fetch_and_add(&rate->total_nmissed, 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
#define BPF_RATELIMIT_IN_MAP(name, interval, burst, max_burst) \
|
||||
struct { \
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY); \
|
||||
__uint(key_size, sizeof(u32)); \
|
||||
__uint(value_size, sizeof(struct bpf_ratelimit)); \
|
||||
__uint(max_entries, 1); \
|
||||
} bpf_rlimit_##name SEC(".maps"); \
|
||||
struct { \
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); \
|
||||
__uint(key_size, sizeof(int)); \
|
||||
__uint(value_size, sizeof(u32)); \
|
||||
} event_bpf_rlimit_##name SEC(".maps"); \
|
||||
volatile const struct bpf_ratelimit ___bpf_rlimit_cfg_##name = { \
|
||||
interval, 0, burst, max_burst, 0, 0, 0, 0, 0}
|
||||
|
||||
// bpf_ratelimited_in_map: whether the threshold is exceeded
|
||||
//
|
||||
// @rate: struct bpf_ratelimit *
|
||||
// @return:
|
||||
// true: the threshold is exceeded
|
||||
// false: the threshold is not exceeded
|
||||
#define bpf_ratelimited_in_map(ctx, rate) \
|
||||
bpf_ratelimited_core_in_map(ctx, &bpf_rlimit_##rate, \
|
||||
&event_bpf_rlimit_##rate, \
|
||||
&___bpf_rlimit_cfg_##rate)
|
||||
|
||||
static __always_inline bool
|
||||
bpf_ratelimited_core_in_map(void *ctx, void *map, void *perf_map,
|
||||
const volatile struct bpf_ratelimit *cfg)
|
||||
{
|
||||
u32 key = 0;
|
||||
struct bpf_ratelimit *rate = NULL;
|
||||
|
||||
rate = bpf_map_lookup_elem(map, &key);
|
||||
if (rate == NULL)
|
||||
return false;
|
||||
|
||||
// init from cfg
|
||||
if (rate->interval == 0) {
|
||||
rate->interval = cfg->interval;
|
||||
rate->burst = cfg->burst;
|
||||
rate->max_burst = cfg->max_burst;
|
||||
}
|
||||
|
||||
// the threshold is not exceeded, return false
|
||||
u64 old_nmissed = rate->nmissed;
|
||||
if (!bpf_ratelimited(rate))
|
||||
return false;
|
||||
|
||||
// the threshold/max_burst is exceeded, notify once in a cycle
|
||||
if (old_nmissed == 0 || (rate->max_burst > 0 &&
|
||||
rate->nmissed > rate->max_burst - rate->burst))
|
||||
bpf_perf_event_output(ctx, perf_map, BPF_F_CURRENT_CPU, rate,
|
||||
sizeof(struct bpf_ratelimit));
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,26 @@
|
|||
#ifndef __VMLINUX_NET_H__
|
||||
#define __VMLINUX_NET_H__
|
||||
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
|
||||
#define IFNAMSIZ 16
|
||||
|
||||
#define ETH_P_IP 0x0800 /* Internet Protocol packet */
|
||||
#define AF_INET 2 /* Internet IP Protocol */
|
||||
|
||||
#define IP_MF 0x2000 /* Flag: "More Fragments" */
|
||||
#define IP_OFFSET 0x1FFF /* "Fragment Offset" part */
|
||||
|
||||
// skb_network_header - get the network header from sk_buff
|
||||
static inline unsigned char *skb_network_header(struct sk_buff *skb)
|
||||
{
|
||||
return BPF_CORE_READ(skb, head) + BPF_CORE_READ(skb, network_header);
|
||||
}
|
||||
|
||||
// skb_transport_header - get the transport header from sk_buff
|
||||
static inline unsigned char *skb_transport_header(struct sk_buff *skb)
|
||||
{
|
||||
return BPF_CORE_READ(skb, head) + BPF_CORE_READ(skb, transport_header);
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,7 @@
|
|||
#ifndef __VMLINUX_SCHED_H__
|
||||
#define __VMLINUX_SCHED_H__
|
||||
|
||||
/* copy from include/linux/sched.h */
|
||||
#define PF_KSWAPD 0x00020000 /* I am kswapd */
|
||||
|
||||
#endif
|
|
@ -0,0 +1,25 @@
#include "vmlinux.h"

#include <bpf/bpf_helpers.h>

#include "bpf_common.h"

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(u32));
} ad_event_map SEC(".maps");

SEC("kprobe/ad_disable_collecting_distributing")
int ad_disable(struct pt_regs *ctx)
{
	// Nothing to do here except notifying user space: the bonding code
	// lives in a kernel module, and CO-RE relocations against it are not
	// supported directly on older kernels.
	u64 nothing = 0;
	bpf_perf_event_output(ctx, &ad_event_map, BPF_F_CURRENT_CPU, &nothing,
			      sizeof(nothing));
	return 0;
}

char __license[] SEC("license") = "Dual MIT/GPL";
@ -0,0 +1,55 @@
|
|||
#include "bpf_common.h"
|
||||
#include "vmlinux.h"
|
||||
#include "vmlinux_sched.h"
|
||||
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
struct mem_cgroup_metric {
|
||||
/* cg: direct reclaim count caused by try_charge */
|
||||
unsigned long directstall_count;
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__type(key, unsigned long);
|
||||
__type(value, struct mem_cgroup_metric);
|
||||
__uint(max_entries, 10240);
|
||||
} mem_cgroup_map SEC(".maps");
|
||||
|
||||
SEC("tracepoint/vmscan/mm_vmscan_memcg_reclaim_begin")
|
||||
int tracepoint_vmscan_mm_vmscan_memcg_reclaim_begin(struct pt_regs *ctx)
|
||||
{
|
||||
struct cgroup_subsys_state *mm_subsys;
|
||||
struct mem_cgroup_metric *valp;
|
||||
struct task_struct *task;
|
||||
|
||||
task = (struct task_struct *)bpf_get_current_task();
|
||||
if (BPF_CORE_READ(task, flags) & PF_KSWAPD)
|
||||
return 0;
|
||||
|
||||
mm_subsys = BPF_CORE_READ(task, cgroups, subsys[memory_cgrp_id]);
|
||||
valp = bpf_map_lookup_elem(&mem_cgroup_map, &mm_subsys);
|
||||
if (!valp) {
|
||||
struct mem_cgroup_metric new_metrics = {
|
||||
.directstall_count = 1,
|
||||
};
|
||||
bpf_map_update_elem(&mem_cgroup_map, &mm_subsys, &new_metrics,
|
||||
BPF_ANY);
|
||||
return 0;
|
||||
}
|
||||
|
||||
__sync_fetch_and_add(&valp->directstall_count, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("kprobe/mem_cgroup_css_released")
|
||||
int kprobe_mem_cgroup_css_released(struct pt_regs *ctx)
|
||||
{
|
||||
u64 css = PT_REGS_PARM1(ctx);
|
||||
bpf_map_delete_elem(&mem_cgroup_map, &css);
|
||||
return 0;
|
||||
}
|
|
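A side note on the miss path above: with BPF_ANY, two CPUs that miss the lookup at the same time can each write a fresh entry, and one increment is lost. The sched latency tracer later in this change avoids that by inserting with BPF_NOEXIST and then re-looking up; applied here it would look roughly like the sketch below (same map and types as above, shown only as an illustration, not a required change).

// Sketch: race-tolerant variant of the miss path, mirroring the
// BPF_NOEXIST + re-lookup idiom used by the sched latency tracer.
if (!valp) {
	struct mem_cgroup_metric zero = {};

	bpf_map_update_elem(&mem_cgroup_map, &mm_subsys, &zero, BPF_NOEXIST);
	valp = bpf_map_lookup_elem(&mem_cgroup_map, &mm_subsys);
	if (!valp)
		return 0;
}
__sync_fetch_and_add(&valp->directstall_count, 1);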
@ -0,0 +1,90 @@
|
|||
#include "vmlinux.h"
|
||||
#include "bpf_common.h"
|
||||
#include "bpf_func_trace.h"
|
||||
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
struct mm_free_compact_entry {
|
||||
/* host: compaction latency */
|
||||
unsigned long compaction_stat;
|
||||
/* host: page alloc latency in direct reclaim */
|
||||
unsigned long allocstall_stat;
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__type(key, int);
|
||||
__type(value, struct mm_free_compact_entry);
|
||||
__uint(max_entries, 10240);
|
||||
} mm_free_compact_map SEC(".maps");
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
static __always_inline void
|
||||
update_metric_map(u64 free_delta_ns, u64 compact_delta_ns)
|
||||
{
|
||||
struct mm_free_compact_entry *valp;
|
||||
int key = 0;
|
||||
|
||||
valp = bpf_map_lookup_elem(&mm_free_compact_map, &key);
|
||||
if (!valp) {
|
||||
struct mm_free_compact_entry new_metrics = {
|
||||
.allocstall_stat = free_delta_ns,
|
||||
.compaction_stat = compact_delta_ns,
|
||||
};
|
||||
bpf_map_update_elem(&mm_free_compact_map, &key, &new_metrics,
|
||||
BPF_ANY);
|
||||
return;
|
||||
}
|
||||
|
||||
if (free_delta_ns)
|
||||
__sync_fetch_and_add(&valp->allocstall_stat, free_delta_ns);
|
||||
|
||||
if (compact_delta_ns)
|
||||
__sync_fetch_and_add(&valp->compaction_stat, compact_delta_ns);
|
||||
}
|
||||
|
||||
static __always_inline void func_trace_end_and_update_metric(bool free_pages)
|
||||
{
|
||||
struct trace_entry_ctx *entry;
|
||||
|
||||
entry = func_trace_end(bpf_get_current_pid_tgid());
|
||||
if (!entry)
|
||||
return;
|
||||
|
||||
if (free_pages)
|
||||
update_metric_map(entry->delta_ns, 0);
|
||||
else
|
||||
update_metric_map(0, entry->delta_ns);
|
||||
|
||||
func_trace_destroy(entry->id);
|
||||
}
|
||||
|
||||
SEC("tracepoint/vmscan/mm_vmscan_direct_reclaim_begin")
|
||||
int tracepoint_try_to_free_pages_begin(struct pt_regs *ctx)
|
||||
{
|
||||
func_trace_begain(bpf_get_current_pid_tgid());
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tracepoint/vmscan/mm_vmscan_direct_reclaim_end")
|
||||
int tracepoint_try_to_free_pages_end(struct pt_regs *ctx)
|
||||
{
|
||||
func_trace_end_and_update_metric(true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("kprobe/try_to_compact_pages")
|
||||
int kprobe_try_to_compact_pages_host(struct pt_regs *ctx)
|
||||
{
|
||||
func_trace_begain(bpf_get_current_pid_tgid());
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("kretprobe/try_to_compact_pages")
|
||||
int kretprobe_try_to_compact_pages_host(struct pt_regs *ctx)
|
||||
{
|
||||
func_trace_end_and_update_metric(false);
|
||||
return 0;
|
||||
}
|
|
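bpf_func_trace.h is not included in this diff. Based on how it is used here and in the direct reclaim tracer that follows (func_trace_begain/func_trace_end/func_trace_destroy, and a trace_entry_ctx carrying id and delta_ns), a rough sketch of the idea is shown below; this is an assumption about the helper's shape, not its actual implementation.

// Assumed shape of the begin/end latency helper used above (sketch only).
struct trace_entry_ctx {
	u64 id;       // key, e.g. the pid_tgid passed to func_trace_begain()
	u64 start_ns; // timestamp taken at func_trace_begain()
	u64 delta_ns; // filled in by func_trace_end()
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, struct trace_entry_ctx);
	__uint(max_entries, 10240);
} func_trace_map SEC(".maps");

static __always_inline void func_trace_begain(u64 id)
{
	struct trace_entry_ctx entry = {
		.id = id,
		.start_ns = bpf_ktime_get_ns(),
	};

	bpf_map_update_elem(&func_trace_map, &id, &entry, BPF_ANY);
}

static __always_inline struct trace_entry_ctx *func_trace_end(u64 id)
{
	struct trace_entry_ctx *entry = bpf_map_lookup_elem(&func_trace_map, &id);

	if (entry)
		entry->delta_ns = bpf_ktime_get_ns() - entry->start_ns;
	return entry;
}

static __always_inline void func_trace_destroy(u64 id)
{
	bpf_map_delete_elem(&func_trace_map, &id);
}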
@ -0,0 +1,64 @@
|
|||
#include "vmlinux.h"
|
||||
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
#include "bpf_common.h"
|
||||
#include "bpf_func_trace.h"
|
||||
#include "bpf_ratelimit.h"
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
volatile const unsigned long deltath = 0;
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} reclaim_perf_events SEC(".maps");
|
||||
|
||||
struct reclaim_entry {
|
||||
char comm[TASK_COMM_LEN];
|
||||
u64 delta_time;
|
||||
u64 css;
|
||||
u64 pid;
|
||||
};
|
||||
|
||||
SEC("kprobe/try_to_free_pages")
|
||||
int kprobe_try_to_free_pages(struct pt_regs *ctx)
|
||||
{
|
||||
func_trace_begain(bpf_get_current_pid_tgid());
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("kretprobe/try_to_free_pages")
|
||||
int kretprobe_try_to_free_pages(struct pt_regs *ctx)
|
||||
{
|
||||
struct trace_entry_ctx *entry;
|
||||
struct task_struct *task;
|
||||
|
||||
entry = func_trace_end(bpf_get_current_pid_tgid());
|
||||
if (!entry)
|
||||
return 0;
|
||||
|
||||
if (entry->delta_ns > deltath) {
|
||||
task = (struct task_struct *)bpf_get_current_task();
|
||||
|
||||
struct reclaim_entry data = {
|
||||
.pid = entry->id,
|
||||
.css = (u64)BPF_CORE_READ(task, cgroups,
|
||||
subsys[cpu_cgrp_id]),
|
||||
.delta_time = entry->delta_ns,
|
||||
};
|
||||
|
||||
bpf_get_current_comm(data.comm, sizeof(data.comm));
|
||||
|
||||
bpf_perf_event_output(ctx, &reclaim_perf_events,
|
||||
BPF_F_CURRENT_CPU, &data,
|
||||
sizeof(struct reclaim_entry));
|
||||
}
|
||||
|
||||
func_trace_destroy(entry->id);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
#include "vmlinux.h"
|
||||
#include "bpf_common.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
|
||||
#define NSEC_PER_MSEC 1000000UL
|
||||
#define NSEC_PER_USEC 1000UL
|
||||
#define NR_SOFTIRQS_MAX 16 // must be a power of two
|
||||
|
||||
enum lat_zone {
|
||||
LAT_ZONE0=0, // 0 ~ 10us
|
||||
LAT_ZONE1, // 10us ~ 100us
|
||||
LAT_ZONE2, // 100us ~ 1ms
|
||||
LAT_ZONE3, // 1ms ~ inf
|
||||
LAT_ZONE_MAX,
|
||||
};
|
||||
|
||||
struct tp_softirq {
|
||||
unsigned long long pad;
|
||||
unsigned int vec;
|
||||
};
|
||||
|
||||
// Because the BPF verifier strictly checks array accesses, the array size
// must be a power of two so the index can be masked; therefore we size the
// array with NR_SOFTIRQS_MAX rather than NR_SOFTIRQS.
|
||||
struct softirq_lat {
|
||||
u64 silat[NR_SOFTIRQS_MAX][LAT_ZONE_MAX];
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
//key -> NR_SOFTIRQS
|
||||
__type(key, u32);
|
||||
// value -> ts, record softirq_raise start time
|
||||
__type(value, u64);
|
||||
__uint(max_entries, NR_SOFTIRQS);
|
||||
} silat_map SEC(".maps");//softirq latency map
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(key_size, sizeof(u32));
|
||||
__uint(value_size, sizeof(struct softirq_lat));
|
||||
__uint(max_entries, 1);
|
||||
} softirq_lats SEC(".maps");
|
||||
|
||||
SEC("tracepoint/irq/softirq_raise")
|
||||
void probe_softirq_raise(struct tp_softirq *ctx)
|
||||
{
|
||||
u32 nr;
|
||||
u64 now;
|
||||
nr = ctx->vec;
|
||||
|
||||
now = bpf_ktime_get_ns();
|
||||
bpf_map_update_elem(&silat_map, &nr, &now, BPF_ANY);
|
||||
}
|
||||
|
||||
static void
|
||||
calc_softirq_latency(struct softirq_lat *lat_mc, u32 nr, u64 now)
|
||||
{
|
||||
u64 lat, *ts;
|
||||
|
||||
ts = bpf_map_lookup_elem(&silat_map, &nr);
|
||||
if (!ts)
|
||||
return;
|
||||
|
||||
lat = now - *ts;
|
||||
|
||||
//update to metrics
|
||||
if (lat < 10 * NSEC_PER_USEC) { //10us
|
||||
__sync_fetch_and_add(&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE0], 1);
|
||||
} else if (lat < 100 * NSEC_PER_USEC) {//100us
|
||||
__sync_fetch_and_add(&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE1], 1);
|
||||
} else if (lat < 1 * NSEC_PER_MSEC) {//1ms
|
||||
__sync_fetch_and_add(&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE2], 1);
|
||||
} else {//1ms+
|
||||
__sync_fetch_and_add(&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE3], 1);
|
||||
}
|
||||
}
|
||||
|
||||
SEC("tracepoint/irq/softirq_entry")
|
||||
void probe_softirq_entry(struct tp_softirq *ctx)
|
||||
{
|
||||
u32 key = 0, nr;
|
||||
u64 now;
|
||||
struct softirq_lat *lat_mc;
|
||||
|
||||
lat_mc = bpf_map_lookup_elem(&softirq_lats, &key);
|
||||
if (!lat_mc)
|
||||
return;
|
||||
|
||||
nr = ctx->vec;
|
||||
|
||||
now = bpf_ktime_get_ns();
|
||||
|
||||
// update softirq lat to lat metric
|
||||
calc_softirq_latency(lat_mc, nr, now);
|
||||
}
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
|
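The bucketing in calc_softirq_latency() is a four-zone histogram; purely as a reading aid, the same logic could be factored into a helper like the sketch below (illustrative only, reusing the LAT_ZONE enum and NSEC_PER_* macros defined above).

// Sketch: map a latency in nanoseconds to one of the LAT_ZONE buckets above.
static __always_inline int lat_to_zone(u64 lat_ns)
{
	if (lat_ns < 10 * NSEC_PER_USEC)  // 0 ~ 10us
		return LAT_ZONE0;
	if (lat_ns < 100 * NSEC_PER_USEC) // 10us ~ 100us
		return LAT_ZONE1;
	if (lat_ns < NSEC_PER_MSEC)       // 100us ~ 1ms
		return LAT_ZONE2;
	return LAT_ZONE3;                 // 1ms ~ inf
}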
@ -0,0 +1,175 @@
|
|||
//go:build ignore
|
||||
|
||||
#include "vmlinux.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_endian.h>
|
||||
#include "bpf_common.h"
|
||||
#include "vmlinux_net.h"
|
||||
#include "bpf_ratelimit.h"
|
||||
|
||||
volatile const long long mono_wall_offset = 0;
|
||||
volatile const long long to_netif = 5 * 1000 * 1000; // 5ms
|
||||
volatile const long long to_tcpv4 = 10 * 1000 * 1000; // 10ms
|
||||
volatile const long long to_user_copy = 115 * 1000 * 1000; // 115ms
|
||||
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
|
||||
BPF_RATELIMIT(rate, 1, 100);
|
||||
|
||||
struct netif_receive_skb_args {
|
||||
struct trace_entry entry;
|
||||
struct sk_buff *skb;
|
||||
};
|
||||
|
||||
struct skb_copy_datagram_iovec_args {
|
||||
struct trace_entry entry;
|
||||
struct sk_buff *skb;
|
||||
};
|
||||
|
||||
struct perf_event_t {
|
||||
char comm[TASK_COMM_LEN];
|
||||
u64 latency;
|
||||
u64 tgid_pid;
|
||||
u64 pkt_len;
|
||||
u16 sport;
|
||||
u16 dport;
|
||||
u32 saddr;
|
||||
u32 daddr;
|
||||
u32 seq;
|
||||
u32 ack_seq;
|
||||
u8 state;
|
||||
u8 where;
|
||||
};
|
||||
|
||||
enum skb_rcv_where {
|
||||
TO_NETIF_RCV,
|
||||
TO_TCPV4_RCV,
|
||||
TO_USER_COPY,
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} net_recv_lat_event_map SEC(".maps");
|
||||
|
||||
struct mix {
|
||||
struct iphdr *ip_hdr;
|
||||
u64 lat;
|
||||
u8 state;
|
||||
u8 where;
|
||||
};
|
||||
|
||||
static inline u64 delta_now_skb_tstamp(struct sk_buff *skb)
|
||||
{
|
||||
u64 tstamp = BPF_CORE_READ(skb, tstamp);
|
||||
// Although skb->tstamp recording is enabled from user space via
// SOF_TIMESTAMPING_RX_SOFTWARE, it is still 0 in a few cases:
// unix recv, netlink recv, and some virtual devices (e.g. tun with napi disabled)
|
||||
if (!tstamp)
|
||||
return 0;
|
||||
|
||||
return bpf_ktime_get_ns() + mono_wall_offset - tstamp;
|
||||
}
|
||||
|
||||
static inline u8 get_state(struct sk_buff *skb)
|
||||
{
|
||||
return BPF_CORE_READ(skb, sk, __sk_common.skc_state);
|
||||
}
|
||||
|
||||
static inline void fill_and_output_event(void *ctx, struct sk_buff *skb, struct mix *_mix)
|
||||
{
|
||||
struct perf_event_t event = {};
|
||||
struct tcphdr tcp_hdr;
|
||||
|
||||
// ratelimit
|
||||
if (bpf_ratelimited(&rate))
|
||||
return;
|
||||
|
||||
if (likely(_mix->where == TO_USER_COPY)) {
|
||||
event.tgid_pid = bpf_get_current_pid_tgid();
|
||||
bpf_get_current_comm(&event.comm, sizeof(event.comm));
|
||||
}
|
||||
|
||||
bpf_probe_read(&tcp_hdr, sizeof(tcp_hdr), skb_transport_header(skb));
|
||||
event.latency = _mix->lat;
|
||||
event.saddr = _mix->ip_hdr->saddr;
|
||||
event.daddr = _mix->ip_hdr->daddr;
|
||||
event.sport = tcp_hdr.source;
|
||||
event.dport = tcp_hdr.dest;
|
||||
event.seq = tcp_hdr.seq;
|
||||
event.ack_seq = tcp_hdr.ack_seq;
|
||||
event.pkt_len = BPF_CORE_READ(skb, len);
|
||||
event.state = _mix->state;
|
||||
event.where = _mix->where;
|
||||
|
||||
bpf_perf_event_output(ctx, &net_recv_lat_event_map, BPF_F_CURRENT_CPU, &event, sizeof(struct perf_event_t));
|
||||
}
|
||||
|
||||
SEC("tracepoint/net/netif_receive_skb")
|
||||
int netif_receive_skb_prog(struct netif_receive_skb_args *args)
|
||||
{
|
||||
struct sk_buff *skb = args->skb;
|
||||
struct iphdr ip_hdr;
|
||||
u64 delta;
|
||||
|
||||
if (unlikely(BPF_CORE_READ(skb, protocol) != bpf_ntohs(ETH_P_IP))) // IPv4
|
||||
return 0;
|
||||
|
||||
bpf_probe_read(&ip_hdr, sizeof(ip_hdr), skb_network_header(skb));
|
||||
if (ip_hdr.protocol != IPPROTO_TCP)
|
||||
return 0;
|
||||
|
||||
delta = delta_now_skb_tstamp(skb);
|
||||
if (delta < to_netif)
|
||||
return 0;
|
||||
|
||||
fill_and_output_event(args, skb, &(struct mix){&ip_hdr, delta, 0, TO_NETIF_RCV});
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("kprobe/tcp_v4_rcv")
|
||||
int tcp_v4_rcv_prog(struct pt_regs *ctx)
|
||||
{
|
||||
struct sk_buff *skb = (struct sk_buff*)PT_REGS_PARM1_CORE(ctx);
|
||||
struct iphdr ip_hdr;
|
||||
u64 delta;
|
||||
|
||||
delta = delta_now_skb_tstamp(skb);
|
||||
if (delta < to_tcpv4)
|
||||
return 0;
|
||||
|
||||
bpf_probe_read(&ip_hdr, sizeof(ip_hdr), skb_network_header(skb));
|
||||
fill_and_output_event(ctx, skb, &(struct mix){&ip_hdr, delta, get_state(skb), TO_TCPV4_RCV});
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tracepoint/skb/skb_copy_datagram_iovec")
|
||||
int skb_copy_datagram_iovec_prog(struct skb_copy_datagram_iovec_args *args)
|
||||
{
|
||||
struct sk_buff *skb = args->skb;
|
||||
struct iphdr ip_hdr;
|
||||
u64 delta;
|
||||
|
||||
if (unlikely(BPF_CORE_READ(skb, protocol) != bpf_ntohs(ETH_P_IP))) // IPv4
|
||||
return 0;
|
||||
|
||||
bpf_probe_read(&ip_hdr, sizeof(ip_hdr), skb_network_header(skb));
|
||||
if (ip_hdr.protocol != IPPROTO_TCP)
|
||||
return 0;
|
||||
|
||||
delta = delta_now_skb_tstamp(skb);
|
||||
if (delta < to_user_copy)
|
||||
return 0;
|
||||
|
||||
fill_and_output_event(args, skb, &(struct mix){&ip_hdr, delta, get_state(skb), TO_USER_COPY});
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
|
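mono_wall_offset is patched in from user space before the object is loaded: skb->tstamp on receive carries wall-clock time, while bpf_ktime_get_ns() is CLOCK_MONOTONIC, so the constant bridges the two clocks. Below is a minimal user-space sketch of how such an offset could be computed (plain C purely for illustration; the actual loader in this project is written in Go, and the function name is hypothetical).

#include <stdint.h>
#include <time.h>

// Sketch: wall-clock minus monotonic offset, in nanoseconds. A loader would
// write this value into the program's 'mono_wall_offset' constant before attach.
static int64_t mono_wall_offset_ns(void)
{
	struct timespec mono, wall;

	clock_gettime(CLOCK_MONOTONIC, &mono);
	clock_gettime(CLOCK_REALTIME, &wall);

	return (wall.tv_sec - mono.tv_sec) * 1000000000LL +
	       (wall.tv_nsec - mono.tv_nsec);
}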
@ -0,0 +1,54 @@
|
|||
#include "vmlinux.h"
|
||||
#include "bpf_common.h"
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include "bpf_ratelimit.h"
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
#define CPU_NUM 128
|
||||
BPF_RATELIMIT_IN_MAP(rate, 1, CPU_NUM * 10000, 0);
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} oom_perf_events SEC(".maps");
|
||||
|
||||
struct oom_info {
|
||||
char trigger_comm[TASK_COMM_LEN];
|
||||
char victim_comm[TASK_COMM_LEN];
|
||||
u32 trigger_pid;
|
||||
u32 victim_pid;
|
||||
u64 trigger_memcg_css;
|
||||
u64 victim_memcg_css;
|
||||
};
|
||||
|
||||
SEC("kprobe/oom_kill_process")
|
||||
int kprobe_oom_kill_process(struct pt_regs *ctx)
|
||||
{
|
||||
struct oom_control *oc;
|
||||
struct oom_info info = {};
|
||||
struct task_struct *trigger_task, *victim_task;
|
||||
|
||||
if (bpf_ratelimited_in_map(ctx, rate))
|
||||
return 0;
|
||||
|
||||
oc = (void *)ctx->di;
|
||||
|
||||
if (!oc)
|
||||
return 0;
|
||||
trigger_task = (struct task_struct *)bpf_get_current_task();
|
||||
victim_task = BPF_CORE_READ(oc, chosen);
|
||||
info.trigger_pid = BPF_CORE_READ(trigger_task, pid);
|
||||
info.victim_pid = BPF_CORE_READ(victim_task, pid);
|
||||
BPF_CORE_READ_STR_INTO(&info.trigger_comm, trigger_task, comm);
|
||||
BPF_CORE_READ_STR_INTO(&info.victim_comm, victim_task, comm);
|
||||
|
||||
info.victim_memcg_css = (u64)BPF_CORE_READ(victim_task, cgroups, subsys[4]);
|
||||
info.trigger_memcg_css = (u64)BPF_CORE_READ(trigger_task, cgroups, subsys[4]);
|
||||
|
||||
bpf_perf_event_output(ctx, &oom_perf_events, BPF_F_CURRENT_CPU, &info, sizeof(info));
|
||||
return 0;
|
||||
}
|
|
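One small observation: the two subsys[4] reads above hard-code the memory cgroup subsystem index, while the memcg tracer earlier in this change uses the memory_cgrp_id enum from vmlinux.h for the same purpose. If that enum is available on the target kernels, the equivalent reads would be the sketch below (illustrative, not a required change).

// Equivalent to the subsys[4] reads above, using the BTF enum instead of a
// hard-coded index (assumes memory_cgrp_id is present in vmlinux.h).
info.victim_memcg_css = (u64)BPF_CORE_READ(victim_task, cgroups, subsys[memory_cgrp_id]);
info.trigger_memcg_css = (u64)BPF_CORE_READ(trigger_task, cgroups, subsys[memory_cgrp_id]);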
@ -0,0 +1,311 @@
|
|||
#include "vmlinux.h"
|
||||
#include "bpf_common.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
|
||||
// By default, we use the task_group address as the key for map operations.
|
||||
#define TG_ADDR_KEY
|
||||
|
||||
#define TASK_RUNNING 0
|
||||
#define TASK_ON_RQ_QUEUED 1
|
||||
|
||||
#define _(P) \
|
||||
({ \
|
||||
typeof(P) val = 0; \
|
||||
bpf_probe_read(&val, sizeof(val), &(P)); \
|
||||
val; \
|
||||
})
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
struct stat_t {
|
||||
unsigned long nvcsw; // task_group counts of voluntary context switch
|
||||
unsigned long nivcsw; // task_group counts of involuntary context switch
|
||||
unsigned long nlat_01; // task_group counts of sched latency range [0, 10)ms
|
||||
unsigned long nlat_02; // task_group counts of sched latency range [10, 20)ms
|
||||
unsigned long nlat_03; // task_group counts of sched latency range [20, 50)ms
|
||||
unsigned long nlat_04; // task_group counts of sched latency range [50, inf)ms
|
||||
};
|
||||
|
||||
struct g_stat_t {
|
||||
unsigned long g_nvcsw; // global counts of voluntary context switch
|
||||
unsigned long g_nivcsw; // global counts of involuntary context switch
|
||||
unsigned long g_nlat_01; // global counts of sched latency range [0, 10)ms
|
||||
unsigned long g_nlat_02; // global counts of sched latency range [10, 20)ms
|
||||
unsigned long g_nlat_03; // global counts of sched latency range [20, 50)ms
|
||||
unsigned long g_nlat_04; // global counts of sched latency range [50, inf)ms
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__type(key, u32);
|
||||
__type(value, u64);
|
||||
// FIXME: is 10000 enough or too large?
|
||||
__uint(max_entries, 10000);
|
||||
} latency SEC(".maps");
|
||||
|
||||
struct stat_t;
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
#ifdef TG_ADDR_KEY
|
||||
__type(key, u64);
|
||||
#else
|
||||
__type(key, u32);
|
||||
#endif
|
||||
__type(value, struct stat_t);
|
||||
__uint(max_entries, 10000);
|
||||
} cpu_tg_metric SEC(".maps");
|
||||
|
||||
struct g_stat_t;
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__type(key, u32);
|
||||
__type(value, struct g_stat_t);
|
||||
// all global counts are integrated in one g_stat_t struct
|
||||
__uint(max_entries, 1);
|
||||
} cpu_host_metric SEC(".maps");
|
||||
|
||||
// record enqueue timestamp
|
||||
static int trace_enqueue(u32 pid)
|
||||
{
|
||||
//u64 *valp;
|
||||
u64 ts;
|
||||
|
||||
if (pid == 0)
|
||||
return 0;
|
||||
|
||||
ts = bpf_ktime_get_ns();
|
||||
bpf_map_update_elem(&latency, &pid, &ts, BPF_ANY);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct sched_wakeup_new_args {
|
||||
unsigned long long pad;
|
||||
char comm[16];
|
||||
int pid;
|
||||
int prio;
|
||||
int success;
|
||||
int target_cpu;
|
||||
};
|
||||
|
||||
SEC("tracepoint/sched/sched_wakeup_new")
|
||||
int sched_wakeup_new_entry(struct sched_wakeup_new_args *ctx)
|
||||
{
|
||||
return trace_enqueue(ctx->pid);
|
||||
}
|
||||
|
||||
struct sched_wakeup_args {
|
||||
unsigned long long pad;
|
||||
char comm[16];
|
||||
int pid;
|
||||
int prio;
|
||||
int success;
|
||||
int target_cpu;
|
||||
};
|
||||
|
||||
SEC("tracepoint/sched/sched_wakeup")
|
||||
int sched_wakeup_entry(struct sched_wakeup_new_args *ctx)
|
||||
{
|
||||
return trace_enqueue(ctx->pid);
|
||||
}
|
||||
|
||||
#define NSEC_PER_MSEC 1000000L
|
||||
SEC("raw_tracepoint/sched_switch")
|
||||
int sched_switch_entry(struct bpf_raw_tracepoint_args *ctx)
|
||||
{
|
||||
u32 prev_pid, next_pid, g_key = 0;
|
||||
u64 now, *tsp, delta;
|
||||
bool is_voluntary;
|
||||
struct stat_t *entry;
|
||||
struct g_stat_t *g_entry;
|
||||
|
||||
// TP_PROTO(bool preempt, struct task_struct *prev, struct task_struct *next)
|
||||
struct task_struct *prev = (struct task_struct *)ctx->args[1];
|
||||
struct task_struct *next = (struct task_struct *)ctx->args[2];
|
||||
|
||||
#ifdef TG_ADDR_KEY
|
||||
// get task_group addr: task_struct->sched_task_group
|
||||
u64 key = (u64)_(prev->sched_task_group);
|
||||
#else
|
||||
// get pid ns id: task_struct->nsproxy->pid_ns_for_children->ns.inum
|
||||
u32 key = BPF_CORE_READ(prev, nsproxy, pid_ns_for_children, ns.inum);
|
||||
#endif
|
||||
|
||||
long state;
|
||||
// to avoid compilation warning, use raw interface instead of macro _()
|
||||
bpf_probe_read(&state, sizeof(long), (void *)&(prev->state));
|
||||
|
||||
// ivcsw: treat like an enqueue event and store timestamp
|
||||
prev_pid = _(prev->pid);
|
||||
if (state == TASK_RUNNING) {
|
||||
if (prev_pid != 0) {
|
||||
now = bpf_ktime_get_ns();
|
||||
bpf_map_update_elem(&latency, &prev_pid, &now, BPF_ANY);
|
||||
}
|
||||
is_voluntary = 0;
|
||||
} else {
|
||||
is_voluntary = 1;
|
||||
}
|
||||
|
||||
g_entry = bpf_map_lookup_elem(&cpu_host_metric, &g_key);
|
||||
if (!g_entry) {
|
||||
// init global counts map
|
||||
struct g_stat_t g_new_stat = {
|
||||
.g_nvcsw = 0,
|
||||
.g_nivcsw = 0,
|
||||
.g_nlat_01 = 0,
|
||||
.g_nlat_02 = 0,
|
||||
.g_nlat_03 = 0,
|
||||
.g_nlat_04 = 0,
|
||||
};
|
||||
bpf_map_update_elem(&cpu_host_metric, &g_key, &g_new_stat, BPF_NOEXIST);
|
||||
g_entry = bpf_map_lookup_elem(&cpu_host_metric, &g_key);
|
||||
if (!g_entry)
|
||||
return 0;
|
||||
}
|
||||
|
||||
// When using the pid namespace id as the key, we may occasionally get a
// null id because task->nsproxy has already been freed; this usually
// means the task is almost dead (zombie), so ignore it.
|
||||
if (key && prev_pid) {
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
|
||||
if (!entry) {
|
||||
struct stat_t new_stat = {
|
||||
.nvcsw = 0,
|
||||
.nivcsw = 0,
|
||||
.nlat_01 = 0,
|
||||
.nlat_02 = 0,
|
||||
.nlat_03 = 0,
|
||||
.nlat_04 = 0,
|
||||
};
|
||||
bpf_map_update_elem(&cpu_tg_metric, &key, &new_stat, BPF_NOEXIST);
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
|
||||
if (!entry)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (is_voluntary) {
|
||||
__sync_fetch_and_add(&entry->nvcsw, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nvcsw, 1);
|
||||
} else {
|
||||
__sync_fetch_and_add(&entry->nivcsw, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nivcsw, 1);
|
||||
}
|
||||
}
|
||||
|
||||
// trace_sched_switch is only called when prev != next, so no need to check again.
|
||||
|
||||
next_pid = _(next->pid);
|
||||
// ignore idle
|
||||
if (next_pid == 0)
|
||||
return 0;
|
||||
|
||||
// fetch timestamp and calculate delta
|
||||
tsp = bpf_map_lookup_elem(&latency, &next_pid);
|
||||
if (tsp == 0 || *tsp == 0) {
|
||||
return 0; // missed enqueue
|
||||
}
|
||||
|
||||
now = bpf_ktime_get_ns();
|
||||
delta = now - *tsp;
|
||||
bpf_map_delete_elem(&latency, &next_pid);
|
||||
|
||||
#ifdef TG_ADDR_KEY
|
||||
key = (u64)_(next->sched_task_group);
|
||||
#else
|
||||
key = BPF_CORE_READ(next, nsproxy, pid_ns_for_children, ns.inum);
|
||||
#endif
|
||||
|
||||
if (key) {
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
|
||||
if (!entry) {
|
||||
struct stat_t new_stat = {
|
||||
.nvcsw = 0,
|
||||
.nivcsw = 0,
|
||||
.nlat_01 = 0,
|
||||
.nlat_02 = 0,
|
||||
.nlat_03 = 0,
|
||||
.nlat_04 = 0,
|
||||
};
|
||||
bpf_map_update_elem(&cpu_tg_metric, &key, &new_stat, BPF_NOEXIST);
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
|
||||
if (!entry)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (delta < 10 * NSEC_PER_MSEC) {
|
||||
__sync_fetch_and_add(&entry->nlat_01, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nlat_01, 1);
|
||||
} else if (delta < 20 * NSEC_PER_MSEC) {
|
||||
__sync_fetch_and_add(&entry->nlat_02, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nlat_02, 1);
|
||||
} else if (delta < 50 * NSEC_PER_MSEC) {
|
||||
__sync_fetch_and_add(&entry->nlat_03, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nlat_03, 1);
|
||||
} else {
|
||||
__sync_fetch_and_add(&entry->nlat_04, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nlat_04, 1);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("raw_tracepoint/sched_process_exit")
|
||||
int sched_process_exit_entry(struct bpf_raw_tracepoint_args *ctx)
|
||||
{
|
||||
u32 pid;
|
||||
|
||||
// TP_PROTO(struct task_struct *tsk)
|
||||
struct task_struct *p = (struct task_struct *)ctx->args[0];
|
||||
|
||||
pid = _(p->pid);
|
||||
/*
 * Check the latency table here to keep it from overflowing in the following
 * scenario: a task is woken up but keeps running on another CPU, so it never
 * shows up as next_pid in sched_switch because it is about to exit, and its
 * latency entry is never deleted. To avoid the table overflowing, delete the
 * entry when the task exits.
 */
|
||||
|
||||
if (bpf_map_lookup_elem(&latency, &pid)) {
|
||||
bpf_map_delete_elem(&latency, &pid);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef TG_ADDR_KEY
|
||||
// When cgroup is removed, the record should be deleted.
|
||||
SEC("kprobe/sched_free_group")
|
||||
int sched_free_group_entry(struct pt_regs *ctx)
|
||||
{
|
||||
struct task_group *tg = (void *) PT_REGS_PARM1(ctx);
|
||||
struct stat_t *entry;
|
||||
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &tg);
|
||||
if (entry)
|
||||
bpf_map_delete_elem(&cpu_tg_metric, &tg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
// When pid namespace is destroyed, the record should be deleted.
|
||||
SEC("kprobe/destroy_pid_namespace")
|
||||
int destroy_pid_namespace_entry(struct pt_regs *ctx)
|
||||
{
|
||||
struct pid_namespace *ns = (void *) PT_REGS_PARM1(ctx);
|
||||
struct stat_t *entry;
|
||||
|
||||
// ns->ns.inum
|
||||
u32 pidns = BPF_CORE_READ(ns, ns.inum);
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &pidns);
|
||||
if (entry)
|
||||
bpf_map_delete_elem(&cpu_tg_metric, &pidns);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
|
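The lookup / insert-with-BPF_NOEXIST / re-lookup pattern appears three times in this program; purely as a reading aid, the idiom can be expressed once as a helper along the lines below (sketch only, reusing the stat_t type and cpu_tg_metric map defined above; the key type shown matches the TG_ADDR_KEY build).

// Sketch: look up a per-task_group stat entry, creating a zeroed one if it
// does not exist yet. Mirrors the inline pattern used in sched_switch_entry().
static __always_inline struct stat_t *tg_stat_lookup_or_init(u64 key)
{
	struct stat_t *entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);

	if (!entry) {
		struct stat_t zero = {};

		bpf_map_update_elem(&cpu_tg_metric, &key, &zero, BPF_NOEXIST);
		entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
	}
	return entry;
}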
@ -0,0 +1,158 @@
|
|||
#include "vmlinux.h"
|
||||
#include "bpf_common.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include "bpf_ratelimit.h"
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
#define NR_STACK_TRACE_MAX 0x4000
|
||||
#define MSEC_PER_NSEC 1000000UL
|
||||
#define TICK_DEP_MASK_NONE 0
|
||||
#define SOFTIRQ_THRESH 5000000UL
|
||||
|
||||
volatile const u64 softirq_thresh = SOFTIRQ_THRESH;
|
||||
|
||||
#define CPU_NUM 128
|
||||
#define TICK 1000
|
||||
BPF_RATELIMIT(rate, 1, CPU_NUM * TICK * 1000);
|
||||
|
||||
struct timer_softirq_run_ts {
|
||||
u32 start_trace;
|
||||
u32 restarting_tick;
|
||||
u64 soft_ts;
|
||||
};
|
||||
|
||||
struct report_event {
|
||||
u64 stack[PERF_MAX_STACK_DEPTH];
|
||||
s64 stack_size;
|
||||
u64 now;
|
||||
u64 stall_time;
|
||||
char comm[TASK_COMM_LEN];
|
||||
u32 pid;
|
||||
u32 cpu;
|
||||
};
|
||||
|
||||
// the map for recording irq/softirq timer ts
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__uint(key_size, sizeof(u32));
|
||||
__uint(value_size, sizeof(struct timer_softirq_run_ts));
|
||||
__uint(max_entries, 1);
|
||||
} timerts_map SEC(".maps");
|
||||
|
||||
// the map used as scratch storage for struct report_event
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__uint(key_size, sizeof(u32)); // key = 0
|
||||
__uint(value_size, sizeof(struct report_event));
|
||||
__uint(max_entries, 1);
|
||||
} report_map SEC(".maps");
|
||||
|
||||
// the perf event map used to report to user space
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} irqoff_event_map SEC(".maps");
|
||||
|
||||
SEC("kprobe/scheduler_tick")
|
||||
void probe_scheduler_tick(struct pt_regs *ctx)
|
||||
{
|
||||
// verify bpf-ratelimit
|
||||
if (bpf_ratelimited(&rate))
|
||||
return;
|
||||
|
||||
//update soft timer timestamps
|
||||
int key = 0;
|
||||
struct timer_softirq_run_ts *ts;
|
||||
//struct thresh_data *tdata;
|
||||
struct report_event *event;
|
||||
u64 now;
|
||||
u64 delta;
|
||||
|
||||
ts = bpf_map_lookup_elem(&timerts_map, &key);
|
||||
if (!ts)
|
||||
return;
|
||||
|
||||
if (!ts->start_trace)
|
||||
return;
|
||||
|
||||
//update soft timer timestamps
|
||||
if (!ts->soft_ts) {
|
||||
ts->soft_ts = bpf_ktime_get_ns();
|
||||
return;
|
||||
}
|
||||
|
||||
event = bpf_map_lookup_elem(&report_map, &key);
|
||||
if (!event)
|
||||
return;
|
||||
|
||||
if (ts->restarting_tick) {
|
||||
ts->restarting_tick = 0;
|
||||
ts->soft_ts = bpf_ktime_get_ns();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
now = bpf_ktime_get_ns();
|
||||
delta = now - ts->soft_ts;
|
||||
|
||||
// if delta exceeds the threshold, report the details to user space
|
||||
if (delta >= softirq_thresh) {
|
||||
event->now = now;
|
||||
event->stall_time = delta;
|
||||
__builtin_memset(event->comm, 0, sizeof(event->comm));
|
||||
bpf_get_current_comm(&event->comm, sizeof(event->comm));
|
||||
event->pid = (u32)bpf_get_current_pid_tgid();
|
||||
event->cpu = bpf_get_smp_processor_id();
|
||||
event->stack_size = bpf_get_stack(ctx, event->stack, sizeof(event->stack), 0);
|
||||
|
||||
bpf_perf_event_output(ctx, &irqoff_event_map, BPF_F_CURRENT_CPU,
|
||||
event, sizeof(struct report_event));
|
||||
}
|
||||
|
||||
// update soft_ts, use for next trace
|
||||
ts->soft_ts = now;
|
||||
}
|
||||
|
||||
struct tp_tick_stop {
|
||||
unsigned long pad;
|
||||
int success;
|
||||
int dependency;
|
||||
};
|
||||
|
||||
SEC("tracepoint/timer/tick_stop")
|
||||
void probe_tick_stop(struct tp_tick_stop *ctx)
|
||||
{
|
||||
struct timer_softirq_run_ts *ts;
|
||||
int key = 0;
|
||||
|
||||
ts = bpf_map_lookup_elem(&timerts_map, &key);
|
||||
if (!ts)
|
||||
return;
|
||||
|
||||
if (ctx->success == 1 && ctx->dependency == TICK_DEP_MASK_NONE) {
|
||||
ts->start_trace = 0;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
SEC("kprobe/tick_nohz_restart_sched_tick")
|
||||
void probe_tick_nohz_restart_sched_tick(struct pt_regs *ctx)
|
||||
{
|
||||
struct timer_softirq_run_ts *ts;
|
||||
int key = 0;
|
||||
u64 now;
|
||||
|
||||
ts = bpf_map_lookup_elem(&timerts_map, &key);
|
||||
if (!ts)
|
||||
return;
|
||||
|
||||
now = bpf_ktime_get_ns();
|
||||
|
||||
ts->soft_ts = now;
|
||||
ts->start_trace = 1;
|
||||
ts->restarting_tick = 1;
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
#include "vmlinux.h"
|
||||
#include "bpf_common.h"
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include "bpf_ratelimit.h"
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
#define CPU_NUM 128
|
||||
BPF_RATELIMIT_IN_MAP(rate, 1, CPU_NUM * 10000, 0);
|
||||
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} softlockup_perf_events SEC(".maps");
|
||||
|
||||
struct softlockup_info {
|
||||
u32 cpu;
|
||||
u32 pid;
|
||||
char comm[TASK_COMM_LEN];
|
||||
};
|
||||
|
||||
SEC("kprobe/watchdog_timer_fn+442")
|
||||
int kprobe_watchdog_timer_fn(struct pt_regs *ctx)
|
||||
{
|
||||
struct softlockup_info info = {};
|
||||
struct task_struct *task;
|
||||
|
||||
if (bpf_ratelimited_in_map(ctx, rate))
|
||||
return 0;
|
||||
info.cpu = bpf_get_smp_processor_id();
|
||||
task = (struct task_struct *)bpf_get_current_task();
|
||||
info.pid = bpf_get_current_pid_tgid() & 0xffffffffUL;
|
||||
BPF_CORE_READ_STR_INTO(&info.comm, task, comm);
|
||||
bpf_perf_event_output(ctx, &softlockup_perf_events, BPF_F_CURRENT_CPU, &info, sizeof(info));
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,57 @@
#!/bin/sh

usage() {
	echo "OVERVIEW: HuaTuo BPF compiler tool (clang LLVM)

USAGE: clang.sh -s <source.c> -o <output.o> -I [includes] -C '[compile_options]'
EXAMPLE:
  clang.sh -s example.bpf.c -o example.o        # run preprocess, compile, and assemble steps (-C '-c')
  clang.sh -s example.bpf.c -o example.o -I include -I include/4.18.0-193.6.3.el8_2.v1.3.x86_64 # specify the headers (-C '-c')
  clang.sh -s example.bpf.c -o example.o -C '-E'  # only run the preprocessor
  clang.sh -s example.bpf.c -o example.o -C '-S'  # only run preprocess and compilation steps"
}

SRC=
OBJ=
INCLUDES=
DEFAULT_INCLUDES="-I include -I include/4.18.0-193.6.3.el8_2.v1.2.x86_64"
COMPILE_OPTIONS=
DEFAULT_COMPILE_OPTIONS="-Wall -O2 -g -target bpf -D__TARGET_ARCH_x86 -mcpu=v1 -c"

while getopts 'hs:o:C:I:' opt
do
	case ${opt} in
	s)
		[ -n "${SRC}" ] && echo "-s (source) requires exactly 1 file (bpf.c)" && exit 1
		SRC=${OPTARG}
		;;
	o)
		[ -n "${OBJ}" ] && echo "-o (output) requires exactly 1 file (output.o)" && exit 1
		OBJ=${OPTARG}
		;;
	C)
		COMPILE_OPTIONS=${OPTARG}
		;;
	I)
		INCLUDES="${INCLUDES} -I ${OPTARG}"
		;;
	h)
		usage
		exit
		;;
	?)
		usage
		exit 1
		;;
	esac
done

[ -z "${SRC}" ] && echo -e "-s must be specified, such as -s example.bpf.c \n\n $(usage)" && exit 1
[ -z "${OBJ}" ] && echo -e "-o must be specified, such as -o example.o \n\n $(usage)" && exit 1


# Note: options in ${DEFAULT_COMPILE_OPTIONS} may be overridden by ${COMPILE_OPTIONS} within ${OPTIONS}
OPTIONS="${DEFAULT_COMPILE_OPTIONS} ${COMPILE_OPTIONS}"
[ -z "${INCLUDES}" ] && INCLUDES="${DEFAULT_INCLUDES}"

clang ${OPTIONS} ${SRC} -o ${OBJ} ${INCLUDES}
@ -0,0 +1,15 @@
# elasticsearch
ELASTIC_VERSION=8.15.5

# https://www.elastic.co/guide/en/elasticsearch/reference/current/built-in-users.html
ELASTIC_PASSWORD='huatuo-bamai'       # user 'elastic' (built-in)
KIBANA_SYSTEM_PASSWORD='huatuo-bamai' # user 'kibana_system' (built-in)

# host used by the setup script when initializing the built-in users
ELASTICSEARCH_HOST='localhost'

# prometheus
PROMETHEUS_VERSION=v2.53.3 # LTS v2.53

# Grafana
GRAFANA_VERSION=11.0.0
@ -0,0 +1,34 @@
|
|||
ARG BUILD_PATH=/go/huatuo-bamai
|
||||
ARG RUN_PATH=/home/huatuo-bamai
|
||||
|
||||
# https://hub.docker.com/_/golang/tags?name=1.22.4
|
||||
FROM golang:1.22.4-alpine AS base
|
||||
|
||||
# Install dependencies for build
|
||||
RUN apk add --no-cache \
|
||||
make \
|
||||
clang15 \
|
||||
libbpf-dev \
|
||||
bpftool \
|
||||
curl && \
|
||||
bpftool btf dump file /sys/kernel/btf/vmlinux format c > bpf/include/vmlinux.h
|
||||
ENV PATH=$PATH:/usr/lib/llvm15/bin
|
||||
|
||||
# Build huatuo
|
||||
FROM base AS build
|
||||
ARG BUILD_PATH
|
||||
WORKDIR ${BUILD_PATH}
|
||||
COPY . .
|
||||
RUN make
|
||||
|
||||
# Release huatuo image
|
||||
FROM base AS run
|
||||
ARG BUILD_PATH
|
||||
ARG RUN_PATH
|
||||
WORKDIR ${RUN_PATH}
|
||||
COPY --from=build \
|
||||
${BUILD_PATH}/_output ./_output
|
||||
COPY --from=build \
|
||||
${BUILD_PATH}/huatuo-bamai.conf .
|
||||
|
||||
CMD ["/run.sh"]
|
|
@ -0,0 +1,48 @@
|
|||
services:
|
||||
elasticsearch:
|
||||
image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTIC_VERSION:-8.15.5}
|
||||
container_name: es
|
||||
network_mode: host
|
||||
environment:
|
||||
discovery.type: single-node
|
||||
ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-}
|
||||
KIBANA_SYSTEM_PASSWORD: ${KIBANA_SYSTEM_PASSWORD:-}
|
||||
volumes:
|
||||
- ./elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml:ro
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:${PROMETHEUS_VERSION:-v2.53.3}
|
||||
container_name: prometheus
|
||||
network_mode: host
|
||||
volumes:
|
||||
- ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana-oss:${GRAFANA_VERSION:-11.0.0}
|
||||
container_name: grafana
|
||||
network_mode: host
|
||||
volumes:
|
||||
- ./grafana/datasources/elasticsearch.yaml:/etc/grafana/provisioning/datasources/elasticsearch.yaml:ro
|
||||
- ./grafana/datasources/prometheus.yaml:/etc/grafana/provisioning/datasources/prometheus.yaml:ro
|
||||
- ./grafana/dashboards:/etc/grafana/provisioning/dashboards:ro
|
||||
depends_on:
|
||||
- prometheus
|
||||
- elasticsearch
|
||||
|
||||
huatuo-bamai:
|
||||
build:
|
||||
context: ./../../ # build from the repo root; compilation happens in the Dockerfile
|
||||
dockerfile: ./build/docker/Dockerfile
|
||||
container_name: huatuo-bamai
|
||||
network_mode: host
|
||||
privileged: true
|
||||
environment:
|
||||
ELASTICSEARCH_HOST: ${ELASTICSEARCH_HOST:-}
|
||||
ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-}
|
||||
volumes:
|
||||
- ./run.sh:/run.sh:ro
|
||||
- /sys/kernel:/sys/kernel
|
||||
depends_on:
|
||||
- elasticsearch
|
||||
- prometheus
|
||||
- grafana
|
|
@ -0,0 +1,4 @@
cluster.name: "docker-cluster"
network.host: 0.0.0.0
http.port: 9200
xpack.security.enabled: true
@ -0,0 +1,24 @@
|
|||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
# <string> a unique provider name. Required
|
||||
- name: 'huatuo-bamai'
|
||||
# <int> Org id. Default to 1
|
||||
orgId: 1
|
||||
# <string> name of the dashboard folder.
|
||||
folder: ''
|
||||
# <string> folder UID. will be automatically generated if not specified
|
||||
folderUid: ''
|
||||
# <string> provider type. Default to 'file'
|
||||
type: file
|
||||
# <bool> disable dashboard deletion
|
||||
disableDeletion: false
|
||||
# <int> how often Grafana will scan for changed dashboards
|
||||
updateIntervalSeconds: 10
|
||||
# <bool> allow updating provisioned dashboards from the UI
|
||||
allowUiUpdates: false
|
||||
options:
|
||||
# <string, required> path to dashboard files on disk. Required when using the 'file' type
|
||||
path: /etc/grafana/provisioning/dashboards
|
||||
# <bool> use folder names from filesystem to create folders in Grafana
|
||||
foldersFromFilesStructure: true
|
|
@ -0,0 +1,63 @@
|
|||
# https://grafana.com/docs/grafana/latest/datasources/elasticsearch/
|
||||
|
||||
apiVersion: 1
|
||||
|
||||
# List of data sources to delete from the database.
|
||||
deleteDatasources:
|
||||
- name: huatuo-bamai-es
|
||||
|
||||
# Mark provisioned data sources for deletion if they are no longer in a provisioning file.
|
||||
# It has no effect on data sources that are already listed in the deleteDatasources section.
|
||||
prune: true
|
||||
|
||||
# List of data sources to insert/update depending on what's
|
||||
# available in the database.
|
||||
datasources:
|
||||
# <string, required> Sets the name you use to refer to
|
||||
# the data source in panels and queries.
|
||||
- name: huatuo-bamai-es
|
||||
# <string, required> Sets the data source type.
|
||||
type: elasticsearch
|
||||
# <string, required> Sets the access mode, either
|
||||
# proxy or direct (Server or Browser in the UI).
|
||||
# Some data sources are incompatible with any setting
|
||||
# but proxy (Server).
|
||||
access: proxy
|
||||
# <int> Sets the organization id. Defaults to orgId 1.
|
||||
orgId: 1
|
||||
# <string> Sets a custom UID to reference this
|
||||
# data source in other parts of the configuration.
|
||||
# If not specified, Grafana generates one.
|
||||
uid: huatuo-bamai-es
|
||||
# <string> Sets the data source's URL, including the
|
||||
# port.
|
||||
url: http://localhost:9200
|
||||
# <string> Sets the database user, if necessary.
|
||||
user: elastic
|
||||
# <string> Sets the database name, if necessary.
|
||||
database:
|
||||
# <bool> Enables credential headers.
|
||||
withCredentials:
|
||||
# <bool> Toggles whether the data source is pre-selected
|
||||
# for new panels. You can set only one default
|
||||
# data source per organization.
|
||||
isDefault:
|
||||
# <map> Fields to convert to JSON and store in jsonData.
|
||||
jsonData:
|
||||
index: 'huatuo_bamai*'
|
||||
timeField: 'uploaded_time'
|
||||
# <map> Fields to encrypt before storing in jsonData.
|
||||
secureJsonData:
|
||||
# <string> Defines the CA cert, client cert, and
|
||||
# client key for encrypted authentication.
|
||||
tlsCACert: '...'
|
||||
tlsClientCert: '...'
|
||||
tlsClientKey: '...'
|
||||
# <string> Sets the database password, if necessary.
|
||||
password: huatuo-bamai
|
||||
# <int> Sets the version. Used to compare versions when
|
||||
# updating. Ignored when creating a new data source.
|
||||
version: 1
|
||||
# <bool> Allows users to edit data sources from the
|
||||
# Grafana UI.
|
||||
editable: false
|
|
@ -0,0 +1,29 @@
|
|||
# https://grafana.com/docs/grafana/latest/datasources/prometheus/
|
||||
|
||||
apiVersion: 1
|
||||
|
||||
# List of data sources to delete from the database.
|
||||
deleteDatasources:
|
||||
- name: huatuo-bamai-prom
|
||||
|
||||
# Mark provisioned data sources for deletion if they are no longer in a provisioning file.
|
||||
# It has no effect on data sources that are already listed in the deleteDatasources section.
|
||||
prune: true
|
||||
|
||||
datasources:
|
||||
- name: huatuo-bamai-prom
|
||||
type: prometheus
|
||||
access: proxy
|
||||
# <int> Sets the organization id. Defaults to orgId 1.
|
||||
orgId: 1
|
||||
# <string> Sets a custom UID to reference this
|
||||
# data source in other parts of the configuration.
|
||||
# If not specified, Grafana generates one.
|
||||
uid: huatuo-bamai-prom
|
||||
url: http://localhost:9090
|
||||
jsonData:
|
||||
httpMethod: POST
|
||||
prometheusType: Prometheus
|
||||
cacheLevel: 'High'
|
||||
disableRecordingRules: false
|
||||
incrementalQueryOverlapWindow: 10m
|
|
@ -0,0 +1,33 @@
|
|||
# my global config
|
||||
global:
|
||||
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
|
||||
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
|
||||
# scrape_timeout is set to the global default (10s).
|
||||
|
||||
# Alertmanager configuration
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets:
|
||||
# - alertmanager:9093
|
||||
|
||||
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
|
||||
rule_files:
|
||||
# - "first_rules.yml"
|
||||
# - "second_rules.yml"
|
||||
|
||||
# A scrape configuration containing exactly one endpoint to scrape:
|
||||
# Here it's Prometheus itself.
|
||||
scrape_configs:
|
||||
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||
- job_name: "prometheus"
|
||||
|
||||
# metrics_path defaults to '/metrics'
|
||||
# scheme defaults to 'http'.
|
||||
|
||||
static_configs:
|
||||
- targets: ["localhost:9090"]
|
||||
# add huatuo
|
||||
- job_name: "huatuo"
|
||||
static_configs:
|
||||
- targets: ["localhost:19704"]
|
|
@ -0,0 +1,71 @@
|
|||
#!/bin/sh
|
||||
|
||||
ELASTICSEARCH_HOST=${ELASTICSEARCH_HOST:-localhost}
|
||||
ELASTIC_PASSWORD=${ELASTIC_PASSWORD:-huatuo-bamai}
|
||||
|
||||
# Wait for Elasticsearch to be ready
|
||||
# ref: https://github.com/deviantony/docker-elk/blob/main/setup/entrypoint.sh
|
||||
wait_for_elasticsearch() {
|
||||
args="-s -D- -m15 -w '%{http_code}' http://${ELASTICSEARCH_HOST}:9200/"
|
||||
if [ -n "${ELASTIC_PASSWORD}" ]; then
|
||||
args="$args -u elastic:${ELASTIC_PASSWORD}"
|
||||
fi
|
||||
|
||||
result=1
|
||||
output=""
|
||||
|
||||
# retry for up to 180 seconds
|
||||
for sec in $(seq 1 180); do
|
||||
exit_code=0
|
||||
output=$(eval "curl $args") || exit_code=$?
|
||||
# echo "exec curl $args, exit code: $exit_code, output: $output"
|
||||
if [ $exit_code -ne 0 ]; then
|
||||
result=$exit_code
|
||||
fi
|
||||
|
||||
# Extract the last three characters of the output to check the HTTP status code
|
||||
http_code=$(echo "$output" | tail -c 4)
|
||||
if [ "$http_code" -eq 200 ]; then
|
||||
result=0
|
||||
break
|
||||
fi
|
||||
|
||||
echo "Waiting for Elasticsearch ready... ${sec}s"
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if [ $result -ne 0 ] && [ "$http_code" -ne 000 ]; then
|
||||
echo "$output" | head -c -3
|
||||
fi
|
||||
|
||||
return $result
|
||||
}
|
||||
|
||||
exit_code=0
|
||||
wait_for_elasticsearch || exit_code=$?
|
||||
if [ $exit_code -ne 0 ]; then
|
||||
case $exit_code in
|
||||
6)
|
||||
echo 'Could not resolve host. Is Elasticsearch running?'
|
||||
;;
|
||||
7)
|
||||
echo 'Failed to connect to host. Is Elasticsearch healthy?'
|
||||
;;
|
||||
28)
|
||||
echo 'Timeout connecting to host. Is Elasticsearch healthy?'
|
||||
;;
|
||||
*)
|
||||
echo "Connection to Elasticsearch failed. Exit code: ${exit_code}"
|
||||
;;
|
||||
esac
|
||||
|
||||
exit $exit_code
|
||||
fi
|
||||
|
||||
# Wait for the Elasticsearch built-in users to finish initializing
|
||||
sleep 5
|
||||
|
||||
echo "Elasticsearch is ready."
|
||||
|
||||
# Run huatuo-bamai
|
||||
exec _output/bin/huatuo-bamai --region example --config huatuo-bamai.conf
|
|
@ -0,0 +1,226 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"runtime"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
_ "huatuo-bamai/core/autotracing"
|
||||
_ "huatuo-bamai/core/events"
|
||||
_ "huatuo-bamai/core/metrics"
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/services"
|
||||
"huatuo-bamai/internal/storage"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/internal/utils/pidutil"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
func mainAction(ctx *cli.Context) error {
|
||||
if ctx.NArg() > 0 {
|
||||
return fmt.Errorf("invalid param %v", ctx.Args())
|
||||
}
|
||||
|
||||
if err := pidutil.LockPidFile(ctx.App.Name); err != nil {
|
||||
return fmt.Errorf("failed to lock pid file: %w", err)
|
||||
}
|
||||
defer pidutil.RemovePidFile(ctx.App.Name)
|
||||
|
||||
// init cpu quota
|
||||
host, err := cgrouputil.NewRuntimeCgroup(ctx.App.Name,
|
||||
conf.Get().RuntimeCgroup.LimitInitCPU,
|
||||
conf.Get().RuntimeCgroup.LimitMem)
|
||||
if err != nil {
|
||||
return fmt.Errorf("new cgroup: %w", err)
|
||||
}
|
||||
defer host.Delete()
|
||||
|
||||
// initialize the storage clients.
|
||||
storageInitCtx := storage.InitContext{
|
||||
EsAddresses: conf.Get().Storage.ES.Address,
|
||||
EsUsername: conf.Get().Storage.ES.Username,
|
||||
EsPassword: conf.Get().Storage.ES.Password,
|
||||
EsIndex: conf.Get().Storage.ES.Index,
|
||||
LocalPath: conf.Get().Storage.LocalFile.Path,
|
||||
LocalMaxRotation: conf.Get().Storage.LocalFile.MaxRotation,
|
||||
LocalRotationSize: conf.Get().Storage.LocalFile.RotationSize,
|
||||
Region: conf.Region,
|
||||
}
|
||||
|
||||
if err := storage.InitDefaultClients(&storageInitCtx); err != nil {
|
||||
return fmt.Errorf("storage.InitDefaultClients: %w", err)
|
||||
}
|
||||
|
||||
// init the bpf manager.
|
||||
if err := bpf.InitBpfManager(); err != nil {
|
||||
return fmt.Errorf("failed to init bpf manager: %w", err)
|
||||
}
|
||||
|
||||
if err := pod.ContainerCgroupCssInit(); err != nil {
|
||||
return fmt.Errorf("init pod cgroup metadata: %w", err)
|
||||
}
|
||||
|
||||
blackListed := conf.Get().Tracing.BlackList
|
||||
mgr, err := tracing.NewMgrTracingEvent(blackListed)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := mgr.MgrTracingEventStartAll(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
prom, err := InitMetricsCollector(blackListed)
|
||||
if err != nil {
|
||||
return fmt.Errorf("InitMetricsCollector: %w", err)
|
||||
}
|
||||
|
||||
log.Infof("Initialize the Metrics collector: %v", prom)
|
||||
|
||||
services.Start(conf.Get().APIServer.TCPAddr, mgr, prom)
|
||||
|
||||
// update cpu quota
|
||||
if err := host.UpdateCPU(conf.Get().RuntimeCgroup.LimitCPU); err != nil {
|
||||
return fmt.Errorf("cg update cpu: %w", err)
|
||||
}
|
||||
|
||||
waitExit := make(chan os.Signal, 1)
|
||||
signal.Notify(waitExit, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGUSR1, syscall.SIGINT, syscall.SIGTERM)
|
||||
for {
|
||||
s := <-waitExit
|
||||
switch s {
|
||||
case syscall.SIGQUIT, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM:
|
||||
log.Infof("huatuo-bamai exit by signal %d", s)
|
||||
bpf.CloseBpfManager()
|
||||
return nil
|
||||
case syscall.SIGUSR1:
|
||||
return nil
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
// AppGitCommit will be the hash that the binary was built from
|
||||
// and will be populated by the Makefile
|
||||
AppGitCommit string
|
||||
// AppBuildTime will be populated by the Makefile
|
||||
AppBuildTime string
|
||||
// AppVersion will be populated by the Makefile, read from
|
||||
// VERSION file of the source code.
|
||||
AppVersion string
|
||||
AppUsage = "An In-depth Observation of Linux Kernel Application"
|
||||
)
|
||||
|
||||
func main() {
|
||||
app := cli.NewApp()
|
||||
app.Usage = AppUsage
|
||||
|
||||
if AppVersion == "" {
|
||||
panic("the value of AppVersion must be specified")
|
||||
}
|
||||
|
||||
v := []string{
|
||||
"",
|
||||
fmt.Sprintf(" app_version: %s", AppVersion),
|
||||
fmt.Sprintf(" go_version: %s", runtime.Version()),
|
||||
fmt.Sprintf(" git_commit: %s", AppGitCommit),
|
||||
fmt.Sprintf(" build_time: %s", AppBuildTime),
|
||||
}
|
||||
app.Version = strings.Join(v, "\n")
|
||||
|
||||
app.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "config",
|
||||
Value: "huatuo-bamai.conf",
|
||||
Usage: "huatuo-bamai config file",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "region",
|
||||
Required: true,
|
||||
Usage: "the host and containers are in this region",
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "disable-tracing",
|
||||
Usage:    "disable tracing. This is related to TracerConfig.BlackList in the config file; the two complement each other",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "log-debug",
|
||||
Usage: "enable debug output for logging",
|
||||
},
|
||||
}
|
||||
|
||||
app.Before = func(ctx *cli.Context) error {
|
||||
if err := conf.LoadConfig(ctx.String("config")); err != nil {
|
||||
return fmt.Errorf("failed to load config: %w", err)
|
||||
}
|
||||
|
||||
// set Region
|
||||
conf.Region = ctx.String("region")
|
||||
|
||||
// log level
|
||||
if conf.Get().LogLevel != "" {
|
||||
log.SetLevel(conf.Get().LogLevel)
|
||||
log.Infof("log level [%s] configured in file, use it", log.GetLevel())
|
||||
}
|
||||
|
||||
logFile := conf.Get().LogFile
|
||||
if logFile != "" {
|
||||
file, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o666)
|
||||
if err == nil {
|
||||
log.SetOutput(file)
|
||||
} else {
|
||||
log.SetOutput(os.Stdout)
|
||||
log.Infof("Failed to log to file, using default stdout")
|
||||
}
|
||||
}
|
||||
|
||||
// tracer
|
||||
disabledTracing := ctx.StringSlice("disable-tracing")
|
||||
if len(disabledTracing) > 0 {
|
||||
definedTracers := conf.Get().Tracing.BlackList
|
||||
definedTracers = append(definedTracers, disabledTracing...)
|
||||
|
||||
conf.Set("TracerConfig.BlackList", definedTracers)
|
||||
log.Infof("The tracer black list by cli: %v", conf.Get().Tracing.BlackList)
|
||||
}
|
||||
|
||||
if ctx.Bool("log-debug") {
|
||||
log.SetLevel("Debug")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// core
|
||||
app.Action = mainAction
|
||||
|
||||
// run
|
||||
if err := app.Run(os.Args); err != nil {
|
||||
log.Errorf("Error: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"huatuo-bamai/pkg/metric"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/collectors"
|
||||
)
|
||||
|
||||
var promNamespace = "huatuo_bamai"
|
||||
|
||||
// InitMetricsCollector creates a new MetricsCollector instance.
|
||||
func InitMetricsCollector(blackListed []string) (*prometheus.Registry, error) {
|
||||
nc, err := metric.NewCollectorManager(blackListed)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create collector: %w", err)
|
||||
}
|
||||
|
||||
promRegistry := prometheus.NewRegistry()
|
||||
promRegistry.MustRegister(
|
||||
nc,
|
||||
collectors.NewGoCollector(),
|
||||
collectors.NewProcessCollector(
|
||||
collectors.ProcessCollectorOpts{Namespace: promNamespace}))
|
||||
|
||||
return promRegistry, nil
|
||||
}
|
|
@ -0,0 +1,349 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package autotracing
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/flamegraph"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/storage"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
"huatuo-bamai/pkg/types"
|
||||
)
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("cpuidle", newCPUIdle)
|
||||
}
|
||||
|
||||
func newCPUIdle() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &cpuIdleTracing{},
|
||||
Internal: 20,
|
||||
Flag: tracing.FlagTracing,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetCPUCoresInCgroup function returns the number of cgroup cores
|
||||
func GetCPUCoresInCgroup(cgroupPath string) (uint64, error) {
|
||||
periodPath := cgroupPath + "/cpu.cfs_period_us"
|
||||
quotaPath := cgroupPath + "/cpu.cfs_quota_us"
|
||||
|
||||
period, err := readIntFromFile(periodPath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
quota, err := readIntFromFile(quotaPath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if quota == -1 {
|
||||
return uint64(runtime.NumCPU()), nil
|
||||
}
|
||||
|
||||
if period == 0 {
|
||||
return 0, fmt.Errorf("period not zero")
|
||||
}
|
||||
|
||||
return uint64(quota / period), nil
|
||||
}
|
||||
|
||||
func readIntFromFile(filePath string) (int, error) {
|
||||
data, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
str := strings.TrimSpace(string(data))
|
||||
value, err := strconv.Atoi(str)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return value, nil
|
||||
}
|
||||
|
||||
func readCPUUsage(path string) (map[string]uint64, error) {
|
||||
cpuacctPath := path + "/cpuacct.stat"
|
||||
output, err := os.ReadFile(cpuacctPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cpuUsage := make(map[string]uint64)
|
||||
lines := strings.Split(string(output), "\n")
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
parts := strings.Fields(line)
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
key := parts[0]
|
||||
valueStr := parts[1]
|
||||
value, err := strconv.ParseUint(valueStr, 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cpuUsage[key] = value
|
||||
}
|
||||
cpuUsage["total"] = uint64(time.Now().UnixNano())
|
||||
return cpuUsage, nil
|
||||
}
|
||||
|
||||
// UserHZtons because kernel USER_HZ = 100, the default value set to 10,000,000
|
||||
const (
|
||||
UserHZtons = 10000000
|
||||
USERHZ = 100
|
||||
)
|
||||
|
||||
func calculateCPUUsage(info *containerCPUInfo, currUsage map[string]uint64) error {
|
||||
deltaTotal := currUsage["total"] - info.prevUsage["total"]
|
||||
deltaUser := currUsage["user"] - info.prevUsage["user"]
|
||||
deltaSys := currUsage["system"] - info.prevUsage["system"]
|
||||
|
||||
cpuCores, err := GetCPUCoresInCgroup(info.path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("get cgroup cpu err")
|
||||
}
|
||||
|
||||
if cpuCores == 0 || deltaTotal == 0 {
|
||||
return fmt.Errorf("division by zero error")
|
||||
}
|
||||
|
||||
log.Debugf("cpuidle calculate core %v currUsage %v prevUsage %v", cpuCores, currUsage, info.prevUsage)
|
||||
info.nowUsageP["cpuUser"] = deltaUser * UserHZtons * USERHZ / deltaTotal / cpuCores
|
||||
info.nowUsageP["cpuSys"] = deltaSys * UserHZtons * USERHZ / deltaTotal / cpuCores
|
||||
return nil
|
||||
}
|
||||
|
||||
type containerCPUInfo struct {
|
||||
prevUsage map[string]uint64
|
||||
prevUsageP map[string]uint64
|
||||
nowUsageP map[string]uint64
|
||||
deltaUser int64
|
||||
deltaSys int64
|
||||
timestamp int64
|
||||
path string
|
||||
alive bool
|
||||
}
|
||||
|
||||
// cpuIdleIDMap is the container information
|
||||
type cpuIdleIDMap map[string]*containerCPUInfo
|
||||
|
||||
func updateCPUIdleIDMap(m cpuIdleIDMap) error {
|
||||
containers, err := pod.GetNormalContainers()
|
||||
if err != nil {
|
||||
return fmt.Errorf("GetNormalContainers: %w", err)
|
||||
}
|
||||
|
||||
for _, container := range containers {
|
||||
_, ok := m[container.ID]
|
||||
if ok {
|
||||
m[container.ID].path = filepath.Join(cgrouputil.V1CpuPath(), container.CgroupSuffix)
|
||||
m[container.ID].alive = true
|
||||
} else {
|
||||
temp := &containerCPUInfo{
|
||||
prevUsage: map[string]uint64{
|
||||
"user": 0,
|
||||
"system": 0,
|
||||
"total": 0,
|
||||
},
|
||||
prevUsageP: map[string]uint64{
|
||||
"cpuUser": 0,
|
||||
"cpuSys": 0,
|
||||
},
|
||||
nowUsageP: map[string]uint64{
|
||||
"cpuUser": 0,
|
||||
"cpuSys": 0,
|
||||
},
|
||||
deltaUser: 0,
|
||||
deltaSys: 0,
|
||||
timestamp: 0,
|
||||
path: filepath.Join(cgrouputil.V1CpuPath(), container.CgroupSuffix),
|
||||
alive: true,
|
||||
}
|
||||
m[container.ID] = temp
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var cpuIdleIdMap = make(cpuIdleIDMap)
|
||||
|
||||
func cpuIdleDetect(ctx context.Context) (string, error) {
|
||||
// get config info
|
||||
userth := conf.Get().Tracing.Cpuidle.CgUserth
|
||||
deltauserth := conf.Get().Tracing.Cpuidle.CgDeltaUserth
|
||||
systh := conf.Get().Tracing.Cpuidle.CgSysth
|
||||
deltasysth := conf.Get().Tracing.Cpuidle.CgDeltaSysth
|
||||
usageth := conf.Get().Tracing.Cpuidle.CgUsageth
|
||||
deltausageth := conf.Get().Tracing.Cpuidle.CgDeltaUsageth
|
||||
step := conf.Get().Tracing.Cpuidle.CgStep
|
||||
graceth := conf.Get().Tracing.Cpuidle.CgGrace
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return "", types.ErrExitByCancelCtx
|
||||
case <-time.After(time.Duration(step) * time.Second):
|
||||
if err := updateCPUIdleIDMap(cpuIdleIdMap); err != nil {
|
||||
return "", err
|
||||
}
|
||||
for containerID, v := range cpuIdleIdMap {
|
||||
if !v.alive {
|
||||
delete(cpuIdleIdMap, containerID)
|
||||
} else {
|
||||
v.alive = false
|
||||
currUsage, err := readCPUUsage(v.path)
|
||||
if err != nil {
|
||||
log.Debugf("cpuidle failed to read %s CPU usage: %s", v.path, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if v.prevUsage["user"] == 0 && v.prevUsage["system"] == 0 && v.prevUsage["total"] == 0 {
|
||||
v.prevUsage = currUsage
|
||||
continue
|
||||
}
|
||||
|
||||
err = calculateCPUUsage(v, currUsage)
|
||||
if err != nil {
|
||||
log.Debugf("cpuidle calculate err %s", err)
|
||||
continue
|
||||
}
|
||||
|
||||
v.deltaUser = int64(v.nowUsageP["cpuUser"] - v.prevUsageP["cpuUser"])
|
||||
v.deltaSys = int64(v.nowUsageP["cpuSys"] - v.prevUsageP["cpuSys"])
|
||||
v.prevUsageP["cpuUser"] = v.nowUsageP["cpuUser"]
|
||||
v.prevUsageP["cpuSys"] = v.nowUsageP["cpuSys"]
|
||||
v.prevUsage = currUsage
|
||||
nowtime := time.Now().Unix()
|
||||
gracetime := nowtime - v.timestamp
|
||||
nowUsage := v.nowUsageP["cpuUser"] + v.nowUsageP["cpuSys"]
|
||||
nowDeltaUsage := v.deltaUser + v.deltaSys
|
||||
|
||||
log.Debugf("cpuidle ctID %v user %v deltauser %v sys %v deltasys %v usage %v deltausage %v grace %v graceth %v",
|
||||
containerID, v.nowUsageP["cpuUser"], v.deltaUser, v.nowUsageP["cpuSys"], v.deltaSys, nowUsage, nowDeltaUsage, gracetime, graceth)
|
||||
|
||||
if gracetime > graceth {
|
||||
if (v.nowUsageP["cpuUser"] > userth && v.deltaUser > deltauserth) ||
|
||||
(v.nowUsageP["cpuSys"] > systh && v.deltaSys > deltasysth) ||
|
||||
(nowUsage > usageth && nowDeltaUsage > deltausageth) {
|
||||
v.timestamp = nowtime
|
||||
for key := range v.prevUsage {
|
||||
v.prevUsage[key] = 0
|
||||
}
|
||||
return containerID, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type cpuIdleTracing struct{}
|
||||
|
||||
// Cpuidle is an instance of cpuIdleTracer
|
||||
var (
|
||||
tracerTime time.Time
|
||||
)
|
||||
|
||||
type CPUIdleTracingData struct {
|
||||
NowUser uint64 `json:"nowuser"`
|
||||
UserThreshold uint64 `json:"userthreshold"`
|
||||
DeltaUser int64 `json:"deltauser"`
|
||||
DeltaUserTH int64 `json:"deltauserth"`
|
||||
NowSys uint64 `json:"nowsys"`
|
||||
SysThreshold uint64 `json:"systhreshold"`
|
||||
DeltaSys int64 `json:"deltasys"`
|
||||
DeltaSysTH int64 `json:"deltasysth"`
|
||||
NowUsage uint64 `json:"nowusage"`
|
||||
UsageThreshold uint64 `json:"usagethreshold"`
|
||||
DeltaUsage int64 `json:"deltausage"`
|
||||
DeltaUsageTH int64 `json:"deltausageth"`
|
||||
FlameData []flamegraph.FrameData `json:"flamedata"`
|
||||
}
|
||||
|
||||
// Start detect work, load bpf and wait data form perfevent
|
||||
func (c *cpuIdleTracing) Start(ctx context.Context) error {
|
||||
// TODO: Verify the conditions for startup.
|
||||
containerID, err := cpuIdleDetect(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
tracerTime = time.Now()
|
||||
dur := conf.Get().Tracing.Cpuidle.CgUsageToolduration
|
||||
durstr := strconv.FormatInt(dur, 10)
|
||||
|
||||
// exec tracerperf
|
||||
cmdctx, cancel := context.WithTimeout(ctx, time.Duration(dur+30)*time.Second)
|
||||
defer cancel()
|
||||
|
||||
log.Infof("cpuidle exec tracerperf ctid %v dur %v", containerID, durstr)
|
||||
cmd := exec.CommandContext(cmdctx, "./tracer/perf.bin", "--casename", "cpuidle.o", "--container-id", containerID, "--dur", durstr)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
log.Errorf("cpuidle cmd output %v", strings.TrimSuffix(string(output), "\n"))
|
||||
return fmt.Errorf("cpuidle tracerperf exec err: %w", err)
|
||||
}
|
||||
|
||||
// parse json
|
||||
log.Infof("cpuidle parse json")
|
||||
tracerData := CPUIdleTracingData{}
|
||||
err = json.Unmarshal(output, &tracerData.FlameData)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parse JSON err: %w", err)
|
||||
}
|
||||
|
||||
// save
|
||||
log.Infof("cpuidle upload ES")
|
||||
log.Debugf("cpuidle FlameData %v", tracerData.FlameData)
|
||||
tracerData.NowUser = cpuIdleIdMap[containerID].nowUsageP["cpuUser"]
|
||||
tracerData.UserThreshold = conf.Get().Tracing.Cpuidle.CgUserth
|
||||
tracerData.DeltaUser = cpuIdleIdMap[containerID].deltaUser
|
||||
tracerData.DeltaUserTH = conf.Get().Tracing.Cpuidle.CgDeltaUserth
|
||||
tracerData.NowSys = cpuIdleIdMap[containerID].nowUsageP["cpuSys"]
|
||||
tracerData.SysThreshold = conf.Get().Tracing.Cpuidle.CgSysth
|
||||
tracerData.DeltaSys = cpuIdleIdMap[containerID].deltaSys
|
||||
tracerData.DeltaSysTH = conf.Get().Tracing.Cpuidle.CgDeltaSysth
|
||||
tracerData.NowUsage = cpuIdleIdMap[containerID].nowUsageP["cpuSys"] + cpuIdleIdMap[containerID].nowUsageP["cpuUser"]
|
||||
tracerData.UsageThreshold = conf.Get().Tracing.Cpuidle.CgUsageth
|
||||
tracerData.DeltaUsage = cpuIdleIdMap[containerID].deltaUser + cpuIdleIdMap[containerID].deltaSys
|
||||
tracerData.DeltaUsageTH = conf.Get().Tracing.Cpuidle.CgDeltaUsageth
|
||||
storage.Save("cpuidle", containerID, tracerTime, &tracerData)
|
||||
log.Infof("cpuidle upload ES end")
|
||||
return err
|
||||
}
|
|
@ -0,0 +1,182 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package autotracing
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/flamegraph"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/storage"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
"huatuo-bamai/pkg/types"
|
||||
)
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("cpusys", newCpuSys)
|
||||
}
|
||||
|
||||
func newCpuSys() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &cpuSysTracing{},
|
||||
Internal: 20,
|
||||
Flag: tracing.FlagTracing,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// CPUStats structure that records cpu usage
|
||||
type CPUStats struct {
|
||||
system uint64
|
||||
total uint64
|
||||
}
|
||||
|
||||
func CpuSysDetect(ctx context.Context) (uint64, int64, error) {
|
||||
var (
|
||||
percpuStats CPUStats
|
||||
pervSys uint64
|
||||
deltaSys int64
|
||||
err error
|
||||
)
|
||||
sysdelta := conf.Get().Tracing.Cpusys.CPUSysDelta
|
||||
sysstep := conf.Get().Tracing.Cpusys.CPUSysStep
|
||||
systh := conf.Get().Tracing.Cpusys.CPUSysth
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return 0, 0, types.ErrExitByCancelCtx
|
||||
case <-time.After(time.Duration(sysstep) * time.Second):
|
||||
if percpuStats.total == 0 {
|
||||
percpuStats, err = getCPUStats()
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("get cpuStats err %w", err)
|
||||
}
|
||||
time.Sleep(1 * time.Second)
|
||||
continue
|
||||
}
|
||||
cpuStats, err := getCPUStats()
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
systotal := cpuStats.total - percpuStats.total
|
||||
if systotal == 0 {
|
||||
return 0, 0, fmt.Errorf("systotal is ZERO")
|
||||
}
|
||||
sys := (cpuStats.system - percpuStats.system) * 100 / systotal
|
||||
if pervSys != 0 {
|
||||
deltaSys = int64(sys - pervSys)
|
||||
}
|
||||
|
||||
log.Debugf("cpusys alarm sys %v pervsys %v deltasys %v", sys, pervSys, deltaSys)
|
||||
pervSys = sys
|
||||
percpuStats = cpuStats
|
||||
|
||||
if sys > systh || deltaSys > sysdelta {
|
||||
return sys, deltaSys, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func getCPUStats() (CPUStats, error) {
|
||||
statData, err := os.ReadFile("/proc/stat")
|
||||
if err != nil {
|
||||
return CPUStats{}, err
|
||||
}
|
||||
|
||||
lines := strings.Split(string(statData), "\n")
|
||||
for _, line := range lines {
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) < 5 {
|
||||
continue
|
||||
}
|
||||
|
||||
if fields[0] == "cpu" {
|
||||
var cpuStats CPUStats
|
||||
for i := 1; i < len(fields); i++ {
|
||||
value, err := strconv.ParseUint(fields[i], 10, 64)
|
||||
if err != nil {
|
||||
return CPUStats{}, err
|
||||
}
|
||||
cpuStats.total += value
|
||||
if i == 3 {
|
||||
cpuStats.system = value
|
||||
}
|
||||
}
|
||||
return cpuStats, nil
|
||||
}
|
||||
}
|
||||
return CPUStats{}, fmt.Errorf("failed to parse /proc/stat")
|
||||
}
|
||||
|
||||
type cpuSysTracing struct{}
|
||||
|
||||
type CpuSysTracingData struct {
|
||||
NowSys string `json:"now_sys"`
|
||||
SysThreshold string `json:"sys_threshold"`
|
||||
DeltaSys string `json:"delta_sys"`
|
||||
DeltaSysTh string `json:"delta_sys_th"`
|
||||
FlameData []flamegraph.FrameData `json:"flamedata"`
|
||||
}
|
||||
|
||||
// Start the tcpconnlat task.
|
||||
func (c *cpuSysTracing) Start(ctx context.Context) error {
|
||||
// TODO: Verify the conditions for startup.
|
||||
cpuSys, delta, err := CpuSysDetect(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
tracerTime := time.Now()
|
||||
dur := conf.Get().Tracing.Cpusys.CPUSysToolduration
|
||||
durstr := strconv.FormatInt(dur, 10)
|
||||
|
||||
// exec tracerperf
|
||||
cmdctx, cancel := context.WithTimeout(ctx, time.Duration(dur+30)*time.Second)
|
||||
defer cancel()
|
||||
|
||||
log.Infof("cpusys exec tracerperf dur %v", durstr)
|
||||
cmd := exec.CommandContext(cmdctx, "./tracer/perf.bin", "--casename", "cpusys.o", "--dur", durstr)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
log.Errorf("cpusys cmd output %v", strings.TrimSuffix(string(output), "\n"))
|
||||
return fmt.Errorf("cpusys tracerperf exec err: %w", err)
|
||||
}
|
||||
|
||||
// parse json
|
||||
log.Infof("cpusys parse json")
|
||||
tracerData := CpuSysTracingData{}
|
||||
err = json.Unmarshal(output, &tracerData.FlameData)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parse JSON err: %w", err)
|
||||
}
|
||||
|
||||
// save
|
||||
log.Infof("cpusys upload ES")
|
||||
tracerData.NowSys = fmt.Sprintf("%d", cpuSys)
|
||||
tracerData.SysThreshold = fmt.Sprintf("%d", conf.Get().Tracing.Cpusys.CPUSysth)
|
||||
tracerData.DeltaSys = fmt.Sprintf("%d", delta)
|
||||
tracerData.DeltaSysTh = fmt.Sprintf("%d", conf.Get().Tracing.Cpusys.CPUSysDelta)
|
||||
storage.Save("cpusys", "", tracerTime, &tracerData)
|
||||
log.Infof("cpusys upload ES end")
|
||||
return err
|
||||
}
|
|
@ -0,0 +1,427 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package autotracing
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/storage"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
"huatuo-bamai/pkg/types"
|
||||
|
||||
"github.com/google/cadvisor/utils/cpuload/netlink"
|
||||
"github.com/prometheus/procfs"
|
||||
"github.com/shirou/gopsutil/process"
|
||||
)
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("dload", newDload)
|
||||
}
|
||||
|
||||
func newDload() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &dloadTracing{},
|
||||
Internal: 30,
|
||||
Flag: tracing.FlagTracing,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type containerDloadInfo struct {
|
||||
path string
|
||||
name string
|
||||
container *pod.Container
|
||||
avgnrun [2]uint64
|
||||
load [2]float64
|
||||
avgnuni [2]uint64
|
||||
loaduni [2]float64
|
||||
alive bool
|
||||
}
|
||||
|
||||
type DloadTracingData struct {
|
||||
Avg float64 `json:"avg"`
|
||||
Threshold float64 `json:"threshold"`
|
||||
NrSleeping uint64 `json:"nr_sleeping"`
|
||||
NrRunning uint64 `json:"nr_running"`
|
||||
NrStopped uint64 `json:"nr_stopped"`
|
||||
NrUninterruptible uint64 `json:"nr_uninterruptible"`
|
||||
NrIoWait uint64 `json:"nr_iowait"`
|
||||
LoadAvg float64 `json:"load_avg"`
|
||||
DLoadAvg float64 `json:"dload_avg"`
|
||||
KnowIssue string `json:"known_issue"`
|
||||
InKnownList uint64 `json:"in_known_list"`
|
||||
Stack string `json:"stack"`
|
||||
}
|
||||
|
||||
func getStack(targetPid int32) (string, error) {
|
||||
procStack := "/proc/" + strconv.Itoa(int(targetPid)) + "/stack"
|
||||
content, err := os.ReadFile(procStack)
|
||||
if err != nil {
|
||||
log.Infof("%v", err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
return string(content), nil
|
||||
}
|
||||
|
||||
const (
|
||||
isHost = 1
|
||||
isCgrp = 2
|
||||
)
|
||||
|
||||
func getUnTaskList(cgrpPath string, infoType int) ([]int32, error) {
|
||||
var pidList []int32
|
||||
var err error
|
||||
|
||||
if infoType == isCgrp {
|
||||
taskPath := cgrpPath + "/tasks"
|
||||
|
||||
tskfi, err := os.Open(taskPath)
|
||||
if err != nil {
|
||||
log.Infof("%v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
r := bufio.NewReader(tskfi)
|
||||
|
||||
for {
|
||||
lineBytes, err := r.ReadBytes('\n')
|
||||
line := strings.TrimSpace(string(lineBytes))
|
||||
if err != nil && err != io.EOF {
|
||||
log.Infof("fail to read tasklist: %v", err)
|
||||
break
|
||||
}
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
|
||||
pid, _ := strconv.ParseInt(line, 10, 32)
|
||||
pidList = append(pidList, int32(pid))
|
||||
}
|
||||
} else {
|
||||
procs, err := procfs.AllProcs()
|
||||
if err != nil {
|
||||
log.Infof("%v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, p := range procs {
|
||||
pidList = append(pidList, int32(p.PID))
|
||||
}
|
||||
}
|
||||
|
||||
return pidList, err
|
||||
}
|
||||
|
||||
func dumpUnTaskStack(tskList []int32, dumpType int) (string, error) {
|
||||
var infoTitle string
|
||||
var getValidStackinfo bool = false
|
||||
var strResult string = ""
|
||||
|
||||
stackInfo := new(bytes.Buffer)
|
||||
|
||||
switch dumpType {
|
||||
case isHost:
|
||||
infoTitle = "\nbacktrace of D process in Host:\n"
|
||||
case isCgrp:
|
||||
infoTitle = "\nbacktrace of D process in Cgroup:\n"
|
||||
}
|
||||
|
||||
for _, pid := range tskList {
|
||||
proc, err := process.NewProcess(pid)
|
||||
if err != nil {
|
||||
log.Debugf("fail to get process %d: %v", pid, err)
|
||||
continue
|
||||
}
|
||||
|
||||
status, err := proc.Status()
|
||||
if err != nil {
|
||||
log.Debugf("fail to get status %d: %v", pid, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if status == "D" || status == "U" {
|
||||
comm, err := proc.Name()
|
||||
if err != nil {
|
||||
log.Infof("%v", err)
|
||||
continue
|
||||
}
|
||||
stack, err := getStack(pid)
|
||||
if err != nil {
|
||||
log.Infof("%v", err)
|
||||
continue
|
||||
}
|
||||
if stack == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Fprintf(stackInfo, "Comm: %s\tPid: %d\n%s\n", comm, pid, stack)
|
||||
getValidStackinfo = true
|
||||
}
|
||||
}
|
||||
|
||||
if getValidStackinfo {
|
||||
strResult = fmt.Sprintf("%s%s", infoTitle, stackInfo)
|
||||
}
|
||||
|
||||
return strResult, nil
|
||||
}
|
||||
|
||||
// dloadIDMap is the container information
|
||||
type dloadIDMap map[string]*containerDloadInfo
|
||||
|
||||
var dloadIdMap = make(dloadIDMap)
|
||||
|
||||
func updateIDMap(m dloadIDMap) error {
|
||||
containers, err := pod.GetAllContainers()
|
||||
if err != nil {
|
||||
return fmt.Errorf("GetAllContainers: %w", err)
|
||||
}
|
||||
|
||||
for _, container := range containers {
|
||||
if _, ok := m[container.ID]; ok {
|
||||
m[container.ID].name = container.CgroupSuffix
|
||||
m[container.ID].path = cgrouputil.NewCPU().Path(container.CgroupSuffix)
|
||||
m[container.ID].container = container
|
||||
m[container.ID].alive = true
|
||||
continue
|
||||
}
|
||||
|
||||
m[container.ID] = &containerDloadInfo{
|
||||
path: cgrouputil.NewCPU().Path(container.CgroupSuffix),
|
||||
name: container.CgroupSuffix,
|
||||
container: container,
|
||||
alive: true,
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
const (
|
||||
fShift = 11
|
||||
fixed1 = 1 << fShift
|
||||
exp1 = 1884
|
||||
exp5 = 2014
|
||||
exp15 = 2037
|
||||
)
|
||||
|
||||
func calcLoad(load, exp, active uint64) uint64 {
|
||||
var newload uint64
|
||||
|
||||
newload = load*exp + active*(fixed1-exp)
|
||||
newload += 1 << (fShift - 1)
|
||||
|
||||
return newload / fixed1
|
||||
}
|
||||
|
||||
func calcLoadavg(avgnrun [2]uint64, active uint64) (avgnresult [2]uint64) {
|
||||
if active > 0 {
|
||||
active *= fixed1
|
||||
} else {
|
||||
active = 0
|
||||
}
|
||||
|
||||
avgnresult[0] = calcLoad(avgnrun[0], exp1, active)
|
||||
avgnresult[1] = calcLoad(avgnrun[1], exp5, active)
|
||||
|
||||
return avgnresult
|
||||
}
|
||||
|
||||
func loadInt(x uint64) (r uint64) {
|
||||
r = x >> fShift
|
||||
return r
|
||||
}
|
||||
|
||||
func loadFrac(x uint64) (r uint64) {
|
||||
r = loadInt((x & (fixed1 - 1)) * 100)
|
||||
return r
|
||||
}
|
||||
|
||||
func getAvenrun(avgnrun [2]uint64, offset uint64, shift int) (loadavgNew [2]float64) {
|
||||
var loads [2]uint64
|
||||
|
||||
loads[0] = (avgnrun[0] + offset) << shift
|
||||
loads[1] = (avgnrun[1] + offset) << shift
|
||||
|
||||
loadavgNew[0] = float64(loadInt(loads[0])) +
|
||||
float64(loadFrac(loads[0]))/float64(100)
|
||||
|
||||
loadavgNew[1] = float64(loadInt(loads[1])) +
|
||||
float64(loadFrac(loads[1]))/float64(100)
|
||||
|
||||
return loadavgNew
|
||||
}
|
||||
|
||||
func updateLoad(info *containerDloadInfo, nrRunning, nrUninterruptible uint64) {
|
||||
info.avgnrun = calcLoadavg(info.avgnrun, nrRunning+nrUninterruptible)
|
||||
info.load = getAvenrun(info.avgnrun, fixed1/200, 0)
|
||||
info.avgnuni = calcLoadavg(info.avgnuni, nrUninterruptible)
|
||||
info.loaduni = getAvenrun(info.avgnuni, fixed1/200, 0)
|
||||
}
|
||||
|
||||
func detect(ctx context.Context) (*containerDloadInfo, string, *DloadTracingData, error) {
|
||||
var caseData DloadTracingData
|
||||
|
||||
n, err := netlink.New()
|
||||
if err != nil {
|
||||
log.Infof("Failed to create cpu load util: %s", err)
|
||||
return nil, "", nil, err
|
||||
}
|
||||
defer n.Stop()
|
||||
|
||||
dloadThresh := conf.Get().Tracing.Dload.ThresholdLoad
|
||||
monitorGap := conf.Get().Tracing.Dload.MonitorGap
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, "", nil, types.ErrExitByCancelCtx
|
||||
default:
|
||||
if err := updateIDMap(dloadIdMap); err != nil {
|
||||
return nil, "", nil, err
|
||||
}
|
||||
for k, v := range dloadIdMap {
|
||||
if !v.alive {
|
||||
delete(dloadIdMap, k)
|
||||
} else {
|
||||
v.alive = false
|
||||
|
||||
timeStartMonitor := v.container.StartedAt.Add(time.Second * time.Duration(monitorGap))
|
||||
|
||||
if time.Now().Before(timeStartMonitor) {
|
||||
log.Debugf("%s were just started, we'll start monitoring it later.", v.container.Hostname)
|
||||
continue
|
||||
}
|
||||
|
||||
stats, err := n.GetCpuLoad(v.name, v.path)
|
||||
if err != nil {
|
||||
log.Debugf("failed to get %s load, probably the container has been deleted: %s", v.container.Hostname, err)
|
||||
continue
|
||||
}
|
||||
|
||||
updateLoad(v, stats.NrRunning, stats.NrUninterruptible)
|
||||
|
||||
if v.loaduni[0] > dloadThresh {
|
||||
logTitle := fmt.Sprintf("Avg=%0.2f Threshold=%0.2f %+v ", v.loaduni[0], dloadThresh, stats)
|
||||
logBody := fmt.Sprintf("LoadAvg=%0.2f, DLoadAvg=%0.2f", v.load[0], v.loaduni[0])
|
||||
logLoad := fmt.Sprintf("%s%s", logTitle, logBody)
|
||||
|
||||
log.Infof("dload event %s", logLoad)
|
||||
|
||||
caseData.Avg = v.loaduni[0]
|
||||
caseData.Threshold = dloadThresh
|
||||
caseData.NrSleeping = stats.NrSleeping
|
||||
caseData.NrRunning = stats.NrRunning
|
||||
caseData.NrStopped = stats.NrStopped
|
||||
caseData.NrUninterruptible = stats.NrUninterruptible
|
||||
caseData.NrIoWait = stats.NrIoWait
|
||||
caseData.LoadAvg = v.load[0]
|
||||
caseData.DLoadAvg = v.loaduni[0]
|
||||
|
||||
return v, logLoad, &caseData, err
|
||||
}
|
||||
}
|
||||
}
|
||||
time.Sleep(5 * time.Second)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func dumpInfo(info *containerDloadInfo, logLoad string, caseData *DloadTracingData) error {
|
||||
var tskList []int32
|
||||
var err error
|
||||
var stackCgrp string
|
||||
var stackHost string
|
||||
var containerHostNamespace string
|
||||
|
||||
cgrpPath := info.path
|
||||
containerID := info.container.ID
|
||||
containerHostNamespace = info.container.LabelHostNamespace()
|
||||
|
||||
tskList, err = getUnTaskList(cgrpPath, isCgrp)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get cgroup task list: %w", err)
|
||||
}
|
||||
|
||||
stackCgrp, err = dumpUnTaskStack(tskList, isCgrp)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to dump cgroup task backtrace: %w", err)
|
||||
}
|
||||
|
||||
tskList, err = getUnTaskList("", isHost)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get host task list: %w", err)
|
||||
}
|
||||
|
||||
stackHost, err = dumpUnTaskStack(tskList, isHost)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to dump host task backtrace: %w", err)
|
||||
}
|
||||
|
||||
// We'll not record it if got no cgroup stack info.
|
||||
if stackCgrp == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if this is caused by known issues.
|
||||
knownIssue, inKnownList := conf.KnownIssueSearch(stackCgrp, containerHostNamespace, "")
|
||||
if knownIssue != "" {
|
||||
caseData.KnowIssue = knownIssue
|
||||
caseData.InKnownList = inKnownList
|
||||
} else {
|
||||
caseData.KnowIssue = "none"
|
||||
caseData.InKnownList = inKnownList
|
||||
}
|
||||
|
||||
// save storage
|
||||
caseData.Stack = fmt.Sprintf("%s%s", stackCgrp, stackHost)
|
||||
storage.Save("ctnDLoad", containerID, time.Now(), caseData)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type dloadTracing struct{}
|
||||
|
||||
// Start detect work, monitor the load of containers
|
||||
func (c *dloadTracing) Start(ctx context.Context) error {
|
||||
cntInfo, logLoad, caseData, err := detect(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Infof("caller requests stop !!!")
|
||||
default:
|
||||
err = dumpInfo(cntInfo, logLoad, caseData)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to dump info: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
|
@ -0,0 +1,250 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package autotracing
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"os"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/storage"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
|
||||
"github.com/shirou/gopsutil/process"
|
||||
)
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("membust", newMemBurst)
|
||||
}
|
||||
|
||||
func newMemBurst() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &memBurstTracing{},
|
||||
Internal: 10,
|
||||
Flag: tracing.FlagTracing,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type memBurstTracing struct{}
|
||||
|
||||
type MemoryTracingData struct {
|
||||
TopMemoryUsage []ProcessMemoryInfo `json:"top_memory_usage"`
|
||||
}
|
||||
|
||||
// ProcessMemoryInfo holds process information for sorting
|
||||
type ProcessMemoryInfo struct {
|
||||
PID int32 `json:"pid"`
|
||||
ProcessName string `json:"process_name"`
|
||||
MemorySize uint64 `json:"memory_size"`
|
||||
}
|
||||
|
||||
// ByMemory is used to sorting processes by memory usage
|
||||
type ByMemory []ProcessMemoryInfo
|
||||
|
||||
func (a ByMemory) Len() int { return len(a) }
|
||||
func (a ByMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
||||
func (a ByMemory) Less(i, j int) bool { return a[i].MemorySize > a[j].MemorySize }
|
||||
|
||||
// getTopMemoryProcesses returns the top N processes consuming the most memory.
|
||||
func getTopMemoryProcesses(topN int) ([]ProcessMemoryInfo, error) {
|
||||
processes, err := process.Processes()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var pmInfos []ProcessMemoryInfo
|
||||
for _, p := range processes {
|
||||
memInfo, err := p.MemoryInfo()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
name, err := p.Name()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
pmInfos = append(pmInfos, ProcessMemoryInfo{
|
||||
PID: p.Pid,
|
||||
ProcessName: name,
|
||||
MemorySize: memInfo.RSS,
|
||||
})
|
||||
}
|
||||
|
||||
// Sort the processes by memory usage
|
||||
sort.Sort(ByMemory(pmInfos))
|
||||
|
||||
if len(pmInfos) < topN {
|
||||
return pmInfos, nil
|
||||
}
|
||||
return pmInfos[:topN], nil
|
||||
}
|
||||
|
||||
// pass required keys and readMemInfo will return their values according to /proc/meminfo
|
||||
func readMemInfo(requiredKeys map[string]bool) (map[string]int, error) {
|
||||
file, err := os.Open("/proc/meminfo")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
results := make(map[string]int)
|
||||
scanner := bufio.NewScanner(file)
|
||||
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) < 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
key := strings.Trim(fields[0], ":")
|
||||
if _, ok := requiredKeys[key]; ok {
|
||||
value, err := strconv.Atoi(strings.Trim(fields[1], " kB"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
results[key] = value
|
||||
|
||||
if len(results) == len(requiredKeys) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func checkAndRecordMemoryUsage(currentIndex *int, isHistoryFull *bool,
|
||||
memTotal int, history []int, historyWindowLength, topNProcesses int,
|
||||
burstRatio float64, anonThreshold int,
|
||||
) ([]ProcessMemoryInfo, error) {
|
||||
memInfo, err := readMemInfo(map[string]bool{
|
||||
"Active(anon)": true,
|
||||
"Inactive(anon)": true,
|
||||
})
|
||||
if err != nil {
|
||||
log.Errorf("Error reading memory info: %v\n", err)
|
||||
return []ProcessMemoryInfo{}, nil
|
||||
}
|
||||
|
||||
currentSum := memInfo["Active(anon)"] + memInfo["Inactive(anon)"]
|
||||
history[*currentIndex] = currentSum
|
||||
|
||||
if *currentIndex == historyWindowLength-1 {
|
||||
*isHistoryFull = true
|
||||
}
|
||||
|
||||
*currentIndex = (*currentIndex + 1) % historyWindowLength
|
||||
|
||||
log.Debugf("Checked memory status. active_anon=%v KiB inactive_anon=%v KiB\n", memInfo["Active(anon)"], memInfo["Inactive(anon)"])
|
||||
|
||||
if *isHistoryFull {
|
||||
oldestSum := history[*currentIndex] // current index is the oldest element
|
||||
if float64(currentSum) >= burstRatio*float64(oldestSum) && currentSum >= (anonThreshold*memTotal/100) {
|
||||
topProcesses, err := getTopMemoryProcesses(topNProcesses)
|
||||
if err == nil {
|
||||
return topProcesses, nil
|
||||
}
|
||||
log.Errorf("Fail to getTopMemoryProcesses")
|
||||
return []ProcessMemoryInfo{}, err
|
||||
}
|
||||
}
|
||||
return []ProcessMemoryInfo{}, nil
|
||||
}
|
||||
|
||||
// Core function
|
||||
func (c *memBurstTracing) Start(ctx context.Context) error {
|
||||
var err error
|
||||
|
||||
historyWindowLength := conf.Get().Tracing.MemoryBurst.HistoryWindowLength
|
||||
sampleInterval := conf.Get().Tracing.MemoryBurst.SampleInterval
|
||||
silencePeriod := conf.Get().Tracing.MemoryBurst.SilencePeriod
|
||||
topNProcesses := conf.Get().Tracing.MemoryBurst.TopNProcesses
|
||||
burstRatio := conf.Get().Tracing.MemoryBurst.BurstRatio
|
||||
anonThreshold := conf.Get().Tracing.MemoryBurst.AnonThreshold
|
||||
|
||||
memInfo, err := readMemInfo(map[string]bool{"MemTotal": true})
|
||||
if err != nil {
|
||||
log.Infof("Error reading MemTotal from memory info: %v\n", err)
|
||||
return err
|
||||
}
|
||||
memTotal := memInfo["MemTotal"]
|
||||
history := make([]int, historyWindowLength) // circular buffer
|
||||
var currentIndex int
|
||||
var isHistoryFull bool // don't check memory burst until we have enough data
|
||||
var topProcesses []ProcessMemoryInfo
|
||||
lastReportTime := time.Now().Add(-24 * time.Hour)
|
||||
|
||||
_, err = checkAndRecordMemoryUsage(¤tIndex, &isHistoryFull, memTotal, history, historyWindowLength, topNProcesses, burstRatio, anonThreshold)
|
||||
if err != nil {
|
||||
log.Errorf("Fail to checkAndRecordMemoryUsage")
|
||||
return err
|
||||
}
|
||||
|
||||
for {
|
||||
ticker := time.NewTicker(time.Duration(sampleInterval) * time.Second)
|
||||
stoppedByUser := false
|
||||
|
||||
for range ticker.C {
|
||||
topProcesses, err = checkAndRecordMemoryUsage(¤tIndex, &isHistoryFull, memTotal, history, historyWindowLength, topNProcesses, burstRatio, anonThreshold)
|
||||
if err != nil {
|
||||
log.Errorf("Fail to checkAndRecordMemoryUsage")
|
||||
return err
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Info("Caller request to stop")
|
||||
stoppedByUser = true
|
||||
default:
|
||||
}
|
||||
|
||||
if len(topProcesses) > 0 || stoppedByUser {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
ticker.Stop()
|
||||
|
||||
if stoppedByUser {
|
||||
break
|
||||
}
|
||||
|
||||
currentTime := time.Now()
|
||||
diff := currentTime.Sub(lastReportTime).Seconds()
|
||||
if diff < float64(silencePeriod) {
|
||||
continue
|
||||
}
|
||||
|
||||
lastReportTime = currentTime
|
||||
|
||||
// save storage
|
||||
caseData := &MemoryTracingData{
|
||||
TopMemoryUsage: topProcesses,
|
||||
}
|
||||
storage.Save("memburst", "", time.Now(), caseData)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,264 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package events
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/storage"
|
||||
"huatuo-bamai/internal/utils/bpfutil"
|
||||
"huatuo-bamai/internal/utils/netutil"
|
||||
"huatuo-bamai/internal/utils/symbolutil"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
const (
|
||||
tracerName = "dropwatch"
|
||||
logPrefix = tracerName + ": "
|
||||
|
||||
// type
|
||||
typeTCPCommonDrop = 1
|
||||
typeTCPSynFlood = 2
|
||||
typeTCPListenOverflowHandshake1 = 3
|
||||
typeTCPListenOverflowHandshake3 = 4
|
||||
)
|
||||
|
||||
// from include/net/tcp_states.h
|
||||
var tcpstateMap = []string{
|
||||
"<nil>", // 0
|
||||
"ESTABLISHED",
|
||||
"SYN_SENT",
|
||||
"SYN_RECV",
|
||||
"FIN_WAIT1",
|
||||
"FIN_WAIT2",
|
||||
"TIME_WAIT",
|
||||
"CLOSE",
|
||||
"CLOSE_WAIT",
|
||||
"LAST_ACK",
|
||||
"LISTEN",
|
||||
"CLOSING",
|
||||
"NEW_SYN_RECV",
|
||||
}
|
||||
|
||||
var typeMap = map[uint8]string{
|
||||
typeTCPCommonDrop: "common_drop",
|
||||
typeTCPSynFlood: "syn_flood",
|
||||
typeTCPListenOverflowHandshake1: "listen_overflow_handshake1",
|
||||
typeTCPListenOverflowHandshake3: "listen_overflow_handshake3",
|
||||
}
|
||||
|
||||
type perfEventT struct {
|
||||
TgidPid uint64 `json:"tgid_pid"`
|
||||
Saddr uint32 `json:"saddr"`
|
||||
Daddr uint32 `json:"daddr"`
|
||||
Sport uint16 `json:"sport"`
|
||||
Dport uint16 `json:"dport"`
|
||||
Seq uint32 `json:"seq"`
|
||||
AckSeq uint32 `json:"ack_seq"`
|
||||
QueueMapping uint32 `json:"queue_mapping"`
|
||||
PktLen uint64 `json:"pkt_len"`
|
||||
StackSize int64 `json:"stack_size"`
|
||||
Stack [symbolutil.KsymbolStackMaxDepth]uint64 `json:"stack"`
|
||||
SkMaxAckBacklog uint32 `json:"sk_max_ack_backlog"`
|
||||
State uint8 `json:"state"`
|
||||
Type uint8 `json:"type"`
|
||||
Comm [bpfutil.TaskCommLen]byte `json:"comm"`
|
||||
}
|
||||
|
||||
type DropWatchTracingData struct {
|
||||
Type string `json:"type"`
|
||||
Comm string `json:"comm"`
|
||||
Pid uint64 `json:"pid"`
|
||||
Saddr string `json:"saddr"`
|
||||
Daddr string `json:"daddr"`
|
||||
Sport uint16 `json:"sport"`
|
||||
Dport uint16 `json:"dport"`
|
||||
SrcHostname string `json:"src_hostname"`
|
||||
DestHostname string `json:"dest_hostname"`
|
||||
MaxAckBacklog uint32 `json:"max_ack_backlog"`
|
||||
Seq uint32 `json:"seq"`
|
||||
AckSeq uint32 `json:"ack_seq"`
|
||||
QueueMapping uint32 `json:"queue_mapping"`
|
||||
PktLen uint64 `json:"pkt_len"`
|
||||
State string `json:"state"`
|
||||
Stack string `json:"stack"`
|
||||
}
|
||||
|
||||
type dropWatchTracing struct{}
|
||||
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/dropwatch.c -o $BPF_DIR/dropwatch.o
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing(tracerName, newDropWatch)
|
||||
}
|
||||
|
||||
func newDropWatch() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &dropWatchTracing{},
|
||||
Internal: 10,
|
||||
Flag: tracing.FlagTracing,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Start starts the tracer.
|
||||
func (c *dropWatchTracing) Start(ctx context.Context) error {
|
||||
log.Info(logPrefix + "tracer will be starting.")
|
||||
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf(logPrefix+"failed to load bpf: %w", err)
|
||||
}
|
||||
defer b.Close()
|
||||
|
||||
childCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
// attach
|
||||
reader, err := b.AttachAndEventPipe(childCtx, "perf_events", 8192)
|
||||
if err != nil {
|
||||
return fmt.Errorf(logPrefix+"failed to attach and event pipe: %w", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
// breaker
|
||||
b.WaitDetachByBreaker(childCtx, cancel)
|
||||
|
||||
log.Info(logPrefix + "tracer is waitting for event.")
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-childCtx.Done():
|
||||
log.Info(logPrefix + "tracer is stopped.")
|
||||
return nil
|
||||
default:
|
||||
var event perfEventT
|
||||
if err := reader.ReadInto(&event); err != nil {
|
||||
return fmt.Errorf(logPrefix+"failed to read from perf: %w", err)
|
||||
}
|
||||
|
||||
// format
|
||||
tracerTime := time.Now()
|
||||
tracerData := c.formatEvent(&event)
|
||||
|
||||
// ignore
|
||||
if c.ignore(tracerData) {
|
||||
log.Debugf(logPrefix+"ignore dropwatch data: %v", tracerData)
|
||||
continue
|
||||
}
|
||||
|
||||
// save storage
|
||||
storage.Save(tracerName, "", tracerTime, tracerData)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *dropWatchTracing) formatEvent(event *perfEventT) *DropWatchTracingData {
|
||||
// hostname
|
||||
saddr := netutil.InetNtop(event.Saddr).String()
|
||||
daddr := netutil.InetNtop(event.Daddr).String()
|
||||
srcHostname := "<nil>"
|
||||
destHostname := "<nil>"
|
||||
h, err := net.LookupAddr(saddr)
|
||||
if err == nil && len(h) > 0 {
|
||||
srcHostname = h[0]
|
||||
}
|
||||
|
||||
h, err = net.LookupAddr(daddr)
|
||||
if err == nil && len(h) > 0 {
|
||||
destHostname = h[0]
|
||||
}
|
||||
|
||||
// stack
|
||||
stacks := strings.Join(symbolutil.DumpKernelBackTrace(event.Stack[:], symbolutil.KsymbolStackMaxDepth).BackTrace, "\n")
|
||||
|
||||
// tracer data
|
||||
data := &DropWatchTracingData{
|
||||
Type: typeMap[event.Type],
|
||||
Comm: strings.TrimRight(string(event.Comm[:]), "\x00"),
|
||||
Pid: event.TgidPid >> 32,
|
||||
Saddr: saddr,
|
||||
Daddr: daddr,
|
||||
Sport: netutil.InetNtohs(event.Sport),
|
||||
Dport: netutil.InetNtohs(event.Dport),
|
||||
SrcHostname: srcHostname,
|
||||
DestHostname: destHostname,
|
||||
Seq: netutil.InetNtohl(event.Seq),
|
||||
AckSeq: netutil.InetNtohl(event.AckSeq),
|
||||
QueueMapping: event.QueueMapping,
|
||||
PktLen: event.PktLen,
|
||||
State: tcpstateMap[event.State],
|
||||
Stack: stacks,
|
||||
MaxAckBacklog: event.SkMaxAckBacklog,
|
||||
}
|
||||
|
||||
log.Debugf(logPrefix+"tracing data: %v", data)
|
||||
return data
|
||||
}
|
||||
|
||||
func (c *dropWatchTracing) ignore(data *DropWatchTracingData) bool {
|
||||
stack := strings.Split(data.Stack, "\n")
|
||||
// state: CLOSE_WAIT
|
||||
// stack:
|
||||
// 1. kfree_skb/ffffffff963047b0
|
||||
// 2. kfree_skb/ffffffff963047b0
|
||||
// 3. skb_rbtree_purge/ffffffff963089e0
|
||||
// 4. tcp_fin/ffffffff963ac200
|
||||
// 5. ...
|
||||
if data.State == "CLOSE_WAIT" {
|
||||
if len(stack) >= 3 && strings.HasPrefix(stack[2], "skb_rbtree_purge/") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// stack:
|
||||
// 1. kfree_skb/ffffffff96d127b0
|
||||
// 2. kfree_skb/ffffffff96d127b0
|
||||
// 3. neigh_invalidate/ffffffff96d388b0
|
||||
// 4. neigh_timer_handler/ffffffff96d3a870
|
||||
// 5. ...
|
||||
if conf.Get().Tracing.Dropwatch.IgnoreNeighInvalidate {
|
||||
if len(stack) >= 3 && strings.HasPrefix(stack[2], "neigh_invalidate/") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// stack:
|
||||
// 1. kfree_skb/ffffffff82283d10
|
||||
// 2. kfree_skb/ffffffff82283d10
|
||||
// 3. bnxt_tx_int/ffffffffc05c6f20
|
||||
// 4. __bnxt_poll_work_done/ffffffffc05c50c0
|
||||
// 5. ...
|
||||
|
||||
// stack:
|
||||
// 1. kfree_skb/ffffffffaba83d10
|
||||
// 2. kfree_skb/ffffffffaba83d10
|
||||
// 3. __bnxt_tx_int/ffffffffc045df90
|
||||
// 4. bnxt_tx_int/ffffffffc045e250
|
||||
// 5. ...
|
||||
if len(stack) >= 3 &&
|
||||
(strings.HasPrefix(stack[2], "bnxt_tx_int/") || strings.HasPrefix(stack[2], "__bnxt_tx_int/")) {
|
||||
return true
|
||||
}
|
||||
|
||||
// default: false
|
||||
return false
|
||||
}
|
|
@ -0,0 +1,126 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package events
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/storage"
|
||||
"huatuo-bamai/internal/utils/bpfutil"
|
||||
"huatuo-bamai/internal/utils/kmsgutil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/hungtask.c -o $BPF_DIR/hungtask.o
|
||||
|
||||
type hungTaskPerfEventData struct {
|
||||
Pid int32
|
||||
Comm [bpfutil.TaskCommLen]byte
|
||||
}
|
||||
|
||||
// HungTaskTracerData is the full data structure.
|
||||
type HungTaskTracerData struct {
|
||||
Pid int32 `json:"pid"`
|
||||
Comm string `json:"comm"`
|
||||
CPUsStack string `json:"cpus_stack"`
|
||||
BlockedProcessesStack string `json:"blocked_processes_stack"`
|
||||
}
|
||||
|
||||
type hungTaskTracing struct {
|
||||
hungtaskMetric []*metric.Data
|
||||
}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("hungtask", newHungTask)
|
||||
}
|
||||
|
||||
func newHungTask() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &hungTaskTracing{
|
||||
hungtaskMetric: []*metric.Data{
|
||||
metric.NewGaugeData("happened", 0, "hungtask happened", nil),
|
||||
},
|
||||
},
|
||||
Internal: 10,
|
||||
Flag: tracing.FlagMetric | tracing.FlagTracing,
|
||||
}, nil
|
||||
}
|
||||
|
||||
var hungtaskCounter float64
|
||||
|
||||
func (c *hungTaskTracing) Update() ([]*metric.Data, error) {
|
||||
c.hungtaskMetric[0].Value = hungtaskCounter
|
||||
hungtaskCounter = 0
|
||||
return c.hungtaskMetric, nil
|
||||
}
|
||||
|
||||
func (c *hungTaskTracing) Start(ctx context.Context) error {
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
log.Infof("failed to LoadBpf, err: %v", err)
|
||||
return err
|
||||
}
|
||||
defer b.Close()
|
||||
|
||||
childCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
reader, err := b.AttachAndEventPipe(childCtx, "hungtask_perf_events", 8192)
|
||||
if err != nil {
|
||||
log.Infof("failed to AttachAndEventPipe, err: %v", err)
|
||||
return err
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
b.WaitDetachByBreaker(childCtx, cancel)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-childCtx.Done():
|
||||
return nil
|
||||
default:
|
||||
var data hungTaskPerfEventData
|
||||
if err := reader.ReadInto(&data); err != nil {
|
||||
return fmt.Errorf("ReadFromPerfEvent fail: %w", err)
|
||||
}
|
||||
|
||||
cpusBT, err := kmsgutil.GetAllCPUsBT()
|
||||
if err != nil {
|
||||
cpusBT = err.Error()
|
||||
}
|
||||
blockedProcessesBT, err := kmsgutil.GetBlockedProcessesBT()
|
||||
if err != nil {
|
||||
blockedProcessesBT = err.Error()
|
||||
}
|
||||
|
||||
caseData := &HungTaskTracerData{
|
||||
Pid: data.Pid,
|
||||
Comm: strings.TrimRight(string(data.Comm[:]), "\x00"),
|
||||
CPUsStack: cpusBT,
|
||||
BlockedProcessesStack: blockedProcessesBT,
|
||||
}
|
||||
hungtaskCounter++
|
||||
|
||||
// save storage
|
||||
storage.Save("hungtask", "", time.Now(), caseData)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,147 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package events
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/storage"
|
||||
"huatuo-bamai/internal/utils/bpfutil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/lacp.c -o $BPF_DIR/lacp.o
|
||||
type lacpTracing struct {
|
||||
count uint64
|
||||
}
|
||||
|
||||
func init() {
|
||||
// bond mode4 (802.3ad) requires bonding.ko module,
|
||||
// the kprobe point is in bonding module, if not exist, should not load bpf
|
||||
if !isLacpEnv() {
|
||||
return
|
||||
}
|
||||
|
||||
tracing.RegisterEventTracing("lacp", newLACPTracing)
|
||||
}
|
||||
|
||||
func newLACPTracing() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &lacpTracing{},
|
||||
Internal: 60,
|
||||
Flag: tracing.FlagTracing | tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (lacp *lacpTracing) Start(ctx context.Context) (err error) {
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Load lacp err: %w", err)
|
||||
}
|
||||
defer b.Close()
|
||||
|
||||
childCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
reader, err := b.AttachAndEventPipe(childCtx, "ad_event_map", 8192)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to AttachAndEventPipe, err: %w", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-childCtx.Done():
|
||||
log.Info("lacp tracing is stopped.")
|
||||
return nil
|
||||
default:
|
||||
var tmp uint64
|
||||
if err := reader.ReadInto(&tmp); err != nil {
|
||||
return fmt.Errorf("read lacp perf event fail: %w", err)
|
||||
}
|
||||
|
||||
atomic.AddUint64(&lacp.count, 1)
|
||||
|
||||
bondInfo, err := readAllFiles("/proc/net/bonding")
|
||||
if err != nil {
|
||||
log.Warnf("read dir /proc/net/bonding err: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
tracerData := struct {
|
||||
Content string `json:"content"`
|
||||
}{
|
||||
Content: bondInfo,
|
||||
}
|
||||
|
||||
log.Debugf("bond info: %s", tracerData.Content)
|
||||
storage.Save("lacp", "", time.Now(), tracerData)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (lacp *lacpTracing) Update() ([]*metric.Data, error) {
|
||||
return []*metric.Data{
|
||||
metric.NewGaugeData("lacp", float64(atomic.LoadUint64(&lacp.count)),
|
||||
"lacp disabled count", nil),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func readAllFiles(dir string) (string, error) {
|
||||
var content string
|
||||
|
||||
return content, filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
content += path + "\n" + string(data)
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func isLacpEnv() bool {
|
||||
links, err := netlink.LinkList()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, l := range links {
|
||||
if l.Type() == "bond" &&
|
||||
l.(*netlink.Bond).Mode == netlink.BOND_MODE_802_3AD {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
|
@ -0,0 +1,121 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package events
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/storage"
|
||||
"huatuo-bamai/internal/utils/bpfutil"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
type memoryReclaimTracing struct{}
|
||||
|
||||
type memoryReclaimPerfEvent struct {
|
||||
Comm [bpfutil.TaskCommLen]byte
|
||||
Deltatime uint64
|
||||
CSS uint64
|
||||
Pid uint64
|
||||
}
|
||||
|
||||
// MemoryReclaimTracingData is the full data structure.
|
||||
type MemoryReclaimTracingData struct {
|
||||
Pid uint64 `json:"pid"`
|
||||
Comm string `json:"comm"`
|
||||
Deltatime uint64 `json:"deltatime"`
|
||||
}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("memreclaim", newMemoryReclaim)
|
||||
}
|
||||
|
||||
func newMemoryReclaim() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &memoryReclaimTracing{},
|
||||
Internal: 5,
|
||||
Flag: tracing.FlagTracing,
|
||||
}, nil
|
||||
}
|
||||
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/memory_reclaim.c -o $BPF_DIR/memory_reclaim.o
|
||||
|
||||
// Start detect work, load bpf and wait data form perfevent
|
||||
func (c *memoryReclaimTracing) Start(ctx context.Context) error {
|
||||
log.Infof("memory reclaim start.")
|
||||
|
||||
deltath := conf.Get().Tracing.MemoryReclaim.Deltath
|
||||
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), map[string]any{"deltath": deltath})
|
||||
if err != nil {
|
||||
log.Infof("LoadBpf: %v", err)
|
||||
return err
|
||||
}
|
||||
defer b.Close()
|
||||
|
||||
childCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
reader, err := b.AttachAndEventPipe(childCtx, "reclaim_perf_events", 8192)
|
||||
if err != nil {
|
||||
log.Infof("AttachAndEventPipe: %v", err)
|
||||
return err
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
b.WaitDetachByBreaker(childCtx, cancel)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-childCtx.Done():
|
||||
return nil
|
||||
default:
|
||||
var data memoryReclaimPerfEvent
|
||||
if err := reader.ReadInto(&data); err != nil {
|
||||
return fmt.Errorf("ReadFromPerfEvent fail: %w", err)
|
||||
}
|
||||
|
||||
container, err := pod.GetContainerByCSS(data.CSS, "cpu")
|
||||
if err != nil {
|
||||
return fmt.Errorf("GetContainerByCSS by CSS %d: %w", data.CSS, err)
|
||||
}
|
||||
|
||||
// We only care about the container and nothing else.
|
||||
// Though it may be unfair, that's just how life is.
|
||||
//
|
||||
// -- Tonghao Zhang, tonghao@bamaicloud.com
|
||||
if container == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// save storage
|
||||
tracingData := &MemoryReclaimTracingData{
|
||||
Pid: data.Pid,
|
||||
Comm: strings.Trim(string(data.Comm[:]), "\x00"),
|
||||
Deltatime: data.Deltatime,
|
||||
}
|
||||
|
||||
log.Infof("memory_reclaim saves storage: %+v", tracingData)
|
||||
storage.Save("memory_reclaim", container.ID, time.Now(), tracingData)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,229 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package events

import (
	"context"
	"fmt"
	"slices"
	"sync"
	"time"

	"huatuo-bamai/internal/conf"
	"huatuo-bamai/internal/log"
	"huatuo-bamai/internal/storage"
	"huatuo-bamai/pkg/metric"
	"huatuo-bamai/pkg/tracing"

	"github.com/vishvananda/netlink"
	"golang.org/x/sys/unix"
)

type linkStatusType uint8

const (
	linkStatusUnknown linkStatusType = iota
	linkStatusAdminUp
	linkStatusAdminDown
	linkStatusCarrierUp
	linkStatusCarrierDown
	maxLinkStatus
)

func (l linkStatusType) String() string {
	return [...]string{"linkStatusUnknown", "linkStatusAdminUp", "linkStatusAdminDown", "linkStatusCarrierUp", "linkStatusCarrierDown"}[l]
}

func flags2status(flags, change uint32) []linkStatusType {
	var status []linkStatusType

	if change&unix.IFF_UP != 0 {
		if flags&unix.IFF_UP != 0 {
			status = append(status, linkStatusAdminUp)
		} else {
			status = append(status, linkStatusAdminDown)
		}
	}

	if change&unix.IFF_LOWER_UP != 0 {
		if flags&unix.IFF_LOWER_UP != 0 {
			status = append(status, linkStatusCarrierUp)
		} else {
			status = append(status, linkStatusCarrierDown)
		}
	}

	return status
}

type netdevTracing struct {
	name                      string
	linkUpdateCh              chan netlink.LinkUpdate
	linkDoneCh                chan struct{}
	mu                        sync.Mutex
	ifFlagsMap                map[string]uint32                 // [ifname]ifinfomsg::if_flags
	metricsLinkStatusCountMap map[linkStatusType]map[string]int // [netdevEventType][ifname]count
}

type netdevEventData struct {
	linkFlags   uint32
	flagsChange uint32
	Ifname      string `json:"ifname"`
	Index       int    `json:"index"`
	LinkStatus  string `json:"linkstatus"`
	Mac         string `json:"mac"`
	AtStart     bool   `json:"start"` // true: scanned at start; false: triggered by an event
}

func init() {
	tracing.RegisterEventTracing("netdev_event", newNetdevTracing)
}

func newNetdevTracing() (*tracing.EventTracingAttr, error) {
	initMap := make(map[linkStatusType]map[string]int)
	for i := linkStatusUnknown; i < maxLinkStatus; i++ {
		initMap[i] = make(map[string]int)
	}

	return &tracing.EventTracingAttr{
		TracingData: &netdevTracing{
			linkUpdateCh:              make(chan netlink.LinkUpdate),
			linkDoneCh:                make(chan struct{}),
			ifFlagsMap:                make(map[string]uint32),
			metricsLinkStatusCountMap: initMap,
			name:                      "netdev_event",
		},
		Internal: 10,
		Flag:     tracing.FlagTracing | tracing.FlagMetric,
	}, nil
}

func (nt *netdevTracing) Start(ctx context.Context) error {
	if err := nt.checkLinkStatus(); err != nil {
		return err
	}

	if err := netlink.LinkSubscribe(nt.linkUpdateCh, nt.linkDoneCh); err != nil {
		return err
	}
	defer nt.close()

	for {
		update, ok := <-nt.linkUpdateCh
		if !ok {
			return nil
		}
		switch update.Header.Type {
		case unix.NLMSG_ERROR:
			return fmt.Errorf("NLMSG_ERROR")
		case unix.RTM_NEWLINK:
			ifname := update.Link.Attrs().Name
			if _, ok := nt.ifFlagsMap[ifname]; !ok {
				// interface not tracked at startup (not in the whitelist); skip it
				continue
			}
			nt.handleEvent(&update)
		}
	}
}

// Update implements Collector.
func (nt *netdevTracing) Update() ([]*metric.Data, error) {
	nt.mu.Lock()
	defer nt.mu.Unlock()

	var metrics []*metric.Data

	for typ, value := range nt.metricsLinkStatusCountMap {
		for ifname, count := range value {
			metrics = append(metrics, metric.NewGaugeData(
				typ.String(), float64(count), typ.String(), map[string]string{"device": ifname}))
		}
	}

	return metrics, nil
}

func (nt *netdevTracing) checkLinkStatus() error {
	links, err := netlink.LinkList()
	if err != nil {
		return err
	}

	for _, link := range links {
		ifname := link.Attrs().Name
		if !slices.Contains(conf.Get().Tracing.Netdev.Whitelist, ifname) {
			continue
		}

		flags := link.Attrs().RawFlags
		nt.ifFlagsMap[ifname] = flags

		data := &netdevEventData{
			linkFlags: flags,
			Ifname:    ifname,
			Index:     link.Attrs().Index,
			Mac:       link.Attrs().HardwareAddr.String(),
			AtStart:   true,
		}
		nt.record(data)
	}

	return nil
}

func (nt *netdevTracing) record(data *netdevEventData) {
	for _, status := range flags2status(data.linkFlags, data.flagsChange) {
		nt.mu.Lock()
		nt.metricsLinkStatusCountMap[status][data.Ifname]++
		nt.mu.Unlock()

		if data.LinkStatus == "" {
			data.LinkStatus = status.String()
		} else {
			data.LinkStatus = data.LinkStatus + ", " + status.String()
		}
	}

	if !data.AtStart && data.LinkStatus != "" {
		log.Infof("%s %+v", data.LinkStatus, data)
		storage.Save(nt.name, "", time.Now(), data)
	}
}

func (nt *netdevTracing) handleEvent(ev *netlink.LinkUpdate) {
	ifname := ev.Link.Attrs().Name

	currFlags := ev.Attrs().RawFlags
	lastFlags := nt.ifFlagsMap[ifname]
	change := currFlags ^ lastFlags
	nt.ifFlagsMap[ifname] = currFlags

	data := &netdevEventData{
		linkFlags:   currFlags,
		flagsChange: change,
		Ifname:      ifname,
		Index:       ev.Link.Attrs().Index,
		Mac:         ev.Link.Attrs().HardwareAddr.String(),
		AtStart:     false,
	}
	nt.record(data)
}

func (nt *netdevTracing) close() {
	close(nt.linkDoneCh)
	close(nt.linkUpdateCh)
}
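
The admin/carrier split above is driven entirely by which bits of ifi_flags changed between two RTM_NEWLINK messages. Below is a minimal standalone sketch of the same mapping; the interface history and flag values are invented for illustration, and the helper is re-declared locally (returning plain strings) so the snippet compiles on its own.

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// Same bit-to-status mapping as flags2status above, reproduced so the example is standalone.
func flags2status(flags, change uint32) []string {
	var status []string
	if change&unix.IFF_UP != 0 {
		if flags&unix.IFF_UP != 0 {
			status = append(status, "linkStatusAdminUp")
		} else {
			status = append(status, "linkStatusAdminDown")
		}
	}
	if change&unix.IFF_LOWER_UP != 0 {
		if flags&unix.IFF_LOWER_UP != 0 {
			status = append(status, "linkStatusCarrierUp")
		} else {
			status = append(status, "linkStatusCarrierDown")
		}
	}
	return status
}

func main() {
	last := uint32(unix.IFF_UP | unix.IFF_LOWER_UP) // hypothetical eth0: admin up with carrier
	curr := uint32(unix.IFF_UP)                     // carrier dropped, admin state unchanged
	change := curr ^ last                           // same XOR handleEvent uses to find changed bits

	// Only the carrier bit changed, so this prints [linkStatusCarrierDown].
	fmt.Println(flags2status(curr, change))
}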
@ -0,0 +1,294 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package events

import (
	"context"
	"errors"
	"fmt"
	"strings"
	"syscall"
	"time"

	"huatuo-bamai/internal/bpf"
	"huatuo-bamai/internal/conf"
	"huatuo-bamai/internal/log"
	"huatuo-bamai/internal/pod"
	"huatuo-bamai/internal/storage"
	"huatuo-bamai/internal/utils/bpfutil"
	"huatuo-bamai/internal/utils/netutil"
	"huatuo-bamai/internal/utils/procfsutil"
	"huatuo-bamai/pkg/tracing"

	"golang.org/x/sys/unix"
)

//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/netrecvlat.c -o $BPF_DIR/netrecvlat.o

type netRecvLatTracing struct{}

// NetTracingData is the full data structure.
type NetTracingData struct {
	Comm    string `json:"comm"`
	Pid     uint64 `json:"pid"`
	Where   string `json:"where"`
	Latency uint64 `json:"latency_ms"`
	State   string `json:"state"`
	Saddr   string `json:"saddr"`
	Daddr   string `json:"daddr"`
	Sport   uint16 `json:"sport"`
	Dport   uint16 `json:"dport"`
	Seq     uint32 `json:"seq"`
	AckSeq  uint32 `json:"ack_seq"`
	PktLen  uint64 `json:"pkt_len"`
}

// netRcvPerfEvent is the event layout delivered by the bpf perf pipe.
type netRcvPerfEvent struct {
	Comm    [bpfutil.TaskCommLen]byte
	Latency uint64
	TgidPid uint64
	PktLen  uint64
	Sport   uint16
	Dport   uint16
	Saddr   uint32
	Daddr   uint32
	Seq     uint32
	AckSeq  uint32
	State   uint8
	Where   uint8
}

// from include/net/tcp_states.h
var tcpStateMap = []string{
	"<nil>", // 0
	"ESTABLISHED",
	"SYN_SENT",
	"SYN_RECV",
	"FIN_WAIT1",
	"FIN_WAIT2",
	"TIME_WAIT",
	"CLOSE",
	"CLOSE_WAIT",
	"LAST_ACK",
	"LISTEN",
	"CLOSING",
	"NEW_SYN_RECV",
}

const userCopyCase = 2

var toWhere = []string{
	"TO_NETIF_RCV",
	"TO_TCPV4_RCV",
	"TO_USER_COPY",
}

func init() {
	tracing.RegisterEventTracing("netrcvlat", newNetRcvLat)
}

func newNetRcvLat() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &netRecvLatTracing{},
		Internal:    10,
		Flag:        tracing.FlagTracing,
	}, nil
}

func (c *netRecvLatTracing) Start(ctx context.Context) error {
	toNetIf := conf.Get().Tracing.NetRecvLat.ToNetIf       // ms, before RPS to a core recv (__netif_receive_skb)
	toTCPV4 := conf.Get().Tracing.NetRecvLat.ToTCPV4       // ms, before RPS to TCP recv (tcp_v4_rcv)
	toUserCopy := conf.Get().Tracing.NetRecvLat.ToUserCopy // ms, before RPS to user recv (skb_copy_datagram_iovec)

	if toNetIf == 0 || toTCPV4 == 0 || toUserCopy == 0 {
		return fmt.Errorf("netrecvlat threshold [%v %v %v]ms invalid", toNetIf, toTCPV4, toUserCopy)
	}
	log.Infof("netrecvlat start, latency threshold [%v %v %v]ms", toNetIf, toTCPV4, toUserCopy)

	monoWallOffset, err := estMonoWallOffset()
	if err != nil {
		return fmt.Errorf("estimate monoWallOffset failed: %w", err)
	}

	log.Infof("netrecvlat offset of mono to walltime: %v ns", monoWallOffset)

	args := map[string]any{
		"mono_wall_offset": monoWallOffset,
		"to_netif":         toNetIf * 1000 * 1000,
		"to_tcpv4":         toTCPV4 * 1000 * 1000,
		"to_user_copy":     toUserCopy * 1000 * 1000,
	}
	b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), args)
	if err != nil {
		log.Infof("failed to LoadBpf, err: %v", err)
		return err
	}
	defer b.Close()

	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	reader, err := b.AttachAndEventPipe(childCtx, "net_recv_lat_event_map", 8192)
	if err != nil {
		log.Infof("failed to AttachAndEventPipe, err: %v", err)
		return err
	}
	defer reader.Close()

	b.WaitDetachByBreaker(childCtx, cancel)

	// save the host netns
	hostNetNsInode, err := procfsutil.NetNSInodeByPid(1)
	if err != nil {
		return fmt.Errorf("get host netns inode: %w", err)
	}

	for {
		select {
		case <-childCtx.Done():
			return nil
		default:
			var pd netRcvPerfEvent
			if err := reader.ReadInto(&pd); err != nil {
				return fmt.Errorf("read from perf event fail: %w", err)
			}
			tracerTime := time.Now()

			comm := "<nil>" // not in process context
			var pid uint64
			var containerID string
			if pd.TgidPid != 0 {
				comm = strings.TrimRight(string(pd.Comm[:]), "\x00")
				pid = pd.TgidPid >> 32

				// check whether its netns is the same as the host netns
				if pd.Where == userCopyCase {
					cid, skip, err := ignore(pid, comm, hostNetNsInode)
					if err != nil {
						return err
					}
					if skip {
						continue
					}
					containerID = cid
				}
			}

			where := toWhere[pd.Where]
			lat := pd.Latency / 1000 / 1000 // ms
			state := tcpStateMap[pd.State]
			saddr, daddr := netutil.InetNtop(pd.Saddr).String(), netutil.InetNtop(pd.Daddr).String()
			sport, dport := netutil.InetNtohs(pd.Sport), netutil.InetNtohs(pd.Dport)
			seq, ackSeq := netutil.InetNtohl(pd.Seq), netutil.InetNtohl(pd.AckSeq)
			pktLen := pd.PktLen

			title := fmt.Sprintf("comm=%s:%d to=%s lat(ms)=%v state=%s saddr=%s sport=%d daddr=%s dport=%d seq=%d ackSeq=%d pktLen=%d",
				comm, pid, where, lat, state, saddr, sport, daddr, dport, seq, ackSeq, pktLen)

			// tcp state filter
			if (state != "ESTABLISHED") && (state != "<nil>") {
				continue
			}

			// known issue filter
			caseName, _ := conf.KnownIssueSearch(title, "", "")
			if caseName == "netrecvlat" {
				log.Debugf("netrecvlat known issue")
				continue
			}

			tracerData := &NetTracingData{
				Comm:    comm,
				Pid:     pid,
				Where:   where,
				Latency: lat,
				State:   state,
				Saddr:   saddr,
				Daddr:   daddr,
				Sport:   sport,
				Dport:   dport,
				Seq:     seq,
				AckSeq:  ackSeq,
				PktLen:  pktLen,
			}
			log.Debugf("netrecvlat tracerData: %+v", tracerData)

			// save storage
			storage.Save("netrecvlat", containerID, tracerTime, tracerData)
		}
	}
}

func ignore(pid uint64, comm string, hostNetnsInode uint64) (containerID string, skip bool, err error) {
	// check whether its netns is the same as the host netns
	dstInode, err := procfsutil.NetNSInodeByPid(int(pid))
	if err != nil {
		// ignore processes that have already exited
		if errors.Is(err, syscall.ENOENT) {
			return "", true, nil
		}
		return "", skip, fmt.Errorf("get netns inode of pid %v failed: %w", pid, err)
	}
	if conf.Get().Tracing.NetRecvLat.IgnoreHost && dstInode == hostNetnsInode {
		log.Debugf("ignore %s:%v the same netns as host", comm, pid)
		return "", true, nil
	}

	// check the container level
	var container *pod.Container
	if container, err = pod.GetContainerByNetNamespaceInode(dstInode); err != nil {
		log.Warnf("get container info by netns inode %v pid %v, failed: %v", dstInode, pid, err)
	}
	if container != nil {
		for _, level := range conf.Get().Tracing.NetRecvLat.IgnoreContainerLevel {
			if container.Qos.Int() == level {
				log.Debugf("ignore container %+v", container)
				skip = true
				break
			}
		}
		containerID = container.ID
	}

	return containerID, skip, nil
}

// estMonoWallOffset estimates the offset between CLOCK_MONOTONIC and real (wall-clock) time.
// bpf_ktime_get_ns() reads CLOCK_MONOTONIC, but skb->tstamp = ktime_get_real() at netif_receive_skb_internal.
// ref: https://github.com/torvalds/linux/blob/v4.18/net/core/dev.c#L4736
// offset = t3 - t2 - (t3 - t1) / 2 = (t3 + t1) / 2 - t2
func estMonoWallOffset() (int64, error) {
	var t1, t2, t3 unix.Timespec
	var bestDelta int64
	var offset int64

	for i := 0; i < 10; i++ {
		err1 := unix.ClockGettime(unix.CLOCK_REALTIME, &t1)
		err2 := unix.ClockGettime(unix.CLOCK_MONOTONIC, &t2)
		err3 := unix.ClockGettime(unix.CLOCK_REALTIME, &t3)
		if err1 != nil || err2 != nil || err3 != nil {
			return 0, fmt.Errorf("%w, %w, %w", err1, err2, err3)
		}

		delta := unix.TimespecToNsec(t3) - unix.TimespecToNsec(t1)
		if i == 0 || delta < bestDelta {
			bestDelta = delta
			offset = (unix.TimespecToNsec(t3)+unix.TimespecToNsec(t1))/2 - unix.TimespecToNsec(t2)
		}
	}

	return offset, nil
}
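
The sampling trick in estMonoWallOffset is the usual three-read bracket: two CLOCK_REALTIME reads around one CLOCK_MONOTONIC read, with the midpoint of the realtime pair taken as the wall-clock instant of the monotonic read, so the offset is (t3 + t1)/2 - t2 and the error is at most (t3 - t1)/2. A small self-contained sketch with made-up nanosecond values:

package main

import "fmt"

func main() {
	// Hypothetical samples, in nanoseconds: two CLOCK_REALTIME reads bracketing
	// one CLOCK_MONOTONIC read, as done in estMonoWallOffset.
	t1 := int64(1_700_000_000_000_000_000) // realtime before
	t2 := int64(5_000_000_000)             // monotonic in between
	t3 := int64(1_700_000_000_000_001_000) // realtime after, 1µs later

	// Midpoint form used by estMonoWallOffset; the half-window is the uncertainty.
	offset := (t3+t1)/2 - t2
	uncertainty := (t3 - t1) / 2

	fmt.Printf("offset ≈ %d ns (± %d ns)\n", offset, uncertainty)
}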
@ -0,0 +1,191 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package events

import (
	"context"
	"fmt"
	"strings"
	"sync"
	"time"

	"huatuo-bamai/internal/bpf"
	"huatuo-bamai/internal/log"
	"huatuo-bamai/internal/pod"
	"huatuo-bamai/internal/storage"
	"huatuo-bamai/internal/utils/bpfutil"
	"huatuo-bamai/pkg/metric"
	"huatuo-bamai/pkg/tracing"
)

//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/oom.c -o $BPF_DIR/oom.o

type perfEventData struct {
	TriggerProcessName [16]byte
	VictimProcessName  [16]byte
	TriggerPid         int32
	VictimPid          int32
	TriggerMemcgCSS    uint64
	VictimMemcgCSS     uint64
}

type OOMTracingData struct {
	TriggerMemcgCSS          string `json:"trigger_memcg_css"`
	TriggerContainerID       string `json:"trigger_container_id"`
	TriggerContainerHostname string `json:"trigger_container_hostname"`
	TriggerPid               int32  `json:"trigger_pid"`
	TriggerProcessName       string `json:"trigger_process_name"`

	VictimMemcgCSS          string `json:"victim_memcg_css"`
	VictimContainerID       string `json:"victim_container_id"`
	VictimContainerHostname string `json:"victim_container_hostname"`
	VictimPid               int32  `json:"victim_pid"`
	VictimProcessName       string `json:"victim_process_name"`
}

type oomMetric struct {
	count             int
	victimProcessName string
}

type oomCollector struct{}

func init() {
	tracing.RegisterEventTracing("oom", newOOMCollector)
}

func newOOMCollector() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &oomCollector{},
		Internal:    10,
		Flag:        tracing.FlagTracing | tracing.FlagMetric,
	}, nil
}

var (
	hostOOMCounter      float64
	containerOOMCounter = make(map[string]oomMetric)
	mutex               sync.Mutex
)

func (c *oomCollector) Update() ([]*metric.Data, error) {
	containers, err := pod.GetNormalContainers()
	if err != nil {
		return nil, fmt.Errorf("get normal container: %w", err)
	}
	metrics := []*metric.Data{}
	mutex.Lock()
	metrics = append(metrics, metric.NewGaugeData("host_happened", hostOOMCounter, "host oom happened", nil))
	for _, container := range containers {
		if val, exists := containerOOMCounter[container.ID]; exists {
			metrics = append(metrics,
				metric.NewContainerGaugeData(container, "counter", float64(val.count), "ct oom happened", map[string]string{"process": val.victimProcessName}),
			)
		}
	}
	hostOOMCounter = 0
	containerOOMCounter = make(map[string]oomMetric)
	mutex.Unlock()
	return metrics, nil
}

// Start loads the bpf program and consumes OOM perf events.
func (c *oomCollector) Start(ctx context.Context) error {
	b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
	if err != nil {
		log.Infof("failed to LoadBpf, err: %v", err)
		return err
	}
	defer b.Close()

	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	reader, err := b.AttachAndEventPipe(childCtx, "oom_perf_events", 8192)
	if err != nil {
		log.Infof("failed to AttachAndEventPipe, err: %v", err)
		return err
	}
	defer reader.Close()

	b.WaitDetachByBreaker(childCtx, cancel)

	for {
		select {
		case <-childCtx.Done():
			return nil
		default:
			var data perfEventData
			if err := reader.ReadInto(&data); err != nil {
				return fmt.Errorf("ReadFromPerfEvent fail: %w", err)
			}
			cssToCtMap, err := pod.GetCSSToContainerID("memory")
			if err != nil {
				log.Errorf("failed to GetCSSToContainerID, err: %v", err)
				continue
			}
			cts, err := pod.GetAllContainers()
			if err != nil {
				log.Errorf("failed to GetAllContainers, err: %v", err)
				return err
			}
			caseData := &OOMTracingData{
				TriggerMemcgCSS:    fmt.Sprintf("0x%x", data.TriggerMemcgCSS),
				TriggerPid:         data.TriggerPid,
				TriggerProcessName: strings.TrimRight(string(data.TriggerProcessName[:]), "\x00"),
				TriggerContainerID: cssToCtMap[data.TriggerMemcgCSS],
				VictimMemcgCSS:     fmt.Sprintf("0x%x", data.VictimMemcgCSS),
				VictimPid:          data.VictimPid,
				VictimProcessName:  strings.TrimRight(string(data.VictimProcessName[:]), "\x00"),
				VictimContainerID:  cssToCtMap[data.VictimMemcgCSS],
			}

			if caseData.TriggerContainerID == "" {
				caseData.TriggerContainerID = "None"
				caseData.TriggerContainerHostname = "Non-Container Cgroup"
			} else {
				caseData.TriggerContainerHostname = cts[caseData.TriggerContainerID].Hostname
				if caseData.TriggerContainerHostname == "" {
					caseData.TriggerContainerHostname = "unknown"
				}
			}
			mutex.Lock()
			if caseData.VictimContainerID == "" {
				hostOOMCounter++
				caseData.VictimContainerID = "None"
				caseData.VictimContainerHostname = "Non-Container Cgroup"
			} else {
				if val, exists := containerOOMCounter[cts[caseData.VictimContainerID].ID]; exists {
					val.count++
					val.victimProcessName = val.victimProcessName + "," + caseData.VictimProcessName
					containerOOMCounter[cts[caseData.VictimContainerID].ID] = val
				} else {
					containerOOMCounter[cts[caseData.VictimContainerID].ID] = oomMetric{
						count:             1,
						victimProcessName: caseData.VictimProcessName,
					}
				}
				caseData.VictimContainerHostname = cts[caseData.VictimContainerID].Hostname
				if caseData.VictimContainerHostname == "" {
					caseData.VictimContainerHostname = "unknown"
				}
			}
			mutex.Unlock()

			// save storage
			storage.Save("oom", "", time.Now(), caseData)
		}
	}
}
@ -0,0 +1,188 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package events

import (
	"context"
	"fmt"
	"strings"
	"time"

	"huatuo-bamai/internal/bpf"
	"huatuo-bamai/internal/conf"
	"huatuo-bamai/internal/log"
	"huatuo-bamai/internal/storage"
	"huatuo-bamai/internal/utils/bpfutil"
	"huatuo-bamai/internal/utils/symbolutil"
	"huatuo-bamai/pkg/tracing"
)

//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/softirq.c -o $BPF_DIR/softirq.o

type softirqTracing struct{}

type softirqPerfEvent struct {
	Stack     [symbolutil.KsymbolStackMaxDepth]uint64
	StackSize int64
	Now       uint64
	StallTime uint64
	Comm      [bpfutil.TaskCommLen]byte
	Pid       uint32
	CPU       uint32
}

// SoftirqTracingData is the full data structure.
type SoftirqTracingData struct {
	OffTime   uint64 `json:"offtime"`
	Threshold uint64 `json:"threshold"`
	Comm      string `json:"comm"`
	Pid       uint32 `json:"pid"`
	CPU       uint32 `json:"cpu"`
	Now       uint64 `json:"now"`
	Stack     string `json:"stack"`
}

func init() {
	tracing.RegisterEventTracing("softirq", newSoftirq)
}

func newSoftirq() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &softirqTracing{},
		Internal:    10,
		Flag:        tracing.FlagTracing,
	}, nil
}

func (c *softirqTracing) Start(ctx context.Context) error {
	log.Infof("Softirq start")

	softirqThresh := conf.Get().Tracing.Softirq.ThresholdTime

	b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), map[string]any{"softirq_thresh": softirqThresh})
	if err != nil {
		log.Infof("failed to LoadBpf, err: %v", err)
		return err
	}
	defer b.Close()

	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	reader, err := attachIrqAndEventPipe(childCtx, b)
	if err != nil {
		log.Infof("failed to attachIrqAndEventPipe, err: %v", err)
		return err
	}
	defer reader.Close()

	b.WaitDetachByBreaker(childCtx, cancel)

	for {
		select {
		case <-childCtx.Done():
			return nil
		default:
			var data softirqPerfEvent

			if err := reader.ReadInto(&data); err != nil {
				return fmt.Errorf("read from perf event fail: %w", err)
			}
			comm := string(data.Comm[:])

			// skip ksoftirqd itself
			if strings.HasPrefix(comm, "ksoftirqd") {
				continue
			}

			// stop recording the noise from swapper
			if strings.HasPrefix(comm, "swapper") {
				continue
			}

			var stack string
			if data.StackSize > 0 {
				stack = softirqDumpTrace(data.Stack[:])
			}

			// save storage
			caseData := &SoftirqTracingData{
				OffTime:   data.StallTime,
				Threshold: softirqThresh,
				Comm:      strings.TrimRight(comm, "\x00"),
				Pid:       data.Pid,
				CPU:       data.CPU,
				Now:       data.Now,
				Stack:     fmt.Sprintf("stack:\n%s", stack),
			}
			storage.Save("softirq", "", time.Now(), caseData)
		}
	} // forever
}

// softirqDumpTrace dumps the stacks for this case with offset and module info.
func softirqDumpTrace(addrs []uint64) string {
	stacks := symbolutil.DumpKernelBackTrace(addrs, symbolutil.KsymbolStackMaxDepth)
	return strings.Join(stacks.BackTrace, "\n")
}

func attachIrqAndEventPipe(ctx context.Context, b bpf.BPF) (bpf.PerfEventReader, error) {
	var err error

	reader, err := b.EventPipeByName(ctx, "irqoff_event_map", 8192)
	if err != nil {
		return nil, err
	}

	defer func() {
		if err != nil {
			reader.Close()
		}
	}()

	/*
	 * NOTE: There might be a gap of more than 100ms between the attachment of hooks,
	 * so the order in which the kprobes and the tracepoint are attached matters.
	 * probe_scheduler_tick must not be attached before probe_tick_stop, and not
	 * later than probe_tick_nohz_restart_sched_tick. So only
	 * probe_tick_stop -> probe_scheduler_tick -> probe_tick_nohz_restart_sched_tick
	 * works for this scenario.
	 *
	 * But we can't control the order of detachment, as it is executed in a random
	 * sequence in HuaTuo. Therefore, when we exit for some special reason, a
	 * small number of false alarms might be reported.
	 */
	if err = b.AttachWithOptions([]bpf.AttachOption{
		{
			ProgramName: "probe_scheduler_tick",
			Symbol:      "scheduler_tick",
		},
		{
			ProgramName: "probe_tick_nohz_restart_sched_tick",
			Symbol:      "tick_nohz_restart_sched_tick",
		},
		{
			ProgramName: "probe_tick_stop",
			Symbol:      "timer/tick_stop",
		},
	}); err != nil {
		return nil, err
	}

	return reader, nil
}
@ -0,0 +1,123 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package events

import (
	"context"
	"fmt"
	"strings"
	"time"

	"huatuo-bamai/internal/bpf"
	"huatuo-bamai/internal/log"
	"huatuo-bamai/internal/storage"
	"huatuo-bamai/internal/utils/bpfutil"
	"huatuo-bamai/internal/utils/kmsgutil"
	"huatuo-bamai/pkg/metric"
	"huatuo-bamai/pkg/tracing"
)

//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/softlockup.c -o $BPF_DIR/softlockup.o

type softLockupPerfEventData struct {
	CPU  int32
	Pid  int32
	Comm [16]byte
}

// SoftLockupTracerData is the full data structure.
type SoftLockupTracerData struct {
	CPU       int32  `json:"cpu"`
	Pid       int32  `json:"pid"`
	Comm      string `json:"comm"`
	CPUsStack string `json:"cpus_stack"`
}

type softLockupTracing struct {
	softlockupMetric []*metric.Data
}

func init() {
	tracing.RegisterEventTracing("softlockup", newSoftLockup)
}

func newSoftLockup() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &softLockupTracing{
			softlockupMetric: []*metric.Data{
				metric.NewGaugeData("happened", 0, "softlockup happened", nil),
			},
		},
		Internal: 10,
		Flag:     tracing.FlagTracing | tracing.FlagMetric,
	}, nil
}

var softlockupCounter float64

func (c *softLockupTracing) Update() ([]*metric.Data, error) {
	c.softlockupMetric[0].Value = softlockupCounter
	softlockupCounter = 0
	return c.softlockupMetric, nil
}

func (c *softLockupTracing) Start(ctx context.Context) error {
	b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
	if err != nil {
		log.Infof("failed to LoadBpf, err: %v", err)
		return err
	}
	defer b.Close()

	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	reader, err := b.AttachAndEventPipe(childCtx, "softlockup_perf_events", 8192)
	if err != nil {
		log.Infof("failed to AttachAndEventPipe, err: %v", err)
		return err
	}
	defer reader.Close()

	b.WaitDetachByBreaker(childCtx, cancel)

	for {
		select {
		case <-childCtx.Done():
			return nil
		default:
			var data softLockupPerfEventData
			if err := reader.ReadInto(&data); err != nil {
				return fmt.Errorf("ReadFromPerfEvent fail: %w", err)
			}

			bt, err := kmsgutil.GetAllCPUsBT()
			if err != nil {
				bt = err.Error()
			}

			caseData := &SoftLockupTracerData{
				CPU:       data.CPU,
				Pid:       data.Pid,
				Comm:      strings.TrimRight(string(data.Comm[:]), "\x00"),
				CPUsStack: bt,
			}
			softlockupCounter++

			// save storage
			storage.Save("softlockup", "", time.Now(), caseData)
		}
	}
}
@ -0,0 +1,121 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package collector

import (
	"bufio"
	"fmt"
	"os"
	"strconv"
	"strings"

	"huatuo-bamai/internal/pod"
	"huatuo-bamai/pkg/metric"
	"huatuo-bamai/pkg/tracing"
)

var arpCachePath = "/proc/net/stat/arp_cache"

type arpCollector struct {
	metric []*metric.Data
}

func init() {
	tracing.RegisterEventTracing("arp", newArp)
}

func newArp() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &arpCollector{
			metric: []*metric.Data{
				metric.NewGaugeData("entries", 0, "arp entries in the host init namespace", nil),
				metric.NewGaugeData("total", 0, "total arp_cache entries", nil),
			},
		},
		Flag: tracing.FlagMetric,
	}, nil
}

// NetStat contains statistics for all the counters from one file.
// It is exported so it can also be used for /proc/net/stat/ndisc_cache.
type NetStat struct {
	Stats    map[string]uint64
	Filename string
}

func parseNetstatCache(filePath string) (NetStat, error) {
	netStat := NetStat{
		Stats: make(map[string]uint64),
	}

	file, err := os.Open(filePath)
	if err != nil {
		return netStat, err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	scanner.Scan()

	// The first line is always the header naming the stats.
	var headers []string
	headers = append(headers, strings.Fields(scanner.Text())...)

	// Only the first row of values is read; the counters are hexadecimal.
	scanner.Scan()
	for num, counter := range strings.Fields(scanner.Text()) {
		value, err := strconv.ParseUint(counter, 16, 64)
		if err != nil {
			return NetStat{}, err
		}
		netStat.Stats[headers[num]] = value
	}

	return netStat, nil
}

func (c *arpCollector) Update() ([]*metric.Data, error) {
	arpMetric := []*metric.Data{}

	containers, err := pod.GetNormalContainers()
	if err != nil {
		return nil, fmt.Errorf("GetNormalContainers: %w", err)
	}

	for _, container := range containers {
		count, err := fileLineCounter(fmt.Sprintf("/proc/%d/net/arp", container.InitPid))
		if err != nil {
			return nil, err
		}

		arpMetric = append(arpMetric, metric.NewContainerGaugeData(container, "entries", float64(count-1), "arp for container and host", nil))
	}

	count, err := fileLineCounter("/proc/1/net/arp")
	if err != nil {
		return nil, err
	}

	stat, err := parseNetstatCache(arpCachePath)
	if err != nil {
		return nil, err
	}

	c.metric[0].Value = float64(count - 1)
	c.metric[1].Value = float64(stat.Stats["entries"])

	arpMetric = append(arpMetric, c.metric...)
	return arpMetric, nil
}
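
For reference, /proc/net/stat/arp_cache is a header row followed by one row of hexadecimal counters per CPU, and parseNetstatCache above only consumes the first value row. A standalone sketch of that parse against a trimmed-down, made-up sample (real files carry many more columns):

package main

import (
	"bufio"
	"fmt"
	"strconv"
	"strings"
)

func main() {
	// Hypothetical, trimmed-down /proc/net/stat/arp_cache content: a header row
	// followed by one hex row per CPU; only the first value row is parsed.
	sample := "entries allocs destroys\n0000001c 00000005 00000002\n0000001c 00000000 00000000\n"

	scanner := bufio.NewScanner(strings.NewReader(sample))
	scanner.Scan()
	headers := strings.Fields(scanner.Text())

	stats := make(map[string]uint64)
	scanner.Scan()
	for i, counter := range strings.Fields(scanner.Text()) {
		v, err := strconv.ParseUint(counter, 16, 64) // counters are hex, as in parseNetstatCache
		if err != nil {
			panic(err)
		}
		stats[headers[i]] = v
	}

	fmt.Println(stats["entries"]) // 0x1c == 28
}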
@ -0,0 +1,70 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package collector

import (
	"fmt"
	"strconv"

	"github.com/prometheus/procfs"

	"huatuo-bamai/pkg/metric"
	"huatuo-bamai/pkg/tracing"
)

type buddyInfoCollector struct {
	fs procfs.FS
}

func init() {
	tracing.RegisterEventTracing("buddyinfo", newBuddyInfo)
}

func newBuddyInfo() (*tracing.EventTracingAttr, error) {
	fs, err := procfs.NewDefaultFS()
	if err != nil {
		return nil, fmt.Errorf("open procfs: %w", err)
	}

	return &tracing.EventTracingAttr{
		TracingData: &buddyInfoCollector{fs: fs},
		Flag:        tracing.FlagMetric,
	}, nil
}

func (c *buddyInfoCollector) Update() ([]*metric.Data, error) {
	buddyInfo, err := c.fs.BuddyInfo()
	if err != nil {
		return nil, err
	}

	metrics := []*metric.Data{}

	for _, entry := range buddyInfo {
		for size, value := range entry.Sizes {
			// Build a fresh label map per metric so the entries don't all share
			// (and overwrite) the same underlying map.
			buddyLabel := map[string]string{
				"node": entry.Node,
				"zone": entry.Zone,
				"size": strconv.Itoa(size),
			}

			metrics = append(metrics,
				metric.NewGaugeData("blocks", value, "buddy info", buddyLabel))
		}
	}

	return metrics, nil
}
@ -0,0 +1,166 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package collector

import (
	"reflect"
	"sync"
	"time"

	"huatuo-bamai/internal/log"
	"huatuo-bamai/internal/pod"
	"huatuo-bamai/internal/utils/cgrouputil"
	"huatuo-bamai/pkg/metric"
	"huatuo-bamai/pkg/tracing"
)

type cpuStat struct {
	nrThrottled   uint64
	throttledTime uint64
	nrBursts      uint64
	burstTime     uint64

	// calculated values
	hierarchyWaitSum uint64
	innerWaitSum     uint64
	cpuTotal         uint64

	waitrateHierarchy float64
	waitrateInner     float64
	waitrateExter     float64
	waitrateThrottled float64

	lastUpdate time.Time
}

type cpuStatCollector struct {
	cpu     *cgrouputil.CPU
	cpuacct *cgrouputil.CPUAcct
	mutex   sync.Mutex
}

func init() {
	tracing.RegisterEventTracing("cpu_stat", newCPUStat)
	_ = pod.RegisterContainerLifeResources("collector_cpu_stat", reflect.TypeOf(&cpuStat{}))
}

func newCPUStat() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &cpuStatCollector{
			cpu:     cgrouputil.NewCPU(),
			cpuacct: cgrouputil.NewCPUAcctDefault(),
		},
		Flag: tracing.FlagMetric,
	}, nil
}

func (c *cpuStatCollector) cpuMetricUpdate(cpu *cpuStat, container *pod.Container) error {
	var (
		deltaThrottledSum     uint64
		deltaHierarchyWaitSum uint64
		deltaInnerWaitSum     uint64
		deltaExterWaitSum     uint64
	)

	c.mutex.Lock()
	defer c.mutex.Unlock()

	now := time.Now()
	if now.Sub(cpu.lastUpdate).Nanoseconds() < 1000000000 {
		return nil
	}

	raw, err := c.cpu.StatRaw(container.CgroupSuffix)
	if err != nil {
		return err
	}

	usageTotal, err := c.cpuacct.Usage(container.CgroupSuffix)
	if err != nil {
		return err
	}

	stat := cpuStat{
		nrThrottled:      raw["nr_throttled"],
		throttledTime:    raw["throttled_time"],
		hierarchyWaitSum: raw["hierarchy_wait_sum"],
		innerWaitSum:     raw["inner_wait_sum"],
		nrBursts:         raw["nr_bursts"],
		burstTime:        raw["burst_time"],
		cpuTotal:         usageTotal,
		lastUpdate:       now,
	}

	deltaHierarchyWaitSum = stat.hierarchyWaitSum - cpu.hierarchyWaitSum
	if deltaHierarchyWaitSum == 0 {
		deltaThrottledSum = 0
		deltaInnerWaitSum = 0
		deltaExterWaitSum = 0
	} else {
		deltaThrottledSum = stat.throttledTime - cpu.throttledTime
		deltaInnerWaitSum = stat.innerWaitSum - cpu.innerWaitSum

		if deltaHierarchyWaitSum < deltaThrottledSum+deltaInnerWaitSum {
			deltaHierarchyWaitSum = deltaThrottledSum + deltaInnerWaitSum
		}

		deltaExterWaitSum = deltaHierarchyWaitSum - deltaThrottledSum - deltaInnerWaitSum
	}

	deltaWaitRunSum := deltaHierarchyWaitSum + stat.cpuTotal - cpu.cpuTotal
	if deltaWaitRunSum == 0 {
		stat.waitrateHierarchy = 0
		stat.waitrateInner = 0
		stat.waitrateExter = 0
		stat.waitrateThrottled = 0
	} else {
		stat.waitrateHierarchy = float64(deltaHierarchyWaitSum) * 100 / float64(deltaWaitRunSum)
		stat.waitrateInner = float64(deltaInnerWaitSum) * 100 / float64(deltaWaitRunSum)
		stat.waitrateExter = float64(deltaExterWaitSum) * 100 / float64(deltaWaitRunSum)
		stat.waitrateThrottled = float64(deltaThrottledSum) * 100 / float64(deltaWaitRunSum)
	}

	*cpu = stat
	return nil
}

func (c *cpuStatCollector) Update() ([]*metric.Data, error) {
	metrics := []*metric.Data{}

	containers, err := pod.GetContainersByType(pod.ContainerTypeNormal | pod.ContainerTypeSidecar)
	if err != nil {
		return nil, err
	}

	for _, container := range containers {
		containerMetric := container.LifeResouces("collector_cpu_stat").(*cpuStat)
		if err := c.cpuMetricUpdate(containerMetric, container); err != nil {
			log.Infof("failed to update cpu info of %s, %v", container, err)
			continue
		}

		metrics = append(metrics, metric.NewContainerGaugeData(container, "wait_rate", containerMetric.waitrateHierarchy, "wait rate for containers", nil),
			metric.NewContainerGaugeData(container, "inner_wait_rate", containerMetric.waitrateInner, "inner wait rate for container", nil),
			metric.NewContainerGaugeData(container, "exter_wait_rate", containerMetric.waitrateExter, "exter wait rate for container", nil),
			metric.NewContainerGaugeData(container, "throttle_wait_rate", containerMetric.waitrateThrottled, "throttle wait rate for container", nil),
			metric.NewContainerGaugeData(container, "nr_throttled", float64(containerMetric.nrThrottled), "throttle nr for container", nil),
			metric.NewContainerGaugeData(container, "nr_bursts", float64(containerMetric.nrBursts), "burst nr for container", nil),
			metric.NewContainerGaugeData(container, "burst_time", float64(containerMetric.burstTime), "burst time for container", nil),
		)
	}

	return metrics, nil
}
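
The wait-rate gauges computed above are percentages of a combined denominator: each wait delta is divided by the hierarchy wait plus the CPU time actually consumed in the same window. A tiny worked example with invented cgroup counter deltas:

package main

import "fmt"

func main() {
	// Hypothetical deltas over one collection interval, in nanoseconds.
	deltaHierarchyWait := uint64(200_000_000) // total wait observed in the hierarchy
	deltaThrottled := uint64(50_000_000)      // of which CFS throttling
	deltaInnerWait := uint64(30_000_000)      // of which waiting on tasks inside the cgroup
	deltaCPUUsage := uint64(800_000_000)      // cpuacct usage delta in the same window

	deltaExterWait := deltaHierarchyWait - deltaThrottled - deltaInnerWait
	deltaWaitRun := deltaHierarchyWait + deltaCPUUsage

	// Same ratios cpuMetricUpdate reports as wait_rate / inner_wait_rate / exter_wait_rate / throttle_wait_rate.
	fmt.Printf("wait_rate=%.1f%% inner=%.1f%% exter=%.1f%% throttled=%.1f%%\n",
		float64(deltaHierarchyWait)*100/float64(deltaWaitRun),
		float64(deltaInnerWait)*100/float64(deltaWaitRun),
		float64(deltaExterWait)*100/float64(deltaWaitRun),
		float64(deltaThrottled)*100/float64(deltaWaitRun))
}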
@ -0,0 +1,177 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package collector

import (
	"reflect"
	"runtime"
	"sync"
	"time"

	"huatuo-bamai/internal/log"
	"huatuo-bamai/internal/pod"
	"huatuo-bamai/internal/utils/cgrouputil"
	"huatuo-bamai/pkg/metric"
	"huatuo-bamai/pkg/tracing"
)

type cpuMetric struct {
	lastUsrTime   uint64
	lastSysTime   uint64
	lastCPUTotal  uint64
	lastTimestamp time.Time
	utilTotal     float64
	utilSys       float64
	utilUsr       float64
}

type cpuUtilCollector struct {
	cpuUtil []*metric.Data
	cpuacct *cgrouputil.CPUAcct
	cpu     *cgrouputil.CPU

	// host-side counterpart of the per-container cpuMetric
	hostCPUCount  int
	hostCPUMetric cpuMetric

	mutex sync.Mutex
}

func init() {
	tracing.RegisterEventTracing("cpu_util", newCPUUtil)
	_ = pod.RegisterContainerLifeResources("collector_cpu_util", reflect.TypeOf(&cpuMetric{}))
}

func newCPUUtil() (*tracing.EventTracingAttr, error) {
	return &tracing.EventTracingAttr{
		TracingData: &cpuUtilCollector{
			cpuUtil: []*metric.Data{
				metric.NewGaugeData("usr", 0, "usr for container and host", nil),
				metric.NewGaugeData("sys", 0, "sys for container and host", nil),
				metric.NewGaugeData("total", 0, "total for container and host", nil),
			},
			cpuacct:      cgrouputil.NewCPUAcctDefault(),
			cpu:          cgrouputil.NewCPU(),
			hostCPUCount: runtime.NumCPU(),
		},
		Flag: tracing.FlagMetric,
	}, nil
}

func (c *cpuUtilCollector) cpuMetricUpdate(cpuMetric *cpuMetric, container *pod.Container, cpuCount int) error {
	var (
		utilUsr    float64
		utilSys    float64
		utilTotal  float64
		cgroupPath string
	)

	c.mutex.Lock()
	defer c.mutex.Unlock()

	now := time.Now()
	if now.Sub(cpuMetric.lastTimestamp).Nanoseconds() < 1000000000 {
		return nil
	}

	if container != nil {
		cgroupPath = container.CgroupSuffix
	}

	usageTotal, err := c.cpuacct.Usage(cgroupPath)
	if err != nil {
		return err
	}

	usageUsr, usageSys, err := c.cpuacct.Stat(cgroupPath)
	if err != nil {
		return err
	}

	// deltas of zero are allowed
	deltaTotal := usageTotal - cpuMetric.lastCPUTotal
	deltaUsrTime := usageUsr - cpuMetric.lastUsrTime
	deltaSysTime := usageSys - cpuMetric.lastSysTime
	deltaUsageSum := float64(cpuCount) * float64(now.Sub(cpuMetric.lastTimestamp).Nanoseconds())

	if (float64(deltaTotal) > deltaUsageSum) || (float64(deltaUsrTime+deltaSysTime) > deltaUsageSum) {
		cpuMetric.lastUsrTime = usageUsr
		cpuMetric.lastSysTime = usageSys
		cpuMetric.lastCPUTotal = usageTotal
		cpuMetric.lastTimestamp = now

		return nil
	}

	utilTotal = float64(deltaTotal) * 100 / deltaUsageSum
	utilUsr = float64(deltaUsrTime) * 100 / deltaUsageSum
	utilSys = float64(deltaSysTime) * 100 / deltaUsageSum

	cpuMetric.lastUsrTime = usageUsr
	cpuMetric.lastSysTime = usageSys
	cpuMetric.lastCPUTotal = usageTotal
	cpuMetric.utilTotal = utilTotal
	cpuMetric.utilUsr = utilUsr
	cpuMetric.utilSys = utilSys
	cpuMetric.lastTimestamp = now
	return nil
}

func (c *cpuUtilCollector) hostMetricUpdate() error {
	if err := c.cpuMetricUpdate(&c.hostCPUMetric, nil, c.hostCPUCount); err != nil {
		return err
	}

	c.cpuUtil[0].Value = c.hostCPUMetric.utilUsr
	c.cpuUtil[1].Value = c.hostCPUMetric.utilSys
	c.cpuUtil[2].Value = c.hostCPUMetric.utilTotal
	return nil
}

func (c *cpuUtilCollector) Update() ([]*metric.Data, error) {
	metrics := []*metric.Data{}

	containers, err := pod.GetContainersByType(pod.ContainerTypeNormal | pod.ContainerTypeSidecar)
	if err != nil {
		return nil, err
	}

	for _, container := range containers {
		count, err := c.cpu.CPUNum(container.CgroupSuffix)
		if err != nil {
			log.Infof("failed to get cpu count of %s, %v", container, err)
			continue
		}

		containerMetric := container.LifeResouces("collector_cpu_util").(*cpuMetric)
		if err := c.cpuMetricUpdate(containerMetric, container, count); err != nil {
			log.Infof("failed to update cpu info of %s, %v", container, err)
			continue
		}

		metrics = append(metrics, metric.NewContainerGaugeData(container, "count", float64(count), "cpu count for containers", nil),
			metric.NewContainerGaugeData(container, "usr", containerMetric.utilUsr, "usr for container and host", nil),
			metric.NewContainerGaugeData(container, "sys", containerMetric.utilSys, "sys for container and host", nil),
			metric.NewContainerGaugeData(container, "total", containerMetric.utilTotal, "total for container and host", nil))
	}

	if err := c.hostMetricUpdate(); err != nil {
		log.Errorf("hostMetricUpdate: %v", err)
		return nil, err
	}

	metrics = append(metrics, c.cpuUtil...)
	return metrics, nil
}
@ -0,0 +1,41 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package collector

import "regexp"

type fieldFilter struct {
	ignorePattern *regexp.Regexp
	acceptPattern *regexp.Regexp
}

func newFieldFilter(ignoredPattern, acceptPattern string) *fieldFilter {
	f := &fieldFilter{}
	if ignoredPattern != "" {
		f.ignorePattern = regexp.MustCompile(ignoredPattern)
	}

	if acceptPattern != "" {
		f.acceptPattern = regexp.MustCompile(acceptPattern)
	}

	return f
}

// ignored reports whether the field should be ignored.
func (f *fieldFilter) ignored(name string) bool {
	return (f.ignorePattern != nil && f.ignorePattern.MatchString(name)) ||
		(f.acceptPattern != nil && !f.acceptPattern.MatchString(name))
}
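
This filter is what per-collector ExcludedMetrics/IncludedMetrics settings feed into: a field is dropped if it matches the ignore pattern, or if an accept pattern is configured and the field does not match it. A standalone sketch of the same accept/ignore logic, with invented patterns and field names (the type is re-declared locally so the snippet compiles on its own):

package main

import (
	"fmt"
	"regexp"
)

type fieldFilter struct {
	ignorePattern *regexp.Regexp
	acceptPattern *regexp.Regexp
}

func (f *fieldFilter) ignored(name string) bool {
	return (f.ignorePattern != nil && f.ignorePattern.MatchString(name)) ||
		(f.acceptPattern != nil && !f.acceptPattern.MatchString(name))
}

func main() {
	// Hypothetical configuration: accept only oom-related counters, but never "*_disable".
	f := &fieldFilter{
		ignorePattern: regexp.MustCompile(`_disable$`),
		acceptPattern: regexp.MustCompile(`^oom`),
	}

	for _, name := range []string{"oom", "oom_kill", "oom_kill_disable", "low"} {
		fmt.Println(name, "ignored:", f.ignored(name))
	}
	// Output: oom false, oom_kill false, oom_kill_disable true, low true.
}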
@ -0,0 +1,106 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package collector

import (
	"fmt"

	"huatuo-bamai/internal/log"
	"huatuo-bamai/internal/pod"
	"huatuo-bamai/internal/utils/cgrouputil"
	"huatuo-bamai/pkg/metric"
	"huatuo-bamai/pkg/tracing"

	"github.com/google/cadvisor/utils/cpuload/netlink"
	"github.com/prometheus/procfs"
)

type loadavgCollector struct {
	loadAvg []*metric.Data
}

func init() {
	tracing.RegisterEventTracing("loadavg", newLoadavg)
}

// newLoadavg returns a new Collector exposing load average stats.
func newLoadavg() (*tracing.EventTracingAttr, error) {
	collector := &loadavgCollector{
		// Load average over the last 1, 5 and 15 minutes.
		// See the linux kernel Documentation/filesystems/proc.rst
		loadAvg: []*metric.Data{
			metric.NewGaugeData("load1", 0, "1m load average", nil),
			metric.NewGaugeData("load5", 0, "5m load average", nil),
			metric.NewGaugeData("load15", 0, "15m load average", nil),
		},
	}

	return &tracing.EventTracingAttr{
		TracingData: collector, Flag: tracing.FlagMetric,
	}, nil
}

// hostLoadAvg reads loadavg from /proc.
func (c *loadavgCollector) hostLoadAvg() error {
	fs, err := procfs.NewDefaultFS()
	if err != nil {
		return err
	}

	load, err := fs.LoadAvg()
	if err != nil {
		return err
	}

	c.loadAvg[0].Value = load.Load1
	c.loadAvg[1].Value = load.Load5
	c.loadAvg[2].Value = load.Load15
	return nil
}

func (c *loadavgCollector) Update() ([]*metric.Data, error) {
	loadAvgMetrics := []*metric.Data{}

	n, err := netlink.New()
	if err != nil {
		log.Infof("Failed to create netlink: %s", err)
		return nil, err
	}
	defer n.Stop()

	containers, err := pod.GetContainersByType(pod.ContainerTypeNormal | pod.ContainerTypeSidecar)
	if err != nil {
		return nil, fmt.Errorf("GetContainersByType: %w", err)
	}

	for _, container := range containers {
		stats, err := n.GetCpuLoad(container.Hostname, cgrouputil.NewCPU().Path(container.CgroupSuffix))
		if err != nil {
			log.Debugf("failed to get %s load, %v", container, err)
			continue
		}

		loadAvgMetrics = append(loadAvgMetrics,
			metric.NewContainerGaugeData(container, "container_nr_running", float64(stats.NrRunning), "nr_running of container", nil),
			metric.NewContainerGaugeData(container, "container_nr_uninterruptible", float64(stats.NrUninterruptible), "nr_uninterruptible of container", nil))
	}

	if err := c.hostLoadAvg(); err != nil {
		return nil, err
	}

	loadAvgMetrics = append(loadAvgMetrics, c.loadAvg...)
	return loadAvgMetrics, nil
}
@ -0,0 +1,129 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/utils/bpfutil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("mmcgroup", newMemoryCgroup)
|
||||
}
|
||||
|
||||
func newMemoryCgroup() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &memoryCgroup{},
|
||||
Internal: 10,
|
||||
Flag: tracing.FlagTracing | tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type memoryCgroupMetric struct {
|
||||
DirectstallCount uint64
|
||||
}
|
||||
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/memory_cgroup.c -o $BPF_DIR/memory_cgroup.o
|
||||
|
||||
type memoryCgroup struct {
|
||||
bpf bpf.BPF
|
||||
isRuning bool
|
||||
}
|
||||
|
||||
func (c *memoryCgroup) Update() ([]*metric.Data, error) {
|
||||
if !c.isRuning {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
containersMap := make(map[uint64]*pod.Container)
|
||||
containers, err := pod.GetNormalContainers()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Can't get normal container: %w", err)
|
||||
}
|
||||
|
||||
for _, container := range containers {
|
||||
containersMap[container.CSS["memory"]] = container
|
||||
}
|
||||
|
||||
items, err := c.bpf.DumpMapByName("mem_cgroup_map")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Can't dump mem_cgroup_map: %w", err)
|
||||
}
|
||||
|
||||
var (
|
||||
cgroupMetric memoryCgroupMetric
|
||||
containersMetric []*metric.Data
|
||||
css uint64
|
||||
)
|
||||
for _, v := range items {
|
||||
keyBuf := bytes.NewReader(v.Key)
|
||||
if err := binary.Read(keyBuf, binary.LittleEndian, &css); err != nil {
|
||||
return nil, fmt.Errorf("mem_cgroup_map key: %w", err)
|
||||
}
|
||||
|
||||
valBuf := bytes.NewReader(v.Value)
|
||||
if err := binary.Read(valBuf, binary.LittleEndian, &cgroupMetric); err != nil {
|
||||
return nil, fmt.Errorf("mem_cgroup_map value: %w", err)
|
||||
}
|
||||
|
||||
if container, exist := containersMap[css]; exist {
|
||||
containersMetric = append(containersMetric,
|
||||
metric.NewContainerGaugeData(container, "directstallcount",
|
||||
float64(cgroupMetric.DirectstallCount),
|
||||
"counting of cgroup try_charge reclaim", nil))
|
||||
}
|
||||
}
|
||||
|
||||
// If no events have happened, report zero for all containers.
|
||||
if len(items) == 0 {
|
||||
for _, container := range containersMap {
|
||||
containersMetric = append(containersMetric,
|
||||
metric.NewContainerGaugeData(container, "directstallcount", float64(0),
|
||||
"counting of cgroup try_charge reclaim", nil))
|
||||
}
|
||||
}
|
||||
|
||||
return containersMetric, nil
|
||||
}
|
||||
|
||||
func (c *memoryCgroup) Start(ctx context.Context) error {
|
||||
var err error
|
||||
c.bpf, err = bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("LoadBpf memory_cgroup.o: %w", err)
|
||||
}
|
||||
defer c.bpf.Close()
|
||||
|
||||
if err = c.bpf.Attach(); err != nil {
|
||||
return fmt.Errorf("failed to Attach, err: %w", err)
|
||||
}
|
||||
|
||||
childCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
c.bpf.WaitDetachByBreaker(childCtx, cancel)
|
||||
c.isRunning = true
|
||||
<-childCtx.Done()
|
||||
c.isRunning = false
|
||||
return nil
|
||||
}
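
The BPF-backed collectors above decode raw map bytes with encoding/binary: an 8-byte little-endian key (the memory cgroup css identifier) and a fixed-layout value struct. A self-contained sketch of that decode step, using fabricated byte slices in place of a real DumpMapByName result:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// mirrors memoryCgroupMetric: one little-endian uint64 counter.
type cgroupMetric struct {
	DirectstallCount uint64
}

func main() {
	// Fake map entry as it would come back from the BPF side:
	// 8-byte key (css address) and 8-byte value, both little-endian.
	key := make([]byte, 8)
	binary.LittleEndian.PutUint64(key, 0xffff888012345678)
	val := make([]byte, 8)
	binary.LittleEndian.PutUint64(val, 42)

	var css uint64
	if err := binary.Read(bytes.NewReader(key), binary.LittleEndian, &css); err != nil {
		panic(err)
	}

	var m cgroupMetric
	if err := binary.Read(bytes.NewReader(val), binary.LittleEndian, &m); err != nil {
		panic(err)
	}

	fmt.Printf("css=0x%x directstall=%d\n", css, m.DirectstallCount)
}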
|
|
@ -0,0 +1,70 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
type memEventsCollector struct {
|
||||
mem cgrouputil.Memory
|
||||
}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("memory_events", newMemEvents)
|
||||
}
|
||||
|
||||
func newMemEvents() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &memEventsCollector{
|
||||
mem: *cgrouputil.NewMemory(),
|
||||
}, Flag: tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *memEventsCollector) Update() ([]*metric.Data, error) {
|
||||
filter := newFieldFilter(conf.Get().MetricCollector.MemoryEvents.ExcludedMetrics,
|
||||
conf.Get().MetricCollector.MemoryEvents.IncludedMetrics)
|
||||
|
||||
containers, err := pod.GetNormalContainers()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get normal container: %w", err)
|
||||
}
|
||||
|
||||
metrics := []*metric.Data{}
|
||||
for _, container := range containers {
|
||||
raw, err := c.mem.EventsRaw(container.CgroupSuffix)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for key, value := range raw {
|
||||
if filter.ignored(key) {
|
||||
continue
|
||||
}
|
||||
|
||||
metrics = append(metrics,
|
||||
metric.NewContainerGaugeData(container, key, float64(value), fmt.Sprintf("memory events %s", key), nil))
|
||||
}
|
||||
}
|
||||
|
||||
return metrics, nil
|
||||
}
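
cgrouputil.Memory.EventsRaw is not part of this diff; judging by how its result is used (a map from event name to counter), it presumably parses the `name value` lines of a cgroup memory.events file. A stand-alone sketch under that assumption:

package main

import (
	"bufio"
	"fmt"
	"strconv"
	"strings"
)

// parseMemoryEvents parses "key value" lines such as the contents of
// a cgroup memory.events file.
func parseMemoryEvents(content string) (map[string]uint64, error) {
	events := map[string]uint64{}
	scanner := bufio.NewScanner(strings.NewReader(content))
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) != 2 {
			continue
		}
		v, err := strconv.ParseUint(fields[1], 10, 64)
		if err != nil {
			return nil, fmt.Errorf("parse %q: %w", scanner.Text(), err)
		}
		events[fields[0]] = v
	}
	return events, scanner.Err()
}

func main() {
	raw := "low 0\nhigh 12\nmax 3\noom 0\noom_kill 0\n"
	events, err := parseMemoryEvents(raw)
	if err != nil {
		panic(err)
	}
	fmt.Println(events["high"], events["oom_kill"]) // 12 0
}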
|
|
@ -0,0 +1,110 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/utils/bpfutil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("mmhost", newMemoryHost)
|
||||
}
|
||||
|
||||
func newMemoryHost() (*tracing.EventTracingAttr, error) {
|
||||
mm := &memoryHost{
|
||||
metrics: []*metric.Data{
|
||||
metric.NewGaugeData("compactionstat", 0, "time spent during mm compaction", nil),
|
||||
metric.NewGaugeData("allocstallstat", 0, "time spent during mm allocstall", nil),
|
||||
},
|
||||
}
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: mm,
|
||||
Internal: 10,
|
||||
Flag: tracing.FlagTracing | tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/memory_free_compact.c -o $BPF_DIR/memory_free_compact.o
|
||||
|
||||
type memoryHost struct {
|
||||
metrics []*metric.Data
|
||||
bpf bpf.BPF
|
||||
isRunning bool
|
||||
}
|
||||
|
||||
type memoryHostMetric struct {
|
||||
/* host: compaction latency */
|
||||
CompactionStat uint64
|
||||
/* host: page alloc latency in direct reclaim */
|
||||
AllocstallStat uint64
|
||||
}
|
||||
|
||||
func (c *memoryHost) Update() ([]*metric.Data, error) {
|
||||
if !c.isRunning {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
items, err := c.bpf.DumpMapByName("mm_free_compact_map")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Can't dump mm_host_metrictable_relay: %w", err)
|
||||
}
|
||||
|
||||
if len(items) == 0 {
|
||||
c.metrics[0].Value = float64(0)
|
||||
c.metrics[1].Value = float64(0)
|
||||
} else {
|
||||
mmMetric := memoryHostMetric{}
|
||||
buf := bytes.NewReader(items[0].Value)
|
||||
err := binary.Read(buf, binary.LittleEndian, &mmMetric)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read mem_cgroup_map: %w", err)
|
||||
}
|
||||
c.metrics[0].Value = float64(mmMetric.CompactionStat) / 1000 / 1000
|
||||
c.metrics[1].Value = float64(mmMetric.AllocstallStat) / 1000 / 1000
|
||||
}
|
||||
return c.metrics, nil
|
||||
}
|
||||
|
||||
// Start detect work, load bpf and wait data from perf event
|
||||
func (c *memoryHost) Start(ctx context.Context) error {
|
||||
var err error
|
||||
c.bpf, err = bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("LoadBpf mmhostbpf.o: %w", err)
|
||||
}
|
||||
defer c.bpf.Close()
|
||||
|
||||
if err = c.bpf.Attach(); err != nil {
|
||||
return fmt.Errorf("Attach memory_free_compact.o: %w", err)
|
||||
}
|
||||
|
||||
childCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
c.bpf.WaitDetachByBreaker(childCtx, cancel)
|
||||
|
||||
c.isRunning = true
|
||||
<-childCtx.Done()
|
||||
c.isRunning = false
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/internal/utils/parseutil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
type memOthersCollector struct{}
|
||||
|
||||
func init() {
|
||||
// only for didicloud
|
||||
tracing.RegisterEventTracing("memory_others", newMemOthersCollector)
|
||||
}
|
||||
|
||||
func newMemOthersCollector() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &memOthersCollector{},
|
||||
Flag: tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func parseValueWithKey(path, key string) (uint64, error) {
|
||||
filePath := filepath.Join(cgrouputil.V1MemoryPath(), path)
|
||||
if key == "" {
|
||||
return parseutil.ReadUint(filePath)
|
||||
}
|
||||
|
||||
raw, err := parseutil.ParseRawKV(filePath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return raw[key], nil
|
||||
}
|
||||
|
||||
func (c *memOthersCollector) Update() ([]*metric.Data, error) {
|
||||
containers, err := pod.GetNormalContainers()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Can't get normal container: %w", err)
|
||||
}
|
||||
|
||||
metrics := []*metric.Data{}
|
||||
|
||||
for _, container := range containers {
|
||||
for _, t := range []struct {
|
||||
path string
|
||||
key string
|
||||
name string
|
||||
}{
|
||||
{
|
||||
path: "memory.directstall_stat",
|
||||
key: "directstall_time",
|
||||
name: "directstall_time",
|
||||
},
|
||||
{
|
||||
path: "memory.asynreclaim_stat",
|
||||
key: "asyncreclaim_time",
|
||||
name: "asyncreclaim_time",
|
||||
},
|
||||
{
|
||||
path: "memory.local_direct_reclaim_time",
|
||||
key: "",
|
||||
name: "local_direct_reclaim_time",
|
||||
},
|
||||
} {
|
||||
path := filepath.Join(container.CgroupSuffix, t.path)
|
||||
value, err := parseValueWithKey(path, t.key)
|
||||
if err != nil {
|
||||
// FIXME: the OS may not support this metric
|
||||
log.Debugf("parse %s: %s", path, err)
|
||||
continue
|
||||
}
|
||||
|
||||
metrics = append(metrics,
|
||||
metric.NewContainerGaugeData(container, t.name, float64(value), fmt.Sprintf("memory cgroup %s", t.name), nil))
|
||||
}
|
||||
}
|
||||
|
||||
return metrics, nil
|
||||
}
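
parseutil.ReadUint and parseutil.ParseRawKV are defined elsewhere; parseValueWithKey above suggests they read either a single-integer file or a `key value` file. A stdlib-only sketch of that presumed behaviour:

package main

import (
	"bufio"
	"fmt"
	"os"
	"strconv"
	"strings"
)

// readValueWithKey reads path and returns either the whole file parsed as a
// uint64 (key == "") or the value of the given key in a "key value" file.
func readValueWithKey(path, key string) (uint64, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	if key == "" {
		var v uint64
		_, err := fmt.Fscan(f, &v)
		return v, err
	}

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) == 2 && fields[0] == key {
			return strconv.ParseUint(fields[1], 10, 64)
		}
	}
	// like the collector above, a missing key simply yields zero
	return 0, scanner.Err()
}

func main() {
	// Hypothetical path; the real collector joins the cgroup suffix itself.
	v, err := readValueWithKey("/sys/fs/cgroup/memory/memory.failcnt", "")
	fmt.Println(v, err)
}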
|
|
@ -0,0 +1,70 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/internal/utils/parseutil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
type memStatCollector struct{}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("memory_stat", newMemStat)
|
||||
}
|
||||
|
||||
func newMemStat() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &memStatCollector{},
|
||||
Flag: tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *memStatCollector) Update() ([]*metric.Data, error) {
|
||||
filter := newFieldFilter(conf.Get().MetricCollector.MemoryStat.ExcludedMetrics,
|
||||
conf.Get().MetricCollector.MemoryStat.IncludedMetrics)
|
||||
|
||||
metrics := []*metric.Data{}
|
||||
containers, err := pod.GetNormalContainers()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("GetNormalContainers: %w", err)
|
||||
}
|
||||
|
||||
for _, container := range containers {
|
||||
raw, err := parseutil.ParseRawKV(cgrouputil.V1MemoryPath() + container.CgroupSuffix + "/memory.stat")
|
||||
if err != nil {
|
||||
log.Infof("parse %s memory.stat %v", container.CgroupSuffix, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for m, v := range raw {
|
||||
if filter.ignored(m) {
|
||||
log.Debugf("Ignoring memory_stat metric: %s", m)
|
||||
continue
|
||||
}
|
||||
|
||||
metrics = append(metrics, metric.NewContainerGaugeData(container, m, float64(v), fmt.Sprintf("memory stat %s", m), nil))
|
||||
}
|
||||
}
|
||||
|
||||
return metrics, nil
|
||||
}
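
newFieldFilter and fieldFilter.ignored come from elsewhere in the package and are used by most collectors here. Judging from how they are called (an excluded pattern and an included pattern from configuration), a plausible sketch is a regexp-based include/exclude filter like the one below; the real implementation may differ.

package main

import (
	"fmt"
	"regexp"
)

// filter ignores a name that matches the exclude pattern; when an include
// pattern is set, everything that does not match it is ignored instead.
type filter struct {
	exclude *regexp.Regexp
	include *regexp.Regexp
}

func newFilter(exclude, include string) *filter {
	f := &filter{}
	if exclude != "" {
		f.exclude = regexp.MustCompile(exclude)
	}
	if include != "" {
		f.include = regexp.MustCompile(include)
	}
	return f
}

func (f *filter) ignored(name string) bool {
	if f.include != nil {
		return !f.include.MatchString(name)
	}
	if f.exclude != nil {
		return f.exclude.MatchString(name)
	}
	return false
}

func main() {
	f := newFilter("^total_", "")
	fmt.Println(f.ignored("total_cache")) // true
	fmt.Println(f.ignored("pgmajfault"))  // false

	only := newFilter("", "^(rss|cache)$")
	fmt.Println(only.ignored("rss"))  // false
	fmt.Println(only.ignored("swap")) // true
}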
|
|
@ -0,0 +1,136 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"huatuo-bamai/pkg/metric"
|
||||
)
|
||||
|
||||
const (
|
||||
softirqHi = iota
|
||||
softirqTime
|
||||
softirqNetTx
|
||||
softirqNetRx
|
||||
softirqBlock
|
||||
softirqIrqPoll
|
||||
softirqTasklet
|
||||
softirqSched
|
||||
softirqHrtimer
|
||||
softirqRcu
|
||||
softirqMax
|
||||
)
|
||||
|
||||
const (
|
||||
latZONE0 = iota // 0 ~ 10us
|
||||
latZONE1 // 10us ~ 100us
|
||||
latZONE2 // 100us ~ 1ms
|
||||
latZONE3 // 1ms ~ inf
|
||||
latZoneMax
|
||||
)
|
||||
|
||||
const (
|
||||
// HI:0x1
|
||||
// TIMER:0x2
|
||||
// NET_TX:0x4
|
||||
// NET_RX:0x8
|
||||
// BLOCK:0x10
|
||||
// IRQ_POLL:0x20
|
||||
// TASKLET:0x40
|
||||
// SCHED:0x80
|
||||
// HRTIMER:0x100
|
||||
// RCU:0x200
|
||||
// fullmask => 0x2ff
|
||||
defaultSiTypeMask = 0x0c // default: only report NET_TX and NET_RX so far
|
||||
|
||||
// Because the BPF verifier strictly checks array accesses, the array
// size must be a power of two, so we use softirqArrayMax rather than
// softirqMax as the array size.
softirqArrayMax = 16 // must be a power of two
|
||||
)
|
||||
|
||||
var monTracerIsRunning bool
|
||||
|
||||
func latZoneName(latZone int) string {
|
||||
switch latZone {
|
||||
case latZONE0: // 0 ~ 10us
|
||||
return "0~10 us"
|
||||
case latZONE1: // 10us ~ 100us
|
||||
return "10us ~ 100us"
|
||||
case latZONE2: // 100us ~ 1ms
|
||||
return "100us ~ 1ms"
|
||||
case latZONE3: // 1ms ~ inf
|
||||
return "1ms ~ inf"
|
||||
default:
|
||||
return "ERR_ZONE"
|
||||
}
|
||||
}
|
||||
|
||||
func siTypeName(siType int) string {
|
||||
switch siType {
|
||||
case softirqHi:
|
||||
return "HI"
|
||||
case softirqTime:
|
||||
return "TIMER"
|
||||
case softirqNetTx:
|
||||
return "NET_TX"
|
||||
case softirqNetRx:
|
||||
return "NET_RX"
|
||||
case softirqBlock:
|
||||
return "BLOCK"
|
||||
case softirqIrqPoll:
|
||||
return "IRQ_POLL"
|
||||
case softirqTasklet:
|
||||
return "TASKLET"
|
||||
case softirqSched:
|
||||
return "SCHED"
|
||||
case softirqHrtimer:
|
||||
return "HRTIMER"
|
||||
case softirqRcu:
|
||||
return "RCU"
|
||||
default:
|
||||
return "ERR_TYPE"
|
||||
}
|
||||
}
|
||||
|
||||
func getMonsoftirqInfo() ([]*metric.Data, error) {
|
||||
monsoftirqMetric := []*metric.Data{}

for siType, lats := range &monsoftirqData.SoftirqLat {
if (1<<siType)&defaultSiTypeMask == 0 {
continue
}

for zone, count := range lats {
// build a fresh label map per data point so the appended metrics
// don't all share (and later overwrite) the same underlying map
siLabel := map[string]string{
"softirqType": siTypeName(siType),
"zone":        latZoneName(zone),
}
monsoftirqMetric = append(monsoftirqMetric, metric.NewGaugeData("latency", float64(count), "softirq latency", siLabel))
}
|
||||
}
|
||||
|
||||
return monsoftirqMetric, nil
|
||||
}
|
||||
|
||||
func (c *monsoftirqTracing) Update() ([]*metric.Data, error) {
|
||||
if !monTracerIsRunning {
|
||||
return nil, nil
|
||||
}
|
||||
monsoftirqMetric, err := getMonsoftirqInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return monsoftirqMetric, nil
|
||||
}
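
Two small calculations drive this collector: the per-type reporting mask (defaultSiTypeMask) and the bucketing of a measured latency into one of the four zones. A worked, self-contained example of both, with the zone thresholds taken from the comments above (the BPF side that actually does the bucketing is not in this file):

package main

import (
	"fmt"
	"time"
)

const siTypeMask = 0x0c // NET_TX (bit 2) and NET_RX (bit 3)

// reported tells whether a softirq type index is selected by the mask.
func reported(siType int) bool {
	return (1<<siType)&siTypeMask != 0
}

// latencyZone maps a latency to the zone index used by the collector:
// 0: 0~10us, 1: 10us~100us, 2: 100us~1ms, 3: 1ms~inf.
func latencyZone(d time.Duration) int {
	switch {
	case d < 10*time.Microsecond:
		return 0
	case d < 100*time.Microsecond:
		return 1
	case d < time.Millisecond:
		return 2
	default:
		return 3
	}
}

func main() {
	fmt.Println(reported(2), reported(3), reported(1)) // true true false (NET_TX, NET_RX, TIMER)
	fmt.Println(latencyZone(3 * time.Microsecond))     // 0
	fmt.Println(latencyZone(250 * time.Microsecond))   // 2
}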
|
|
@ -0,0 +1,92 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/utils/bpfutil"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("monsoftirq", newSoftirqCollector)
|
||||
}
|
||||
|
||||
func newSoftirqCollector() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &monsoftirqTracing{},
|
||||
Internal: 10,
|
||||
Flag: tracing.FlagTracing | tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/monsoftirq_tracing.c -o $BPF_DIR/monsoftirq_tracing.o
|
||||
|
||||
type monsoftirqBpfData struct {
|
||||
SoftirqLat [softirqArrayMax][latZoneMax]uint64
|
||||
}
|
||||
|
||||
type monsoftirqTracing struct{}
|
||||
|
||||
var monsoftirqData monsoftirqBpfData
|
||||
|
||||
// Start monsoftirq work, load bpf and wait data from perf event
|
||||
func (c *monsoftirqTracing) Start(ctx context.Context) error {
|
||||
// load bpf.
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to LoadBpf, err: %w", err)
|
||||
}
|
||||
defer b.Close()
|
||||
|
||||
if err = b.Attach(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
childCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
b.WaitDetachByBreaker(childCtx, cancel)
|
||||
|
||||
ticker := time.NewTicker(2 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
monTracerIsRunning = true
|
||||
defer func() { monTracerIsRunning = false }()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-childCtx.Done():
|
||||
return nil
|
||||
case <-ticker.C:
|
||||
item, err := b.ReadMap(b.MapIDByName("softirq_lats"), []byte{0, 0, 0, 0})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read softirq_lats: %w", err)
|
||||
}
|
||||
buf := bytes.NewReader(item)
|
||||
if err = binary.Read(buf, binary.LittleEndian, &monsoftirqData); err != nil {
|
||||
log.Errorf("can't read softirq_lats: %v", err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"github.com/prometheus/procfs"
|
||||
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
type mountPointStatCollector struct{}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("mountpoint_perm", newMountPointStat)
|
||||
}
|
||||
|
||||
func newMountPointStat() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &mountPointStatCollector{},
|
||||
Flag: tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *mountPointStatCollector) Update() ([]*metric.Data, error) {
|
||||
mountinfo, err := procfs.GetMounts()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
filter := newFieldFilter("", conf.Get().MetricCollector.MountPointStat.IncludedMountPoints)
|
||||
|
||||
metrics := []*metric.Data{}
|
||||
for _, v := range mountinfo {
|
||||
if filter.ignored(v.MountPoint) {
|
||||
continue
|
||||
}
|
||||
|
||||
mountTag := map[string]string{"mountpoint": v.MountPoint}
|
||||
ro := 0
|
||||
if _, ok := v.Options["ro"]; ok {
|
||||
ro = 1
|
||||
}
|
||||
|
||||
metrics = append(metrics,
|
||||
metric.NewGaugeData("ro", float64(ro), "whether mountpoint is readonly or not", mountTag))
|
||||
}
|
||||
return metrics, nil
|
||||
}
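
The readonly check above only needs the options of each mount. For reference, a stdlib-only sketch that reads /proc/self/mounts and reports the ro/rw state per mountpoint, without going through github.com/prometheus/procfs:

package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

func main() {
	f, err := os.Open("/proc/self/mounts")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		// fstab format: device mountpoint fstype options dump pass
		fields := strings.Fields(scanner.Text())
		if len(fields) < 4 {
			continue
		}
		mountpoint, options := fields[1], strings.Split(fields[3], ",")

		ro := 0
		for _, opt := range options {
			if opt == "ro" {
				ro = 1
				break
			}
		}
		fmt.Printf("%s ro=%d\n", mountpoint, ro)
	}
	if err := scanner.Err(); err != nil {
		panic(err)
	}
}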
|
|
@ -0,0 +1,261 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
// ref: https://github.com/prometheus/node_exporter/tree/master/collector
|
||||
// - netdev_common.go
|
||||
// - netdev_linux.go
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
|
||||
"github.com/jsimonetti/rtnetlink"
|
||||
"github.com/mdlayher/netlink"
|
||||
"github.com/prometheus/procfs"
|
||||
)
|
||||
|
||||
type (
|
||||
netdevStats map[string]map[string]uint64
|
||||
netdevCollector struct{}
|
||||
)
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("netdev", newNetdevCollector)
|
||||
}
|
||||
|
||||
func newNetdevCollector() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &netdevCollector{},
|
||||
Flag: tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *netdevCollector) Update() ([]*metric.Data, error) {
|
||||
filter := newFieldFilter(conf.Get().MetricCollector.Netdev.IgnoredDevices,
|
||||
conf.Get().MetricCollector.Netdev.AcceptDevices)
|
||||
|
||||
log.Debugf("Updating netdev metrics by filter: %v", filter)
|
||||
|
||||
// normal containers
|
||||
containers, err := pod.GetNormalContainers()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("GetNormalContainers: %w", err)
|
||||
}
|
||||
|
||||
// support the empty container
|
||||
if containers == nil {
|
||||
containers = make(map[string]*pod.Container)
|
||||
}
|
||||
// append host into containers
|
||||
containers[""] = nil
|
||||
|
||||
var metrics []*metric.Data
|
||||
for _, container := range containers {
|
||||
devStats, err := c.getStats(container, filter)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("couldn't get netdev statistic for container %v: %w", container, err)
|
||||
}
|
||||
|
||||
for dev, stats := range devStats {
|
||||
for key, val := range stats {
|
||||
tags := map[string]string{"device": dev}
|
||||
if container != nil {
|
||||
metrics = append(metrics,
|
||||
metric.NewContainerGaugeData(container, key+"_total", float64(val), fmt.Sprintf("Network device statistic %s.", key), tags))
|
||||
} else {
|
||||
metrics = append(metrics,
|
||||
metric.NewGaugeData(key+"_total", float64(val), fmt.Sprintf("Network device statistic %s.", key), tags))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log.Debugf("Updated netdev metrics by filter %v: %v", filter, metrics)
|
||||
return metrics, nil
|
||||
}
|
||||
|
||||
func (c *netdevCollector) getStats(container *pod.Container, filter *fieldFilter) (netdevStats, error) {
|
||||
if conf.Get().MetricCollector.Netdev.EnableNetlink {
|
||||
return c.netlinkStats(container, filter)
|
||||
}
|
||||
return c.procStats(container, filter)
|
||||
}
|
||||
|
||||
func (c *netdevCollector) netlinkStats(container *pod.Container, filter *fieldFilter) (netdevStats, error) {
|
||||
pid := 1 // host
|
||||
if container != nil {
|
||||
pid = container.InitPid
|
||||
}
|
||||
|
||||
file, err := os.Open(filepath.Join("/proc", strconv.Itoa(pid), "ns/net"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
conn, err := rtnetlink.Dial(&netlink.Config{NetNS: int(file.Fd())})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
links, err := conn.Link.List()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
metrics := netdevStats{}
|
||||
for _, msg := range links {
|
||||
if msg.Attributes == nil {
|
||||
log.Debug("No netlink attributes, skipping")
|
||||
continue
|
||||
}
|
||||
name := msg.Attributes.Name
|
||||
stats := msg.Attributes.Stats64
|
||||
if stats32 := msg.Attributes.Stats; stats == nil && stats32 != nil {
|
||||
stats = &rtnetlink.LinkStats64{
|
||||
RXPackets: uint64(stats32.RXPackets),
|
||||
TXPackets: uint64(stats32.TXPackets),
|
||||
RXBytes: uint64(stats32.RXBytes),
|
||||
TXBytes: uint64(stats32.TXBytes),
|
||||
RXErrors: uint64(stats32.RXErrors),
|
||||
TXErrors: uint64(stats32.TXErrors),
|
||||
RXDropped: uint64(stats32.RXDropped),
|
||||
TXDropped: uint64(stats32.TXDropped),
|
||||
Multicast: uint64(stats32.Multicast),
|
||||
Collisions: uint64(stats32.Collisions),
|
||||
RXLengthErrors: uint64(stats32.RXLengthErrors),
|
||||
RXOverErrors: uint64(stats32.RXOverErrors),
|
||||
RXCRCErrors: uint64(stats32.RXCRCErrors),
|
||||
RXFrameErrors: uint64(stats32.RXFrameErrors),
|
||||
RXFIFOErrors: uint64(stats32.RXFIFOErrors),
|
||||
RXMissedErrors: uint64(stats32.RXMissedErrors),
|
||||
TXAbortedErrors: uint64(stats32.TXAbortedErrors),
|
||||
TXCarrierErrors: uint64(stats32.TXCarrierErrors),
|
||||
TXFIFOErrors: uint64(stats32.TXFIFOErrors),
|
||||
TXHeartbeatErrors: uint64(stats32.TXHeartbeatErrors),
|
||||
TXWindowErrors: uint64(stats32.TXWindowErrors),
|
||||
RXCompressed: uint64(stats32.RXCompressed),
|
||||
TXCompressed: uint64(stats32.TXCompressed),
|
||||
RXNoHandler: uint64(stats32.RXNoHandler),
|
||||
RXOtherhostDropped: 0,
|
||||
}
|
||||
}
|
||||
|
||||
if filter.ignored(name) {
|
||||
log.Debugf("Ignoring device: %s", name)
|
||||
continue
|
||||
}
|
||||
|
||||
// Make sure we don't panic when accessing `stats` attributes below.
|
||||
if stats == nil {
|
||||
log.Debug("No netlink stats, skipping")
|
||||
continue
|
||||
}
|
||||
|
||||
// https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_link.h#L42-L246
|
||||
metrics[name] = map[string]uint64{
|
||||
"receive_packets": stats.RXPackets,
|
||||
"transmit_packets": stats.TXPackets,
|
||||
"receive_bytes": stats.RXBytes,
|
||||
"transmit_bytes": stats.TXBytes,
|
||||
"receive_errors": stats.RXErrors,
|
||||
"transmit_errors": stats.TXErrors,
|
||||
"receive_dropped": stats.RXDropped,
|
||||
"transmit_dropped": stats.TXDropped,
|
||||
"multicast": stats.Multicast,
|
||||
"collisions": stats.Collisions,
|
||||
|
||||
// detailed rx_errors
|
||||
"receive_length_errors": stats.RXLengthErrors,
|
||||
"receive_over_errors": stats.RXOverErrors,
|
||||
"receive_crc_errors": stats.RXCRCErrors,
|
||||
"receive_frame_errors": stats.RXFrameErrors,
|
||||
"receive_fifo_errors": stats.RXFIFOErrors,
|
||||
"receive_missed_errors": stats.RXMissedErrors,
|
||||
|
||||
// detailed tx_errors
|
||||
"transmit_aborted_errors": stats.TXAbortedErrors,
|
||||
"transmit_carrier_errors": stats.TXCarrierErrors,
|
||||
"transmit_fifo_errors": stats.TXFIFOErrors,
|
||||
"transmit_heartbeat_errors": stats.TXHeartbeatErrors,
|
||||
"transmit_window_errors": stats.TXWindowErrors,
|
||||
|
||||
// for cslip etc
|
||||
"receive_compressed": stats.RXCompressed,
|
||||
"transmit_compressed": stats.TXCompressed,
|
||||
"receive_nohandler": stats.RXNoHandler,
|
||||
}
|
||||
}
|
||||
|
||||
return metrics, nil
|
||||
}
|
||||
|
||||
func (c *netdevCollector) procStats(container *pod.Container, filter *fieldFilter) (netdevStats, error) {
|
||||
pid := 1 // host
|
||||
if container != nil {
|
||||
pid = container.InitPid
|
||||
}
|
||||
|
||||
fs, err := procfs.NewProc(pid)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open procfs: %w", err)
|
||||
}
|
||||
|
||||
netdev, err := fs.NetDev()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse /proc/[%d]/net/dev: %w", pid, err)
|
||||
}
|
||||
|
||||
metrics := netdevStats{}
|
||||
for name := range netdev {
|
||||
stats := netdev[name]
|
||||
|
||||
if filter.ignored(name) {
|
||||
log.Debugf("Ignoring device: %s", name)
|
||||
continue
|
||||
}
|
||||
|
||||
metrics[name] = map[string]uint64{
|
||||
"receive_bytes": stats.RxBytes,
|
||||
"receive_packets": stats.RxPackets,
|
||||
"receive_errors": stats.RxErrors,
|
||||
"receive_dropped": stats.RxDropped,
|
||||
"receive_fifo": stats.RxFIFO,
|
||||
"receive_frame": stats.RxFrame,
|
||||
"receive_compressed": stats.RxCompressed,
|
||||
"receive_multicast": stats.RxMulticast,
|
||||
"transmit_bytes": stats.TxBytes,
|
||||
"transmit_packets": stats.TxPackets,
|
||||
"transmit_errors": stats.TxErrors,
|
||||
"transmit_dropped": stats.TxDropped,
|
||||
"transmit_fifo": stats.TxFIFO,
|
||||
"transmit_colls": stats.TxCollisions,
|
||||
"transmit_carrier": stats.TxCarrier,
|
||||
"transmit_compressed": stats.TxCompressed,
|
||||
}
|
||||
}
|
||||
|
||||
return metrics, nil
|
||||
}
|
|
@ -0,0 +1,162 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
// ref: https://github.com/prometheus/node_exporter/tree/master/collector
|
||||
// - netstat_linux.go
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
type netstatCollector struct{}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("netstat", newNetstatCollector)
|
||||
}
|
||||
|
||||
func newNetstatCollector() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &netstatCollector{},
|
||||
Flag: tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *netstatCollector) Update() ([]*metric.Data, error) {
|
||||
filter := newFieldFilter(conf.Get().MetricCollector.Netstat.ExcludedMetrics, conf.Get().MetricCollector.Netstat.IncludedMetrics)
|
||||
log.Debugf("Updating netstat metrics by filter: %v", filter)
|
||||
|
||||
// normal containers
|
||||
containers, err := pod.GetNormalContainers()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("GetNormalContainers: %w", err)
|
||||
}
|
||||
|
||||
// support the empty container
|
||||
if containers == nil {
|
||||
containers = make(map[string]*pod.Container)
|
||||
}
|
||||
// append host into containers
|
||||
containers[""] = nil
|
||||
|
||||
var metrics []*metric.Data
|
||||
for _, container := range containers {
|
||||
m, err := c.getStatMetrics(container, filter)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("couldn't get netstat metrics for container %v: %w", container, err)
|
||||
}
|
||||
metrics = append(metrics, m...)
|
||||
}
|
||||
|
||||
log.Debugf("Updated netstat metrics by filter %v: %v", filter, metrics)
|
||||
return metrics, nil
|
||||
}
|
||||
|
||||
func (c *netstatCollector) getStatMetrics(container *pod.Container, filter *fieldFilter) ([]*metric.Data, error) {
|
||||
pid := 1 // host
|
||||
if container != nil {
|
||||
pid = container.InitPid
|
||||
}
|
||||
|
||||
pidProc := filepath.Join("/proc", strconv.Itoa(pid))
|
||||
netStats, err := c.procNetstats(filepath.Join(pidProc, "net/netstat"))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("couldn't get netstats for %v: %w", container, err)
|
||||
}
|
||||
snmpStats, err := c.procNetstats(filepath.Join(pidProc, "net/snmp"))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("couldn't get SNMP stats for %v: %w", container, err)
|
||||
}
|
||||
|
||||
// Merge the results of snmpStats into netStats (collisions are possible, but
|
||||
// we know that the keys are always unique for the given use case).
|
||||
for k, v := range snmpStats {
|
||||
netStats[k] = v
|
||||
}
|
||||
|
||||
var metrics []*metric.Data
|
||||
for protocol, protocolStats := range netStats {
|
||||
for name, value := range protocolStats {
|
||||
key := protocol + "_" + name
|
||||
v, err := strconv.ParseFloat(value, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid value %s in netstats for %v: %w", value, container, err)
|
||||
}
|
||||
|
||||
if filter.ignored(key) {
|
||||
log.Debugf("Ignoring netstat metric %s", key)
|
||||
continue
|
||||
}
|
||||
|
||||
if container != nil {
|
||||
metrics = append(metrics,
|
||||
metric.NewContainerGaugeData(container, key, v, fmt.Sprintf("Statistic %s.", protocol+name), nil))
|
||||
} else {
|
||||
metrics = append(metrics,
|
||||
metric.NewGaugeData(key, v, fmt.Sprintf("Statistic %s.", protocol+name), nil))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return metrics, nil
|
||||
}
|
||||
|
||||
func (c *netstatCollector) procNetstats(fileName string) (map[string]map[string]string, error) {
|
||||
file, err := os.Open(fileName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
var (
|
||||
netStats = map[string]map[string]string{}
|
||||
scanner = bufio.NewScanner(file)
|
||||
)
|
||||
|
||||
for scanner.Scan() {
|
||||
nameParts := strings.Split(scanner.Text(), " ")
|
||||
scanner.Scan()
|
||||
valueParts := strings.Split(scanner.Text(), " ")
|
||||
// Remove trailing :.
|
||||
protocol := nameParts[0][:len(nameParts[0])-1]
|
||||
|
||||
// protocol: only for Tcp/TcpExt
|
||||
if protocol != "Tcp" && protocol != "TcpExt" {
|
||||
continue
|
||||
}
|
||||
|
||||
netStats[protocol] = map[string]string{}
|
||||
if len(nameParts) != len(valueParts) {
|
||||
return nil, fmt.Errorf("mismatch field count mismatch in %s: %s",
|
||||
fileName, protocol)
|
||||
}
|
||||
for i := 1; i < len(nameParts); i++ {
|
||||
netStats[protocol][nameParts[i]] = valueParts[i]
|
||||
}
|
||||
}
|
||||
|
||||
return netStats, scanner.Err()
|
||||
}
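
procNetstats depends on the two-line layout of /proc/net/netstat and /proc/net/snmp: a header line naming the protocol and its fields, immediately followed by a line of values. A small demonstration with inline sample input, so the pairing logic can be seen on its own:

package main

import (
	"bufio"
	"fmt"
	"strings"
)

func main() {
	sample := "TcpExt: SyncookiesSent SyncookiesRecv ListenDrops\n" +
		"TcpExt: 0 3 17\n" +
		"IpExt: InNoRoutes InTruncatedPkts\n" +
		"IpExt: 0 0\n"

	stats := map[string]map[string]string{}
	scanner := bufio.NewScanner(strings.NewReader(sample))
	for scanner.Scan() {
		names := strings.Fields(scanner.Text())
		if !scanner.Scan() {
			break
		}
		values := strings.Fields(scanner.Text())
		if len(names) != len(values) || len(names) == 0 {
			continue
		}
		protocol := strings.TrimSuffix(names[0], ":")
		stats[protocol] = map[string]string{}
		for i := 1; i < len(names); i++ {
			stats[protocol][names[i]] = values[i]
		}
	}
	fmt.Println(stats["TcpExt"]["ListenDrops"]) // 17
}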
|
|
@ -0,0 +1,132 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
// ref: https://github.com/prometheus/node_exporter/tree/master/collector
|
||||
// - qdisc_linux.go
|
||||
|
||||
import (
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
|
||||
"github.com/ema/qdisc"
|
||||
)
|
||||
|
||||
type qdiscStats struct {
|
||||
ifaceName string
|
||||
kind string
|
||||
bytes uint64
|
||||
packets uint32
|
||||
drops uint32
|
||||
requeues uint32
|
||||
overlimits uint32
|
||||
qlen uint32
|
||||
backlog uint32
|
||||
}
|
||||
|
||||
const tcHMajMask = 0xFFFF0000
|
||||
|
||||
type qdiscCollector struct{}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("qdisc", newQdiscCollector)
|
||||
}
|
||||
|
||||
func newQdiscCollector() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &qdiscCollector{},
|
||||
Flag: tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Sum qdiscs that share the same parent major per device, for example:
// <device0> (1+2, 3)
// 1: qdisc <kind> handle0 parent0
// 2: qdisc <kind> handle1 parent0
// 3: qdisc <kind> handle2 parent1
//
// <device1> (1, 2+3)
// 1: qdisc <kind> handle0 parent0
// 2: qdisc <kind> handle1 parent1
// 3: qdisc <kind> handle2 parent1
|
||||
func (c *qdiscCollector) Update() ([]*metric.Data, error) {
|
||||
filter := newFieldFilter(conf.Get().MetricCollector.Qdisc.IgnoredDevices,
|
||||
conf.Get().MetricCollector.Qdisc.AcceptDevices)
|
||||
|
||||
allQdisc, err := qdisc.Get()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
allQdiscMap := make(map[string]map[uint32]*qdiscStats)
|
||||
for _, q := range allQdisc {
|
||||
if filter.ignored(q.IfaceName) || q.Kind == "noqueue" {
|
||||
continue
|
||||
}
|
||||
|
||||
parentMaj := (q.Parent & tcHMajMask) >> 16
|
||||
if _, ok := allQdiscMap[q.IfaceName]; !ok {
|
||||
allQdiscMap[q.IfaceName] = make(map[uint32]*qdiscStats)
|
||||
}
|
||||
netQdisc, ok := allQdiscMap[q.IfaceName][parentMaj]
|
||||
if !ok {
|
||||
allQdiscMap[q.IfaceName][parentMaj] = &qdiscStats{
|
||||
ifaceName: q.IfaceName,
|
||||
kind: q.Kind,
|
||||
bytes: q.Bytes,
|
||||
packets: q.Packets,
|
||||
drops: q.Drops,
|
||||
requeues: q.Requeues,
|
||||
overlimits: q.Overlimits,
|
||||
qlen: q.Qlen,
|
||||
backlog: q.Backlog,
|
||||
}
|
||||
} else {
|
||||
netQdisc.bytes += q.Bytes
|
||||
netQdisc.packets += q.Packets
|
||||
netQdisc.drops += q.Drops
|
||||
netQdisc.requeues += q.Requeues
|
||||
netQdisc.overlimits += q.Overlimits
|
||||
netQdisc.qlen += q.Qlen
|
||||
netQdisc.backlog += q.Backlog
|
||||
}
|
||||
}
|
||||
|
||||
var metrics []*metric.Data
|
||||
for _, netdevQdisc := range allQdiscMap {
|
||||
for _, oneQdisc := range netdevQdisc {
|
||||
tags := map[string]string{"device": oneQdisc.ifaceName, "kind": oneQdisc.kind}
|
||||
metrics = append(metrics,
|
||||
metric.NewGaugeData("bytes_total", float64(oneQdisc.bytes),
|
||||
"Number of bytes sent.", tags),
|
||||
metric.NewGaugeData("packets_total", float64(oneQdisc.packets),
|
||||
"Number of packets sent.", tags),
|
||||
metric.NewGaugeData("drops_total", float64(oneQdisc.drops),
|
||||
"Number of packet drops.", tags),
|
||||
metric.NewGaugeData("requeues_total", float64(oneQdisc.requeues),
|
||||
"Number of packets dequeued, not transmitted, and requeued.", tags),
|
||||
metric.NewGaugeData("overlimits_total", float64(oneQdisc.overlimits),
|
||||
"Number of packet overlimits.", tags),
|
||||
metric.NewGaugeData("current_queue_length", float64(oneQdisc.qlen),
|
||||
"Number of packets currently in queue to be sent.", tags),
|
||||
metric.NewGaugeData("backlog", float64(oneQdisc.backlog),
|
||||
"Number of bytes currently in queue to be sent.", tags),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return metrics, nil
|
||||
}
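
TC handles pack a 16-bit major and a 16-bit minor into one uint32, which is why the code above groups qdiscs by (q.Parent & tcHMajMask) >> 16. A tiny worked example of that extraction:

package main

import "fmt"

const tcHMajMask = 0xFFFF0000

// parentMajor returns the major number of a TC handle, e.g. the "1" in "1:10".
func parentMajor(handle uint32) uint32 {
	return (handle & tcHMajMask) >> 16
}

func main() {
	// handle 1:10 is encoded as 0x0001000a
	fmt.Println(parentMajor(0x0001000a)) // 1
	// handle 8001:0 is encoded as 0x80010000
	fmt.Printf("%x\n", parentMajor(0x80010000)) // 8001
}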
|
|
@ -0,0 +1,82 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
type runqlatCollector struct {
|
||||
runqlatMetric []*metric.Data
|
||||
}
|
||||
|
||||
func init() {
|
||||
_ = pod.RegisterContainerLifeResources("runqlat", reflect.TypeOf(&latencyBpfData{}))
|
||||
tracing.RegisterEventTracing("runqlat", newRunqlatCollector)
|
||||
}
|
||||
|
||||
func newRunqlatCollector() (*tracing.EventTracingAttr, error) {
|
||||
collector := &runqlatCollector{
|
||||
runqlatMetric: []*metric.Data{
|
||||
metric.NewGaugeData("g_nlat_01", 0, "nlat_01 of host", nil),
|
||||
metric.NewGaugeData("g_nlat_02", 0, "nlat_02 of host", nil),
|
||||
metric.NewGaugeData("g_nlat_03", 0, "nlat_03 of host", nil),
|
||||
metric.NewGaugeData("g_nlat_04", 0, "nlat_04 of host", nil),
|
||||
},
|
||||
}
|
||||
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: collector,
|
||||
Internal: 10,
|
||||
Flag: tracing.FlagTracing | tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *runqlatCollector) Update() ([]*metric.Data, error) {
|
||||
runqlatMetric := []*metric.Data{}
|
||||
|
||||
if !runqlatRunning {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
containers, err := pod.GetContainersByType(pod.ContainerTypeNormal)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("GetContainersByType: %w", err)
|
||||
}
|
||||
|
||||
for _, container := range containers {
|
||||
metrics := container.LifeResouces("runqlat").(*latencyBpfData)
|
||||
|
||||
runqlatMetric = append(runqlatMetric,
|
||||
metric.NewContainerGaugeData(container, "nlat_01", float64(metrics.NumLatency01), "nlat_01", nil),
|
||||
metric.NewContainerGaugeData(container, "nlat_02", float64(metrics.NumLatency02), "nlat_02", nil),
|
||||
metric.NewContainerGaugeData(container, "nlat_03", float64(metrics.NumLatency03), "nlat_03", nil),
|
||||
metric.NewContainerGaugeData(container, "nlat_04", float64(metrics.NumLatency04), "nlat_04", nil))
|
||||
}
|
||||
|
||||
c.runqlatMetric[0].Value = float64(globalRunqlat.NumLatency01)
|
||||
c.runqlatMetric[1].Value = float64(globalRunqlat.NumLatency02)
|
||||
c.runqlatMetric[2].Value = float64(globalRunqlat.NumLatency03)
|
||||
c.runqlatMetric[3].Value = float64(globalRunqlat.NumLatency04)
|
||||
|
||||
runqlatMetric = append(runqlatMetric, c.runqlatMetric...)
|
||||
|
||||
return runqlatMetric, nil
|
||||
}
|
|
@ -0,0 +1,120 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/utils/bpfutil"
|
||||
)
|
||||
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/runqlat_tracing.c -o $BPF_DIR/runqlat_tracing.o
|
||||
|
||||
type latencyBpfData struct {
|
||||
NumVoluntarySwitch uint64
|
||||
NumInVoluntarySwitch uint64
|
||||
NumLatency01 uint64
|
||||
NumLatency02 uint64
|
||||
NumLatency03 uint64
|
||||
NumLatency04 uint64
|
||||
}
|
||||
|
||||
var (
|
||||
globalRunqlat latencyBpfData
|
||||
runqlatRunning bool
|
||||
)
|
||||
|
||||
func startRunqlatTracerWork(ctx context.Context) error {
|
||||
// load bpf.
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to LoadBpf, err: %w", err)
|
||||
}
|
||||
defer b.Close()
|
||||
|
||||
if err = b.Attach(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
childCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
b.WaitDetachByBreaker(childCtx, cancel)
|
||||
|
||||
runqlatRunning = true
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil
|
||||
default:
|
||||
var css uint64
|
||||
|
||||
items, err := b.DumpMapByName("cpu_tg_metric")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to dump cpu_tg_metric: %w", err)
|
||||
}
|
||||
for _, v := range items {
|
||||
buf := bytes.NewReader(v.Key)
|
||||
if err = binary.Read(buf, binary.LittleEndian, &css); err != nil {
|
||||
return fmt.Errorf("can't read cpu_tg_metric key: %w", err)
|
||||
}
|
||||
container, _ := pod.GetContainerByCSS(css, "cpu")
|
||||
if container == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
buf = bytes.NewReader(v.Value)
|
||||
if err = binary.Read(buf, binary.LittleEndian, container.LifeResouces("runqlat").(*latencyBpfData)); err != nil {
|
||||
return fmt.Errorf("can't read cpu_tg_metric value: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
item, err := b.ReadMap(b.MapIDByName("cpu_host_metric"), []byte{0, 0, 0, 0})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read cpu_host_metric: %w", err)
|
||||
}
|
||||
buf := bytes.NewReader(item)
|
||||
if err = binary.Read(buf, binary.LittleEndian, &globalRunqlat); err != nil {
|
||||
log.Errorf("can't read cpu_host_metric: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
time.Sleep(2 * time.Second)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Start runqlat work, load bpf and wait data from perf event
|
||||
func (c *runqlatCollector) Start(ctx context.Context) error {
|
||||
err := startRunqlatTracerWork(ctx)
|
||||
|
||||
containers, _ := pod.GetContainersByType(pod.ContainerTypeNormal)
|
||||
for _, container := range containers {
|
||||
runqlatData := container.LifeResouces("runqlat").(*latencyBpfData)
|
||||
*runqlatData = latencyBpfData{}
|
||||
}
|
||||
|
||||
runqlatRunning = false
|
||||
|
||||
return err
|
||||
}
|
|
@ -0,0 +1,122 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/internal/utils/parseutil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
|
||||
"github.com/prometheus/procfs"
|
||||
)
|
||||
|
||||
const (
|
||||
// CLK_TCK is a constant on Linux for all architectures except alpha and ia64.
|
||||
// See e.g.
|
||||
// https://git.musl-libc.org/cgit/musl/tree/src/conf/sysconf.c#n30
|
||||
// https://github.com/containerd/cgroups/pull/12
|
||||
// https://lore.kernel.org/lkml/agtlq6$iht$1@penguin.transmeta.com/
|
||||
userHZ int64 = 100
|
||||
)
|
||||
|
||||
type runtimeCollector struct {
|
||||
oldStat *procfs.ProcStat
|
||||
oldTs int64
|
||||
}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("runtime", newQosCollector)
|
||||
}
|
||||
|
||||
func newQosCollector() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &runtimeCollector{},
|
||||
Flag: tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *runtimeCollector) Update() ([]*metric.Data, error) {
|
||||
runtimeMetric := make([]*metric.Data, 0)
|
||||
|
||||
p, err := procfs.Self()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
runtimeMetric = append(runtimeMetric, getCPUMetric(c, &p)...)
|
||||
runtimeMetric = append(runtimeMetric, getMemoryMetric(&p)...)
|
||||
|
||||
return runtimeMetric, nil
|
||||
}
|
||||
|
||||
func getCPUMetric(c *runtimeCollector, p *procfs.Proc) []*metric.Data {
|
||||
stat, err := p.Stat()
|
||||
if err != nil {
|
||||
log.Warnf("not get process stat: %v", err)
|
||||
return nil
|
||||
}
|
||||
ts := time.Now().Unix()
|
||||
|
||||
if c.oldStat == nil {
|
||||
c.oldStat = &stat
|
||||
}
|
||||
|
||||
if c.oldTs == 0 {
|
||||
c.oldTs = ts
|
||||
return nil
|
||||
}
|
||||
|
||||
data := make([]*metric.Data, 2)
|
||||
duration := ts - c.oldTs
|
||||
|
||||
// huatuo-bamai.cpu.user(*100)
|
||||
user := float64(stat.UTime-c.oldStat.UTime) / float64(userHZ*duration)
|
||||
data[0] = metric.NewGaugeData("cpu_user", user*100, "user cpu", nil)
|
||||
|
||||
// huatuo-bamai.cpu.sys(*100)
|
||||
sys := float64(stat.STime-c.oldStat.STime) / float64(userHZ*duration)
|
||||
data[1] = metric.NewGaugeData("cpu_sys", sys*100, "sys cpu", nil)
|
||||
|
||||
// save stat
|
||||
c.oldStat = &stat
|
||||
c.oldTs = ts
|
||||
|
||||
return data
|
||||
}
|
||||
|
||||
func getMemoryMetric(p *procfs.Proc) []*metric.Data {
|
||||
data := make([]*metric.Data, 3)
|
||||
status, err := p.NewStatus()
|
||||
if err != nil {
|
||||
log.Warnf("not get process status: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
data[0] = metric.NewGaugeData("memory_vss", float64(status.VmSize)/1024, "memory vss", nil)
|
||||
data[1] = metric.NewGaugeData("memory_rss", float64(status.VmRSS)/1024, "memory rss", nil)
|
||||
|
||||
rssI, err := parseutil.ReadUint(cgrouputil.V1MemoryPath() + "/huatuo-bamai/memory.usage_in_bytes")
|
||||
if err != nil {
|
||||
log.Warnf("can't ParseUint, err: %v", err)
|
||||
return nil
|
||||
}
|
||||
data[2] = metric.NewGaugeData("memory_cgroup_rss", float64(rssI)/1024, "memory cgroup rss", nil)
|
||||
|
||||
return data
|
||||
}
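
The CPU figures above come from the usual /proc/[pid]/stat arithmetic: utime and stime are counted in clock ticks (USER_HZ = 100 on the relevant architectures), so the percentage is tick delta / (USER_HZ * seconds elapsed) * 100. A worked example of that calculation:

package main

import "fmt"

const userHZ = 100

// cpuPercent turns two utime (or stime) samples, taken seconds apart,
// into a CPU usage percentage.
func cpuPercent(oldTicks, newTicks uint64, seconds int64) float64 {
	return float64(newTicks-oldTicks) / float64(userHZ*seconds) * 100
}

func main() {
	// 150 ticks consumed over a 10 second window:
	// 150 / (100 * 10) = 0.15 -> 15% of one CPU.
	fmt.Println(cpuPercent(1000, 1150, 10)) // 15
}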
|
|
@ -0,0 +1,188 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
// ref: https://github.com/prometheus/node_exporter/tree/master/collector
|
||||
// - sockstat_linux.go
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
|
||||
"github.com/prometheus/procfs"
|
||||
)
|
||||
|
||||
type sockstatCollector struct{}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("sockstat", newSockstatCollector)
|
||||
}
|
||||
|
||||
func newSockstatCollector() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &sockstatCollector{},
|
||||
Flag: tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *sockstatCollector) Update() ([]*metric.Data, error) {
|
||||
log.Debugf("Updating sockstat metrics")
|
||||
|
||||
// normal containers
|
||||
containers, err := pod.GetNormalContainers()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("GetNormalContainers: %w", err)
|
||||
}
|
||||
|
||||
// support the empty container
|
||||
if containers == nil {
|
||||
containers = make(map[string]*pod.Container)
|
||||
}
|
||||
// append host into containers
|
||||
containers[""] = nil
|
||||
|
||||
var metrics []*metric.Data
|
||||
for _, container := range containers {
|
||||
m, err := c.procStatMetrics(container)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("couldn't get sockstat metrics for container %v: %w", container, err)
|
||||
}
|
||||
metrics = append(metrics, m...)
|
||||
}
|
||||
|
||||
log.Debugf("Updated sockstat metrics: %v", metrics)
|
||||
return metrics, nil
|
||||
}
|
||||
|
||||
func (c *sockstatCollector) procStatMetrics(container *pod.Container) ([]*metric.Data, error) {
|
||||
pid := 1 // host
|
||||
if container != nil {
|
||||
pid = container.InitPid
|
||||
}
|
||||
|
||||
// NOTE: non-standard use of procfs.NewFS.
|
||||
fs, err := procfs.NewFS(filepath.Join("/proc", strconv.Itoa(pid)))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open procfs: %w", err)
|
||||
}
|
||||
|
||||
// If IPv4 and/or IPv6 are disabled on this kernel, handle it gracefully.
|
||||
stat, err := fs.NetSockstat()
|
||||
switch {
|
||||
case err == nil:
|
||||
case errors.Is(err, os.ErrNotExist):
|
||||
log.Debug("IPv4 sockstat statistics not found, skipping")
|
||||
default:
|
||||
return nil, fmt.Errorf("failed to get IPv4 sockstat data: %w", err)
|
||||
}
|
||||
|
||||
if stat == nil { // nothing to do.
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var metrics []*metric.Data
|
||||
|
||||
// If sockstat contains the number of used sockets, export it.
|
||||
if stat.Used != nil {
|
||||
if container != nil {
|
||||
metrics = append(metrics,
|
||||
metric.NewContainerGaugeData(container, "sockets_used", float64(*stat.Used), "Number of IPv4 sockets in use.", nil))
|
||||
} else {
|
||||
metrics = append(metrics,
|
||||
metric.NewGaugeData("sockets_used", float64(*stat.Used), "Number of IPv4 sockets in use.", nil))
|
||||
}
|
||||
}
|
||||
|
||||
// A name and optional value for a sockstat metric.
|
||||
type ssPair struct {
|
||||
name string
|
||||
v *int
|
||||
}
|
||||
|
||||
// Previously these metric names were generated directly from the file output.
|
||||
// In order to keep the same level of compatibility, we must map the fields
|
||||
// to their correct names.
|
||||
for i := range stat.Protocols {
|
||||
p := stat.Protocols[i]
|
||||
pairs := []ssPair{
|
||||
{
|
||||
name: "inuse",
|
||||
v: &p.InUse,
|
||||
},
|
||||
{
|
||||
name: "orphan",
|
||||
v: p.Orphan,
|
||||
},
|
||||
{
|
||||
name: "tw",
|
||||
v: p.TW,
|
||||
},
|
||||
{
|
||||
name: "alloc",
|
||||
v: p.Alloc,
|
||||
},
|
||||
{
|
||||
name: "mem",
|
||||
v: p.Mem,
|
||||
},
|
||||
{
|
||||
name: "memory",
|
||||
v: p.Memory,
|
||||
},
|
||||
}
|
||||
|
||||
// Also export mem_bytes values for sockets which have a mem value
|
||||
// stored in pages.
|
||||
if p.Mem != nil {
|
||||
v := *p.Mem * skMemQuantum
|
||||
pairs = append(pairs, ssPair{
|
||||
name: "mem_bytes",
|
||||
v: &v,
|
||||
})
|
||||
}
|
||||
|
||||
for _, pair := range pairs {
|
||||
if pair.v == nil {
|
||||
// This value is not set for this protocol; nothing to do.
|
||||
continue
|
||||
}
|
||||
|
||||
// mem and mem_bytes are only reported for the host environment.
|
||||
if container != nil && (pair.name == "mem" || pair.name == "mem_bytes") {
|
||||
continue
|
||||
}
|
||||
|
||||
if container != nil {
|
||||
metrics = append(metrics,
|
||||
metric.NewContainerGaugeData(container, fmt.Sprintf("%s_%s", p.Protocol, pair.name), float64(*pair.v),
|
||||
fmt.Sprintf("Number of %s sockets in state %s.", p.Protocol, pair.name), nil))
|
||||
} else {
|
||||
metrics = append(metrics,
|
||||
metric.NewGaugeData(fmt.Sprintf("%s_%s", p.Protocol, pair.name), float64(*pair.v),
|
||||
fmt.Sprintf("Number of %s sockets in state %s.", p.Protocol, pair.name), nil))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return metrics, nil
|
||||
}
|
|
@ -0,0 +1,103 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
|
||||
"github.com/prometheus/procfs"
|
||||
)
|
||||
|
||||
const (
|
||||
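// skMemQuantum converts sockstat "mem" values (reported in pages) into bytes, assuming a 4 KiB page size.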
skMemQuantum = 4096
|
||||
)
|
||||
|
||||
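// tcpMemCollector exports system-wide TCP memory usage and limit metrics.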
type tcpMemCollector struct {
|
||||
tcpMemMetric []*metric.Data
|
||||
}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("tcp_mem", newTCPMemCollector)
|
||||
}
|
||||
|
||||
func newTCPMemCollector() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &tcpMemCollector{
|
||||
tcpMemMetric: []*metric.Data{
|
||||
metric.NewGaugeData("usage_pages", 0, "tcp mem usage(pages)", nil),
|
||||
metric.NewGaugeData("usage_bytes", 0, "tcp mem usage(bytes)", nil),
|
||||
metric.NewGaugeData("limit_pages", 0, "tcp mem limit(pages)", nil),
|
||||
metric.NewGaugeData("usage_percent", 0, "tcp mem usage percent", nil),
|
||||
},
|
||||
},
|
||||
Flag: tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
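// getTCPMem returns the current TCP memory usage in pages and bytes, and the tcp_mem hard limit in pages.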
func (c *tcpMemCollector) getTCPMem() (tcpMem, tcpMemBytes, tcpMemLimit float64, err error) {
|
||||
fs, err := procfs.NewDefaultFS()
|
||||
if err != nil {
|
||||
log.Infof("failed to open sysfs: %v", err)
|
||||
return -1, -1, -1, err
|
||||
}
|
||||
|
||||
values, err := fs.SysctlInts("net.ipv4.tcp_mem")
|
||||
if err != nil {
|
||||
log.Infof("error obtaining sysctl info: %v", err)
|
||||
return -1, -1, -1, err
|
||||
}
|
||||
|
||||
if len(values) != 3 {
return -1, -1, -1, fmt.Errorf("unexpected net.ipv4.tcp_mem value count: %d", len(values))
}
tcpMemLimit = float64(values[2])
|
||||
|
||||
stat4, err := fs.NetSockstat()
|
||||
if err != nil {
|
||||
log.Infof("failed to get NetSockstat: %v", err)
|
||||
return -1, -1, -1, err
|
||||
}
|
||||
|
||||
for _, p := range stat4.Protocols {
|
||||
if p.Protocol != "TCP" {
|
||||
continue
|
||||
}
|
||||
|
||||
if p.Mem == nil {
|
||||
return -1, -1, -1, fmt.Errorf("failed to read tcpmem usage")
|
||||
}
|
||||
|
||||
tcpMem = float64(*p.Mem)
|
||||
tcpMemBytes = float64(*p.Mem * skMemQuantum)
|
||||
}
|
||||
|
||||
return tcpMem, tcpMemBytes, tcpMemLimit, nil
|
||||
}
|
||||
|
||||
func (c *tcpMemCollector) Update() ([]*metric.Data, error) {
|
||||
tcpMem, tcpMemBytes, tcpMemLimit, err := c.getTCPMem()
|
||||
if err != nil {
|
||||
log.Infof("couldn't get tcpmem: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
c.tcpMemMetric[0].Value = tcpMem
|
||||
c.tcpMemMetric[1].Value = tcpMemBytes
|
||||
c.tcpMemMetric[2].Value = tcpMemLimit
|
||||
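// usage_percent is reported as the ratio of pages in use to the tcp_mem hard limit.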
c.tcpMemMetric[3].Value = tcpMem / tcpMemLimit
|
||||
|
||||
return c.tcpMemMetric, nil
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"huatuo-bamai/internal/log"
|
||||
)
|
||||
|
||||
// xfs_util reads the superblock of an XFS device to retrieve
|
||||
// essential information such as the block size and log size.
|
||||
const (
|
||||
XFS_SB_MAGIC = 0x58465342
|
||||
XFSLABEL_MAX = 12
|
||||
)
|
||||
|
||||
// Partial on-disk XFS superblock layout; fields that are not needed are replaced with blank padding of matching size.
|
||||
type xfsSuperBlock struct {
|
||||
SbMagicnum uint32
|
||||
SbBlocksize uint32
|
||||
_ [16]byte
|
||||
_ [7]uint64
|
||||
_ [4]uint32
|
||||
SbLogblocks uint32
|
||||
_ [6]uint16
|
||||
_ [XFSLABEL_MAX]byte
|
||||
_ [12]uint8
|
||||
_ [8]uint64
|
||||
_ [12]uint32
|
||||
_ [16]byte
|
||||
}
|
||||
|
||||
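// fileLineCounter counts the number of newline characters in filePath by reading it in large chunks.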
func fileLineCounter(filePath string) (int, error) {
|
||||
count := 0
|
||||
buf := make([]byte, 8*20*4096)
|
||||
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return count, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
r := io.Reader(file)
|
||||
|
||||
for {
|
||||
c, err := r.Read(buf)
|
||||
count += bytes.Count(buf[:c], []byte("\n"))
|
||||
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return count, err
|
||||
}
|
||||
}
|
||||
|
||||
return count, nil
|
||||
}
|
||||
|
||||
// xfsLogSize returns the size of the XFS log in bytes, computed from the superblock as log blocks * block size.
|
||||
func xfsLogSize(path string) (float64, error) {
|
||||
file, err := os.Open(path)
|
||||
if err != nil {
|
||||
log.Infof("open failed: %v", err)
|
||||
return -1, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
var sb xfsSuperBlock
|
||||
err = binary.Read(file, binary.BigEndian, &sb)
|
||||
if err != nil {
|
||||
log.Infof("read superblock failed: err%v", err)
|
||||
return -1, err
|
||||
}
|
||||
|
||||
// Check Magic Number of Super Block
|
||||
if sb.SbMagicnum != XFS_SB_MAGIC {
|
||||
log.Infof("Not a valid XFS superblock (Magic: 0x%x)", sb.SbMagicnum)
|
||||
return -1, fmt.Errorf("not a valid XFS superblock (magic: 0x%x)", sb.SbMagicnum)
|
||||
}
|
||||
|
||||
xlogBytes := float64(sb.SbLogblocks) * float64(sb.SbBlocksize)
|
||||
return xlogBytes, nil
|
||||
}
|
|
@ -0,0 +1,96 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
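// vmStatCollector exports selected /proc/vmstat fields as gauge metrics.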
type vmStatCollector struct{}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("vmstat", newVMStatCollector)
|
||||
}
|
||||
|
||||
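// vmStatMetricDesc maps /proc/vmstat field names to metric descriptions.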
var vmStatMetricDesc = map[string]string{
|
||||
"allocstall_normal": "host direct reclaim count on normal zone",
|
||||
"allocstall_movable": "host direct reclaim count on movable zone",
|
||||
"compact_stall": "memory compaction count",
|
||||
"nr_active_anon": "anonymous pages on active lru",
|
||||
"nr_active_file": "file pages on active lru",
|
||||
"nr_boost_pages": "kswapd boost pages",
|
||||
"nr_dirty": "dirty pages",
|
||||
"nr_free_pages": "free pages in buddy system",
|
||||
"nr_inactive_anon": "anonymous pages on inactive lru",
|
||||
"nr_inactive_file": "file pages on inactive lru",
|
||||
"nr_kswapd_boost": "kswapd boosting count",
|
||||
"nr_mlock": "mlocked pages",
|
||||
"nr_shmem": "shared memory pages",
|
||||
"nr_slab_reclaimable": "reclaimable slab pages",
|
||||
"nr_slab_unreclaimable": "unreclaimable slab pages",
|
||||
"nr_unevictable": "unevictable pages",
|
||||
"nr_writeback": "writing-back pages",
|
||||
"numa_pages_migrated": "numa migrated pages",
|
||||
"pgdeactivate": "pages deactivated from active lru to inactive lru",
|
||||
"pgrefill": "pages scanned on active lru",
|
||||
"pgscan_direct": "scanned pages in host direct reclaim",
|
||||
"pgscan_kswapd": "scanned pages in host kswapd reclaim",
|
||||
"pgsteal_direct": "reclaimed pages in host direct reclaim",
|
||||
"pgsteal_kswapd": "reclaimed pages in host kswapd reclaim",
|
||||
}
|
||||
|
||||
func newVMStatCollector() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &vmStatCollector{},
|
||||
Flag: tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *vmStatCollector) Update() ([]*metric.Data, error) {
|
||||
filter := newFieldFilter(conf.Get().MetricCollector.Vmstat.ExcludedMetrics,
|
||||
conf.Get().MetricCollector.Vmstat.IncludedMetrics)
|
||||
|
||||
file, err := os.Open("/proc/vmstat")
|
||||
if err != nil {
|
||||
log.Error("Fail to open vmstat")
|
||||
return nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
scanner := bufio.NewScanner(file)
|
||||
var metrics []*metric.Data
|
||||
for scanner.Scan() {
|
||||
parts := strings.Fields(scanner.Text())
|
||||
if len(parts) < 2 {
continue
}
if filter.ignored(parts[0]) {
|
||||
log.Debugf("Ignoring vmstat metric: %s", parts[0])
|
||||
continue
|
||||
}
|
||||
value, err := strconv.ParseFloat(parts[1], 64)
|
||||
if err != nil {
|
||||
log.Error("Fail to strconv")
|
||||
return nil, err
|
||||
}
|
||||
metrics = append(metrics,
|
||||
metric.NewGaugeData(parts[0], value, vmStatMetricDesc[parts[0]], nil))
|
||||
}
|
||||
return metrics, nil
|
||||
}
|
|
@ -0,0 +1,136 @@
|
|||
[简体中文](./CUSTOM_CN.md) | English
|
||||
|
||||
The HuaTuo framework provides three data collection modes: `autotracing`, `event`, and `metrics`, covering different monitoring scenarios and helping users gain comprehensive insight into system performance.
|
||||
|
||||
## Collection Mode Comparison
|
||||
| Mode | Type | Trigger Condition | Data Output | Use Case |
|
||||
|-----------------|----------------|-------------------|------------------|----------------|
|
||||
| **Autotracing** | Event-driven | Triggered on system anomalies | ES + Local Storage, Prometheus (optional) | Non-routine operations, triggered on anomalies |
|
||||
| **Event** | Event-driven | Continuously running, triggered on preset thresholds | ES + Local Storage, Prometheus (optional) | Continuous operations, directly dumps context |
|
||||
| **Metrics** | Metric collection | Passive collection | Prometheus format | Monitoring system metrics |
|
||||
|
||||
- **Autotracing**
|
||||
- **Type**: Event-driven (tracing).
|
||||
- **Function**: Automatically tracks system anomalies and dumps context when they occur.
|
||||
- **Features**:
|
||||
- When a system anomaly occurs, `autotracing` is triggered automatically to dump relevant context.
|
||||
- Data is stored in ES in real time and kept locally for subsequent analysis and troubleshooting. It can also be exposed in Prometheus format for statistics and alerting.
|
||||
- Suitable for scenarios with high performance overhead, such as triggering captures when metrics exceed a threshold or rise too quickly.
|
||||
- **Integrated Features**: CPU anomaly tracking (cpu idle), D-state tracking (dload), container contention (waitrate), memory burst allocation (memburst), disk anomaly tracking (iotracer).
|
||||
|
||||
- **Event**
|
||||
- **Type**: Event-driven (tracing).
|
||||
- **Function**: Continuously operates within the system context, dumping context directly when preset thresholds are met.
|
||||
- **Features**:
|
||||
- Unlike `autotracing`, `event` continuously operates within the system context, rather than being triggered by anomalies.
|
||||
- Data is also stored to ES and locally, and can be monitored in Prometheus format.
|
||||
- Suitable for continuous monitoring and real-time analysis, enabling timely detection of abnormal behaviors. The performance impact of `event` collection is negligible.
|
||||
- **Integrated Features**: Soft interrupt anomalies (softirq), memory allocation anomalies (oom), soft lockups (softlockup), D-state processes (hungtask), memory reclamation (memreclaim), abnormal packet drops (dropwatch), network ingress latency (netrecvlat).
|
||||
|
||||
- **Metrics**
|
||||
- **Type**: Metric collection.
|
||||
- **Function**: Collects performance metrics from subsystems.
|
||||
- **Features**:
|
||||
- Metric data can be sourced from regular procfs collection or derived from `tracing` (autotracing, event) data.
|
||||
- Outputs in Prometheus format for easy integration into Prometheus monitoring systems.
|
||||
- Unlike `tracing` data, `metrics` primarily focus on system performance indicators such as CPU usage, memory usage, and network traffic.
|
||||
- Suitable for monitoring system performance metrics, supporting real-time analysis and long-term trend observation.
|
||||
- **Integrated Features**: CPU (sys, usr, util, load, nr_running, etc.), memory (vmstat, memory_stat, directreclaim, asyncreclaim, etc.), IO (d2c, q2c, freeze, flush, etc.), network (arp, socket mem, qdisc, netstat, netdev, sockstat, etc.).
|
||||
|
||||
## Multiple Purposes of Tracing Mode
|
||||
Both `autotracing` and `event` belong to the **tracing** collection mode, offering the following dual purposes:
|
||||
1. **Real-time storage to ES and local storage**: For tracing and analyzing anomalies, helping users quickly identify root causes.
|
||||
2. **Output in Prometheus format**: As metric data integrated into Prometheus monitoring systems, providing comprehensive system monitoring capabilities.
|
||||
|
||||
By flexibly combining these three modes, users can comprehensively monitor system performance, capturing both contextual information during anomalies and continuous performance metrics to meet various monitoring needs.
|
||||
|
||||
# How to Add Custom Collection
|
||||
The framework provides convenient APIs, including module startup, data storage, container information, and BPF operations (load, attach, read, detach, unload). You can implement custom collection logic and flexibly choose the appropriate collection mode and storage method.
|
||||
|
||||
## Tracing Type
|
||||
Based on your scenarios, you can implement the `ITracingEvent` interface in the `core/autotracing` or `core/events` directory to complete tracing-type collection.
|
||||
```go
|
||||
// ITracingEvent represents a tracing/event
|
||||
type ITracingEvent interface {
|
||||
Start(ctx context.Context) error
|
||||
}
|
||||
```
|
||||
|
||||
example:
|
||||
```go
|
||||
type exampleTracing struct{}
|
||||
|
||||
// Register callback
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("example", newExample)
|
||||
}
|
||||
|
||||
// Create tracing
|
||||
func newExample() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &exampleTracing{},
|
||||
Internal: 10, // Interval before the tracing can be triggered again (in seconds)
|
||||
Flag: tracing.FlagTracing, // mark as tracing type
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Implement ITracingEvent
|
||||
func (t *exampleTracing) Start(ctx context.Context) error {
|
||||
// do something
|
||||
...
|
||||
|
||||
// Save data to ES and local file
|
||||
storage.Save("example", ccontainerID, time.Now(), tracerData)
|
||||
}
|
||||
|
||||
// Implement Collector interface for Prometheus format output (optional)
|
||||
func (c *exampleTracing) Update() ([]*metric.Data, error) {
|
||||
// from tracerData to prometheus.Metric
|
||||
...
|
||||
|
||||
return data, nil
|
||||
}
|
||||
```
|
||||
|
||||
## Metric Type
|
||||
Implement the `Collector` interface in the path `core/metrics` to complete metric-type collection.
|
||||
|
||||
```go
|
||||
type Collector interface {
|
||||
// Get new metrics and expose them via prometheus registry.
|
||||
Update() ([]*Data, error)
|
||||
}
|
||||
```
|
||||
|
||||
example:
|
||||
```go
|
||||
type exampleMetric struct {
metric []*metric.Data
}
|
||||
|
||||
// Register callback
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("example", newExample)
|
||||
}
|
||||
|
||||
// Create Metric
|
||||
func newExample() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &filenrCollector{
|
||||
metric: []*metric.Data{
|
||||
metric.NewGaugeData("name1", 0, "description of example_name1", nil),
|
||||
metric.NewGaugeData("name2", 0, "description of example_name2", nil),
|
||||
},
|
||||
},
|
||||
Flag: tracing.FlagMetric, // mark as Metric type
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Implement Collector interface for Prometheus format output
|
||||
func (c *exampleMetric) Update() ([]*metric.Data, error) {
|
||||
// do something
|
||||
...
|
||||
|
||||
return data, nil
|
||||
}
|
||||
```
|
||||
|
||||
The project's `core` directory includes multiple practical examples of the three collection modules, covering BPF code, map data interaction, container information, and more. For further details, refer to the corresponding code implementations.
|
|
@ -0,0 +1,136 @@
|
|||
[English](./CUSTOM.md) | 简体中文
|
||||
|
||||
本框架提供三种数据采集模式:`autotracing`、`event` 和 `metrics`,分别针对不同的监控场景和需求,帮助用户全面掌握系统的运行状态。
|
||||
|
||||
## 采集模式对比
|
||||
| 模式 | 类型 | 触发条件 | 数据输出 | 适用场景 |
|
||||
|------------- |----------------|--------------|------------------|-----------------|
|
||||
| **Autotracing** | 异常事件驱动 | 系统异常时触发 | ES + 本地存储,Prometheus(可选)| 不能常态运行,异常时触发运行 |
|
||||
| **Event** | 异常事件驱动 | 常态运行 | ES + 本地存储,Prometheus(可选)| 常态运行,直接抓取上下文信息 |
|
||||
| **Metrics** | 指标数据采集 | 被动采集 | Prometheus 格式 | 监控系统性能指标 |
|
||||
|
||||
- **Autotracing**
|
||||
- **类型**:异常事件驱动(tracing)。
|
||||
- **功能**:自动跟踪系统异常状态,并在异常发生时再触发抓取现场上下文信息。
|
||||
- **特点**:
|
||||
- 当系统出现异常时,`autotracing` 会自动触发,捕获相关的上下文信息。
|
||||
- 数据会实时上报到 ES 并存储在本地,便于后续分析和排查问题,也可通过 Prometheus 格式进行监控,便于统计和告警。
|
||||
- 适用于获取现场时性能开销较大的场景,例如检测到指标上升到一定阈值、上升速度过快再触发抓取。
|
||||
- **已集成**:cpu 异常使用跟踪(cpu idle)、D状态跟踪(dload)、容器内外部争抢(waitrate)、内存突发分配(memburst)、磁盘异常跟踪(iotracer)。
|
||||
|
||||
- **Event**
|
||||
- **类型**:异常事件驱动(tracing)。
|
||||
- **功能**:常态运行在系统上下文中,达到预设阈值直接抓取上下文信息。
|
||||
- **特点**:
|
||||
- 与 `autotracing` 不同,`event` 是常态运行,而不是在异常时再触发。
|
||||
- 数据同样会实时上报到 ES 并存储在本地,也可通过 Prometheus 格式进行监控。
|
||||
- 适合用于常态监控和实时分析,能够及时发现系统中的异常行为, `event` 类型的采集对系统性能影响可忽略。
|
||||
- **已集成**:软中断异常(softirq)、内存异常分配(oom)、软锁定(softlockup)、D 状态进程(hungtask)、内存回收(memreclaim)、异常丢包(dropwatch)、网络入向延迟(netrecvlat)。
|
||||
|
||||
- **Metrics**
|
||||
- **类型**:指标数据采集。
|
||||
- **功能**:采集各子系统的性能指标数据。
|
||||
- **特点**:
|
||||
- 指标数据可以来自常规 procfs 采集,也可以从 `tracing` (autotracing,event) 类型获取数据。
|
||||
- 以 Prometheus 格式输出,便于集成到 Prometheus 监控系统中。
|
||||
- 与 `tracing` 类数据不同,`metrics` 主要用于采集系统的性能指标,如 CPU 使用率、内存使用率、网络等。
|
||||
- 适合用于监控系统的性能指标,支持实时分析和长期趋势观察。
|
||||
- **已集成**:cpu (sys, usr, util, load, nr_running...), memory(vmstat, memory_stat, directreclaim, asyncreclaim...), IO(d2c, q2c, freeze, flush...), 网络(arp, socket mem, qdisc, netstat, netdev, socketstat...)
|
||||
|
||||
## Tracing 模式的多重用途
|
||||
`autotracing` 和 `event` 都属于 **tracing** 类数据采集模式,它们具备以下双重用途:
|
||||
1. **实时保存到 ES 和 本地存储**:用于异常事件的追踪和分析,帮助用户快速根因定位。
|
||||
2. **以 Prometheus 格式输出**:作为指标数据集成到 Prometheus 监控系统中,提供更全面的系统监控能力。
|
||||
|
||||
通过这三种模式的灵活组合,用户可以全面监控系统的运行状态,既能捕获异常事件的上下文信息,也能持续采集性能指标数据,满足不同场景下的监控需求。
|
||||
|
||||
# 如何添加自定义采集
|
||||
框架提供了非常便捷的 API,包括模块启动、数据存储、容器信息、bpf 相关 (load, attach, read, detach, unload)等,用户可通过自定义的采集逻辑,灵活选择合适的采集模式和数据存储的方式。
|
||||
|
||||
## tracing 类型
|
||||
根据实际场景,你可以在 `core/autotracing` 或 `core/events` 目录下实现接口 `ITracingEvent` 即可完成 tracing 类型的采集。
|
||||
```go
|
||||
// ITracingEvent represents a tracing/event
|
||||
type ITracingEvent interface {
|
||||
Start(ctx context.Context) error
|
||||
}
|
||||
```
|
||||
|
||||
步骤如下:
|
||||
```go
|
||||
type exampleTracing struct{}
|
||||
|
||||
// 注册回调
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("example", newExample)
|
||||
}
|
||||
|
||||
// 创建 tracing
|
||||
func newExample() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &exampleTracing{},
|
||||
Internal: 10, // 再次开启 tracing 的间隔时间 seconds
|
||||
Flag: tracing.FlagTracing, // 标记为 tracing 类型
|
||||
}, nil
|
||||
}
|
||||
|
||||
// 实现接口 ITracingEvent
|
||||
func (t *exampleTracing) Start(ctx context.Context) error {
|
||||
// do something
|
||||
...
|
||||
|
||||
// 存储数据到 ES 和 本地
|
||||
storage.Save("example", ccontainerID, time.Now(), tracerData)
|
||||
}
|
||||
|
||||
// 也可同时实现接口 Collector 以 Prometheus 格式输出 (可选)
|
||||
func (c *exampleTracing) Update() ([]*metric.Data, error) {
|
||||
// from tracerData to prometheus.Metric
|
||||
...
|
||||
|
||||
return data, nil
|
||||
}
|
||||
```
|
||||
|
||||
## Metric 类型
|
||||
在 `core/metrics` 目录下添加接口 `Collector` 的实现即可完成 Metric 类型的采集。
|
||||
|
||||
```go
|
||||
type Collector interface {
|
||||
// Get new metrics and expose them via prometheus registry.
|
||||
Update() ([]*Data, error)
|
||||
}
|
||||
```
|
||||
|
||||
步骤如下:
|
||||
```go
|
||||
type exampleMetric struct {
metric []*metric.Data
}
|
||||
|
||||
// 注册回调
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("example", newExample)
|
||||
}
|
||||
|
||||
// 创建 Metric
|
||||
func newExample() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &filenrCollector{
|
||||
metric: []*metric.Data{
|
||||
metric.NewGaugeData("name1", 0, "description of example_name1", nil),
|
||||
metric.NewGaugeData("name2", 0, "description of example_name2", nil),
|
||||
},
|
||||
},
|
||||
Flag: tracing.FlagMetric, // 标记为 Metric 类型
|
||||
}, nil
|
||||
}
|
||||
|
||||
// 实现接口 Collector 以 Prometheus 格式输出
|
||||
func (c *exampleMetric) Update() ([]*metric.Data, error) {
|
||||
// do something
|
||||
...
|
||||
|
||||
return data, nil
|
||||
}
|
||||
```
|
||||
|
||||
在项目 core 目录下已集成了 3 个采集模块的多种实际场景的示例,包括 bpf 代码、map 数据交互、容器信息等,更多详情可参考对应代码实现。
|
|
@ -0,0 +1,270 @@
|
|||
| Subsystem | Metric | Description | Unit | Dimension | Source |
|
||||
| --------- | ------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------- | -------------- | ------------------------------------------------------------------------------------- |
|
||||
| cpu | cpu_util_sys | Time spent running kernel processes as a percentage of host CPU time | % | host | Calculated from cpuacct.stat and cpuacct.usage |
|
||||
| cpu | cpu_util_usr | Time spent running user processes as a percentage of host CPU time | % | host | Calculated from cpuacct.stat and cpuacct.usage |
|
||||
| cpu | cpu_util_total | Total running time as a percentage of host CPU time | % | host | Calculated from cpuacct.stat and cpuacct.usage |
|
||||
| cpu | cpu_util_container_sys | Time spent running kernel processes as a percentage of container CPU time | % | container | Calculated from cpuacct.stat and cpuacct.usage |
|
||||
| cpu | cpu_util_container_usr | Time spent running user processes as a percentage of container CPU time | % | container | Calculated from cpuacct.stat and cpuacct.usage |
|
||||
| cpu | cpu_util_container_total | Total running time as a percentage of container CPU time | % | container | Calculated from cpuacct.stat and cpuacct.usage |
|
||||
| cpu | cpu_stat_container_burst_time | Cumulative wall-time (in nanoseconds) that any CPUs has used above quota in respective periods | ns | container | cpu.stat |
|
||||
| cpu | cpu_stat_container_nr_bursts | Number of periods burst occurs | count | container | cpu.stat |
|
||||
| cpu | cpu_stat_container_nr_throttled | Number of times the group has been throttled/limited | count | container | cpu.stat |
|
||||
| cpu | cpu_stat_container_exter_wait_rate | Wait rate caused by processes outside the container | % | container | Calculated from throttled_time/hierarchy_wait_sum/inner_wait_sum read from cpu.stat |
|
||||
| cpu | cpu_stat_container_inner_wait_rate | Wait rate caused by processes inside the container | % | container | Calculated from throttled_time/hierarchy_wait_sum/inner_wait_sum read from cpu.stat |
|
||||
| cpu | cpu_stat_container_throttle_wait_rate | Wait rate caused by throttling of the container | % | container | Calculated from throttled_time/hierarchy_wait_sum/inner_wait_sum read from cpu.stat |
|
||||
| cpu | cpu_stat_container_wait_rate | Total wait rate: exter_wait_rate + inner_wait_rate + throttle_wait_rate | % | container | Calculated from throttled_time/hierarchy_wait_sum/inner_wait_sum read from cpu.stat |
|
||||
| cpu | loadavg_container_container_nr_running | The number of running tasks in the container | count | container | obtained from the kernel via netlink |
|
||||
| cpu | loadavg_container_container_nr_uninterruptible | The number of uninterruptible tasks in the container | count | container | obtained from the kernel via netlink |
|
||||
| cpu | loadavg_load1 | System load avg over the last 1 minute | count | host | proc fs |
|
||||
| cpu | loadavg_load5 | System load avg over the last 5 minutes | count | host | proc fs |
|
||||
| cpu | loadavg_load15 | System load avg over the last 15 minutes | count | host | proc fs |
|
||||
| cpu | monsoftirq_latency | The number of NET_RX/NET_TX softirq latency events that fell into the following ranges:<br>0~10us<br>10us ~ 100us<br>100us ~ 1ms<br>1ms ~ inf | count | host | hook the softirq event and do time statistics via bpf |
|
||||
| cpu | runqlat_container_nlat_01 | The number of times when schedule latency of processes in the container is within 0~10ms | count | container | hook the scheduling switch event and do time statistics via bpf |
|
||||
| cpu | runqlat_container_nlat_02 | The number of times when schedule latency of processes in the container is within 10~20ms | count | container | hook the scheduling switch event and do time statistics via bpf |
|
||||
| cpu | runqlat_container_nlat_03 | The number of times when schedule latency of processes in the container is within 20~50ms | count | container | hook the scheduling switch event and do time statistics via bpf |
|
||||
| cpu | runqlat_container_nlat_04 | The number of times when schedule latency of processes in the container is more than 50ms | count | container | hook the scheduling switch event and do time statistics via bpf |
|
||||
| cpu | runqlat_g_nlat_01 | The number of times when schedule latency of processes in the host is within<br>0~10ms | count | host | hook the scheduling switch event and do time statistics via bpf |
|
||||
| cpu | runqlat_g_nlat_02 | The number of times when schedule latency of processes in the host is within 10~20ms | count | host | hook the scheduling switch event and do time statistics via bpf |
|
||||
| cpu | runqlat_g_nlat_03 | The number of times when schedule latency of processes in the host is within 20~50ms | count | host | hook the scheduling switch event and do time statistics via bpf |
|
||||
| cpu | runqlat_g_nlat_04 | The number of times when schedule latency of processes in the host is more than 50ms | count | host | hook the scheduling switch event and do time statistics via bpf |
|
||||
| cpu | reschedipi_oversell_probability | The possibility of cpu overselling exists on the host where the vm is located | 0-1 | host | hook the scheduling ipi event and do time statistics via bpf |
|
||||
| memory | buddyinfo_blocks | Kernel memory allocator information | pages | host | proc fs |
|
||||
| memory | memory_events_container_watermark_inc | Counts of memory allocation watermark increasing | count | container | memory.events |
|
||||
| memory | memory_events_container_watermark_dec | Counts of memory allocation watermark decreasing | count | container | memory.events |
|
||||
| memory | memory_others_container_local_direct_reclaim_time | Time spent in local direct reclaim during page allocation in the memory cgroup | nanosecond | container | memory.local_direct_reclaim_time |
|
||||
| memory | memory_others_container_directstall_time | Memory cgroup's direct reclaim time in try_charge | nanosecond | container | memory.directstall_stat |
|
||||
| memory | memory_others_container_asyncreclaim_time | Memory cgroup's direct reclaim time in cgroup async memory reclaim | nanosecond | container | memory.asynreclaim_stat |
|
||||
| memory | priority_reclaim_kswapd | Kswapd's reclaim stat in priority reclaiming | pages | host | proc fs |
|
||||
| memory | priority_reclaim_direct | Direct reclaim stat in priority reclaiming | pages | host | proc fs |
|
||||
| memory | memory_stat_container_writeback | Bytes of file/anon cache that are queued for syncing to disk | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_unevictable | Bytes of memory that cannot be reclaimed (mlocked etc) | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_shmem | Bytes of shmem memory | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_pgsteal_kswapd | Bytes of reclaimed memory by kswapd and cswapd | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_pgsteal_globalkswapd | Bytes of reclaimed memory by kswapd | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_pgsteal_globaldirect | Bytes of reclaimed memory by direct reclaim during page allocation | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_pgsteal_direct | Bytes of reclaimed memory by direct reclaim during page allocation and try_charge | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_pgsteal_cswapd | Bytes of reclaimed memory by cswapd | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_pgscan_kswapd | Bytes of scanned memory by kswapd and cswapd | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_pgscan_globalkswapd | Bytes of scanned memory by kswapd | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_pgscan_globaldirect | Bytes of scanned memory by direct reclaim during page allocation | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_pgscan_direct | Bytes of scanned memory by direct reclaim during page allocation and try_charge | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_pgscan_cswapd | Bytes of scanned memory by cswapd | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_pgrefill | Bytes of memory that is scanned in active list | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_pgdeactivate | Bytes of memory that is deactivated into inactive list | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_inactive_file | Bytes of file-backed memory on inactive lru list. | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_inactive_anon | Bytes of anonymous and swap cache memory on inactive lru list | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_dirty | Bytes that are waiting to get written back to the disk | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_active_file | Bytes of file-backed memory on active lru list | bytes | container | memory.stat |
|
||||
| memory | memory_stat_container_active_anon | Bytes of anonymous and swap cache memory on active lru list | bytes | container | memory.stat |
|
||||
| memory | mountpoint_perm_ro | Whether mountpoint is readonly or not | bool | host | proc fs |
|
||||
| memory | vmstat_allocstall_normal | Host direct reclaim count on normal zone | count | host | /proc/vmstat |
|
||||
| memory | vmstat_allocstall_movable | Host direct reclaim count on movable zone | count | host | /proc/vmstat |
|
||||
| memory | vmstat_compact_stall | Count of memory compaction | count | host | /proc/vmstat |
|
||||
| memory | vmstat_nr_active_anon | Number of anonymous pages on active lru | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_nr_active_file | Number of file-backed pages on active lru | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_nr_boost_pages | Number of pages in kswapd boosting | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_nr_dirty | Number of dirty pages | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_nr_free_pages | Number of free pages | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_nr_inactive_anon | Number of anonymous pages on inactive lru | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_nr_inactive_file | Number of file-backed pages on inactive lru | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_nr_kswapd_boost | Count of kswapd boosting | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_nr_mlock | Number of locked pages | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_nr_shmem | Number of shmem pages | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_nr_slab_reclaimable | Number of reclaimable slab pages | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_nr_slab_unreclaimable | Number of unreclaimable slab pages | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_nr_unevictable | Number of unevictable pages | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_nr_writeback | Number of pages under writeback | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_numa_pages_migrated | Number of pages in numa migrating | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_pgdeactivate | Number of pages which are deactivated into inactive lru | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_pgrefill | Number of pages which are scanned on active lru | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_pgscan_direct | Number of pages which are scanned in direct reclaim | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_pgscan_kswapd | Number of pages which are scanned in kswapd reclaim | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_pgsteal_direct | Number of pages which are reclaimed in direct reclaim | pages | host | /proc/vmstat |
|
||||
| memory | vmstat_pgsteal_kswapd | Number of pages which are reclaimed in kswapd reclaim | pages | host | /proc/vmstat |
|
||||
| memory | hungtask_happened | Count of hungtask events | count | host | performance and statistics monitoring for BPF Programs |
|
||||
| memory | oom_happened | Count of oom events | count | host,container | performance and statistics monitoring for BPF Programs |
|
||||
| memory | softlockup_happened | Count of softlockup events | count | host | performance and statistics monitoring for BPF Programs |
|
||||
| memory | mmhostbpf_compactionstat | Time spent in memory compaction | nanosecond | host | performance and statistics monitoring for BPF Programs |
|
||||
| memory | mmhostbpf_allocstallstat | Time spent in direct memory reclaim on the host | nanosecond | host | performance and statistics monitoring for BPF Programs |
|
||||
| memory | mmcgroupbpf_container_directstallcount | Count of cgroup's try_charge direct reclaim | count | container | performance and statistics monitoring for BPF Programs |
|
||||
| IO | iolatency_disk_d2c | Statistics of io latency when accessing the disk, including the time consumed by the driver and hardware components | count | host | performance and statistics monitoring for BPF Programs |
|
||||
| IO | iolatency_disk_q2c | Statistics of io latency for the entire io lifecycle when accessing the disk | count | host | performance and statistics monitoring for BPF Programs |
|
||||
| IO | iolatency_container_d2c | Statistics of io latency when accessing the disk, including the time consumed by the driver and hardware components | count | container | performance and statistics monitoring for BPF Programs |
|
||||
| IO | iolatency_container_q2c | Statistics of io latency for the entire io lifecycle when accessing the disk | count | container | performance and statistics monitoring for BPF Programs |
|
||||
| IO | iolatency_disk_flush | Statistics of delay for flush operations on disk raid device | count | host | performance and statistics monitoring for BPF Programs |
|
||||
| IO | iolatency_container_flush | Statistics of delay for flush operations on disk raid devices caused by containers | count | container | performance and statistics monitoring for BPF Programs |
|
||||
| IO | iolatency_disk_freeze | Statistics of disk freeze events | count | host | performance and statistics monitoring for BPF Programs |
|
||||
| network | tcp_mem_limit_pages | System TCP total memory size limit | pages | system | proc fs |
|
||||
| network | tcp_mem_usage_bytes | The total number of bytes of TCP memory used by the system | bytes | system | tcp_mem_usage_pages \* page_size |
|
||||
| network | tcp_mem_usage_pages | The total size of TCP memory used by the system | pages | system | proc fs |
|
||||
| network | tcp_mem_usage_percent | The percentage of TCP memory used by the system to the limit size | % | system | tcp_mem_usage_pages / tcp_mem_limit_pages |
|
||||
| network | arp_entries | The number of arp cache entries | count | host,container | proc fs |
|
||||
| network | arp_total | Total number of arp cache entries | count | system | proc fs |
|
||||
| network | qdisc_backlog | The number of bytes queued to be sent | bytes | host | sum of same level(parent major) for a device |
|
||||
| network | qdisc_bytes_total | The number of bytes sent | bytes | host | sum of same level(parent major) for a device |
|
||||
| network | qdisc_current_queue_length | The number of packets queued for sending | count | host | sum of same level(parent major) for a device |
|
||||
| network | qdisc_drops_total | The number of discarded packets | count | host | sum of same level(parent major) for a device |
|
||||
| network | qdisc_overlimits_total | The number of queued packets exceeds the limit | count | host | sum of same level(parent major) for a device |
|
||||
| network | qdisc_packets_total | The number of packets sent | count | host | sum of same level(parent major) for a device |
|
||||
| network | qdisc_requeues_total | The number of packets that were not sent successfully and were requeued | count | host | sum of same level(parent major) for a device |
|
||||
| network | ethtool_hardware_rx_dropped_errors | Statistics of inbound packets dropped or errored on the interface | count | host | related to hardware drivers, such as mlx, ixgbe, bnxt_en, etc. |
|
||||
| network | netdev_receive_bytes_total | Number of good received bytes | bytes | host,container | proc fs |
|
||||
| network | netdev_receive_compressed_total | Number of correctly received compressed packets | count | host,container | proc fs |
|
||||
| network | netdev_receive_dropped_total | Number of packets received but not processed | count | host,container | proc fs |
|
||||
| network | netdev_receive_errors_total | Total number of bad packets received on this network device | count | host,container | proc fs |
|
||||
| network | netdev_receive_fifo_total | Receiver FIFO error counter | count | host,container | proc fs |
|
||||
| network | netdev_receive_frame_total | Receiver frame alignment errors | count | host,container | proc fs |
|
||||
| network | netdev_receive_multicast_total | Multicast packets received. For hardware interfaces this statistic is commonly calculated at the device level (unlike rx_packets) and therefore may include packets which did not reach the host | count | host,container | proc fs |
|
||||
| network | netdev_receive_packets_total | Number of good packets received by the interface | count | host,container | proc fs |
|
||||
| network | netdev_transmit_bytes_total | Number of good transmitted bytes, corresponding to tx_packets | bytes | host,container | proc fs |
|
||||
| network | netdev_transmit_carrier_total | Number of frame transmission errors due to loss of carrier during transmission | count | host,container | proc fs |
|
||||
| network | netdev_transmit_colls_total | Number of collisions during packet transmissions | count | host,container | proc fs |
|
||||
| network | netdev_transmit_compressed_total | Number of transmitted compressed packets | count | host,container | proc fs |
|
||||
| network | netdev_transmit_dropped_total | Number of packets dropped on their way to transmission, e.g. due to lack of resources | count | host,container | proc fs |
|
||||
| network | netdev_transmit_errors_total | Total number of transmit problems | count | host,container | proc fs |
|
||||
| network | netdev_transmit_fifo_total | Number of frame transmission errors due to device FIFO underrun / underflow | count | host,container | proc fs |
|
||||
| network | netdev_transmit_packets_total | Number of packets successfully transmitted | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_ArpFilter | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_BusyPollRxPackets | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_DelayedACKLocked | A delayed ACK timer expires, but the TCP stack can’t send an ACK immediately because the socket is locked by a userspace program. The TCP stack will send a pure ACK later (after the userspace program unlocks the socket). When the TCP stack sends the pure ACK later, it will also update TcpExtDelayedACKs and exit the delayed ACK mode | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_DelayedACKLost | It will be updated when the TCP stack receives a packet which has been ACKed. A Delayed ACK loss might cause this issue, but it would also be triggered by other reasons, such as a packet is duplicated in the network | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_DelayedACKs | A delayed ACK timer expires. The TCP stack will send a pure ACK packet and exit the delayed ACK mode | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_EmbryonicRsts | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_IPReversePathFilter | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_ListenDrops | When the kernel receives a SYN from a client and the TCP accept queue is full, the kernel drops the SYN and adds 1 to TcpExtListenOverflows, and at the same time adds 1 to TcpExtListenDrops. Whenever a TCP socket in LISTEN state needs to drop a packet, the kernel always adds 1 to TcpExtListenDrops. So an increase in TcpExtListenOverflows also increases TcpExtListenDrops, but TcpExtListenDrops can also increase without TcpExtListenOverflows increasing, e.g. a memory allocation failure would also increase TcpExtListenDrops | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_ListenOverflows | When the kernel receives a SYN from a client and the TCP accept queue is full, the kernel drops the SYN and adds 1 to TcpExtListenOverflows, and at the same time adds 1 to TcpExtListenDrops. Whenever a TCP socket in LISTEN state needs to drop a packet, the kernel always adds 1 to TcpExtListenDrops. So an increase in TcpExtListenOverflows also increases TcpExtListenDrops, but TcpExtListenDrops can also increase without TcpExtListenOverflows increasing, e.g. a memory allocation failure would also increase TcpExtListenDrops | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_LockDroppedIcmps | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_OfoPruned | The TCP stack tries to discard packet on the out of order queue | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_OutOfWindowIcmps | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_PAWSActive | Packets are dropped by PAWS in Syn-Sent status | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_PAWSEstab | Packets are dropped by PAWS in any status other than Syn-Sent | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_PFMemallocDrop | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_PruneCalled | The TCP stack tries to reclaim memory for a socket. After updates this counter, the TCP stack will try to collapse the out of order queue and the receiving queue. If the memory is still not enough, the TCP stack will try to discard packets from the out of order queue (and update the TcpExtOfoPruned counter) | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_RcvPruned | After ‘collapse’ and discard packets from the out of order queue, if the actually used memory is still larger than the max allowed memory, this counter will be updated. It means the ‘prune’ fails | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_SyncookiesFailed | The MSS decoded from the SYN cookie is invalid. When this counter is updated, the received packet won’t be treated as a SYN cookie and the TcpExtSyncookiesRecv counter won’t be updated | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_SyncookiesRecv | How many reply packets of the SYN cookies the TCP stack receives | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_SyncookiesSent | It indicates how many SYN cookies are sent | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPACKSkippedChallenge | The ACK is skipped if the ACK is a challenge ACK | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPACKSkippedFinWait2 | The ACK is skipped in Fin-Wait-2 status, the reason would be either PAWS check fails or the received sequence number is out of window | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPACKSkippedPAWS | The ACK is skipped due to PAWS (Protect Against Wrapped Sequence numbers) check fails | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPACKSkippedSeq | The sequence number is out of window and the timestamp passes the PAWS check and the TCP status is not Syn-Recv, Fin-Wait-2, and Time-Wait | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPACKSkippedSynRecv | The ACK is skipped in Syn-Recv status. The Syn-Recv status means the TCP stack receives a SYN and replies SYN+ACK | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPACKSkippedTimeWait | The ACK is skipped in Time-Wait status, the reason would be either PAWS check failed or the received sequence number is out of window | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPAbortFailed | The kernel TCP layer will send RST if the RFC2525 2.17 section is satisfied. If an internal error occurs during this process, TcpExtTCPAbortFailed will be increased | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPAbortOnClose | Number of sockets closed when the user-mode program has data in the buffer | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPAbortOnData | It means TCP layer has data in flight, but need to close the connection | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPAbortOnLinger | When a TCP connection comes into FIN_WAIT_2 state, instead of waiting for the fin packet from the other side, kernel could send a RST and delete the socket immediately | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPAbortOnMemory | When an application closes a TCP connection, kernel still need to track the connection, let it complete the TCP disconnect process | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPAbortOnTimeout | This counter will increase when any of the TCP timers expire. In such situation, kernel won’t send RST, just give up the connection | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPAckCompressed | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPAutoCorking | When sending packets, the TCP layer will try to merge small packets to a bigger one | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPBacklogDrop | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPChallengeACK | The number of challenge acks sent | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDSACKIgnoredNoUndo | When a DSACK block is invalid, one of these two counters would be updated. Which counter will be updated depends on the undo_marker flag of the TCP socket | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDSACKIgnoredOld | When a DSACK block is invalid, one of these two counters would be updated. Which counter will be updated depends on the undo_marker flag of the TCP socket | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDSACKOfoRecv | The TCP stack receives a DSACK, which indicate an out of order duplicate packet is received | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDSACKOfoSent | The TCP stack receives an out of order duplicate packet, so it sends a DSACK to the sender | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDSACKOldSent | The TCP stack receives a duplicate packet which has been acked, so it sends a DSACK to the sender | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDSACKRecv | The TCP stack receives a DSACK, which indicates an acknowledged duplicate packet is received | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDSACKUndo | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDeferAcceptDrop | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDelivered | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDeliveredCE | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPFastOpenActive | When the TCP stack receives an ACK packet in the SYN-SENT status, and the ACK packet acknowledges the data in the SYN packet, the TCP stack understand the TFO cookie is accepted by the other side, then it updates this counter | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPFastOpenActiveFail | Fast Open attempts (SYN/data) failed because the remote does not accept it or the attempts timed out | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPFastOpenBlackhole | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPFastOpenCookieReqd | This counter indicates how many times a client wants to request a TFO cookie | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPFastOpenListenOverflow | When the pending fast open request number is larger than fastopenq->max_qlen, the TCP stack will reject the fast open request and update this counter | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPFastOpenPassive | This counter indicates how many times the TCP stack accepts the fast open request | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPFastOpenPassiveFail | This counter indicates how many times the TCP stack rejects the fast open request. It is caused by either the TFO cookie is invalid or the TCP stack finds an error during the socket creating process | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPFastRetrans | The TCP stack wants to retransmit a packet and the congestion control state is not ‘Loss’ | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPFromZeroWindowAdv | The TCP receive window is changed from zero to a non-zero value | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPFullUndo | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPHPAcks | If a packet set ACK flag and has no data, it is a pure ACK packet, if kernel handles it in the fast path, TcpExtTCPHPAcks will increase 1 | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPHPHits | If a TCP packet has data (which means it is not a pure ACK packet), and this packet is handled in the fast path, TcpExtTCPHPHits will increase 1 | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPHystartDelayCwnd | The sum of CWND detected by packet delay. Dividing this value by TcpExtTCPHystartDelayDetect is the average CWND which detected by the packet delay | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPHystartDelayDetect | How many times the packet delay threshold is detected | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPHystartTrainCwnd | The sum of CWND detected by ACK train length. Dividing this value by TcpExtTCPHystartTrainDetect is the average CWND which detected by the ACK train length | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPHystartTrainDetect | How many times the ACK train length threshold is detected | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPKeepAlive | This counter indicates how many keepalive packets were sent. Keepalive is not enabled by default; a userspace program can enable it by setting the SO_KEEPALIVE socket option | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPLossFailures | Number of connections that enter the TCP_CA_Loss phase and then undergo RTO timeout | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPLossProbeRecovery | A packet loss is detected and recovered by TLP | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPLossProbes | A TLP probe packet is sent | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPLossUndo | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPLostRetransmit | A SACK points out that a retransmission packet is lost again | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPMD5Failure | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPMD5NotFound | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPMD5Unexpected | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPMTUPFail | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPMTUPSuccess | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPMemoryPressures | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPMemoryPressuresChrono | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPMinTTLDrop | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPOFODrop | The TCP layer receives an out of order packet but doesn’t have enough memory, so drops it. Such packets won’t be counted into TcpExtTCPOFOQueue | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPOFOMerge | The received out-of-order packet overlaps with the previous packet; the overlapping part will be dropped. All TcpExtTCPOFOMerge packets will also be counted into TcpExtTCPOFOQueue | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPOFOQueue | The TCP layer receives an out of order packet and has enough memory to queue it | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPOrigDataSent | Number of outgoing packets with original data (excluding retransmission but including data-in-SYN). This counter is different from TcpOutSegs because TcpOutSegs also tracks pure ACKs. TCPOrigDataSent is more useful to track the TCP retransmission rate | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPPartialUndo | Detected some erroneous retransmits: a partial ACK arrived while we were fast retransmitting, so we were able to partially undo some of our CWND reduction | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPPureAcks | If a packet sets the ACK flag and has no data, it is a pure ACK packet; if the kernel handles it in the fast path, TcpExtTCPHPAcks increases by 1, and if the kernel handles it in the slow path, TcpExtTCPPureAcks increases by 1 | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPRcvCoalesce | When packets are received by the TCP layer and are not read by the application, the TCP layer will try to merge them. This counter indicates how many packets are merged in such a situation. If GRO is enabled, many packets are merged by GRO, and those packets are not counted toward TcpExtTCPRcvCoalesce | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPRcvCollapsed | This counter indicates how many skbs are freed during ‘collapse’ | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPRenoFailures | Number of failures that enter the TCP_CA_Disorder phase and then undergo RTO | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPRenoRecovery | When congestion control enters the Recovery state, TcpExtTCPSackRecovery increases by 1 if SACK is used, and TcpExtTCPRenoRecovery increases by 1 if SACK is not used. These two counters mean the TCP stack has begun to retransmit lost packets | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPRenoRecoveryFail | Number of connections that enter the Recovery phase and then undergo RTO | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPRenoReorder | The reorder packet is detected by fast recovery. It would only be used if SACK is disabled | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPReqQFullDoCookies | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPReqQFullDrop | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPRetransFail | The TCP stack tries to deliver a retransmission packet to lower layers but the lower layers return an error | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPSACKDiscard | This counter indicates how many SACK blocks are invalid. If the invalid SACK block is caused by ACK recording, the TCP stack will only ignore it and won’t update this counter | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPSACKReneging | A packet was acknowledged by SACK, but the receiver has dropped this packet, so the sender needs to retransmit this packet | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPSACKReorder | The reorder packet detected by SACK | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPSYNChallenge | The number of challenge acks sent in response to SYN packets | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPSackFailures | Number of failures that enter the TCP_CA_Disorder phase and then undergo RTO | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPSackMerged | A skb is merged | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPSackRecovery | When congestion control enters the Recovery state, TcpExtTCPSackRecovery increases by 1 if SACK is used, and TcpExtTCPRenoRecovery increases by 1 if SACK is not used. These two counters mean the TCP stack has begun to retransmit lost packets | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPSackRecoveryFail | When congestion control enters the Recovery state with SACK in use, TcpExtTCPSackRecovery increases by 1 | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPSackShiftFallback | A skb should be shifted or merged, but the TCP stack doesn’t do it for some reason | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPSackShifted | A skb is shifted | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPSlowStartRetrans | The TCP stack wants to retransmit a packet and the congestion control state is ‘Loss’ | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPSpuriousRTOs | The spurious retransmission timeout detected by the F-RTO algorithm | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPSpuriousRtxHostQueues | When the TCP stack wants to retransmit a packet and finds that the packet is not lost in the network but has not been sent yet, it gives up the retransmission and updates this counter. This can happen if a packet stays too long in a qdisc or driver queue | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPSynRetrans | Number of SYN and SYN/ACK retransmits to break down retransmissions into SYN, fast-retransmits, timeout retransmits, etc | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPTSReorder | The reorder packet is detected when a hole is filled | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPTimeWaitOverflow | Number of TIME_WAIT sockets that could not be allocated because the limit was exceeded | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPTimeouts | TCP timeout events | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPToZeroWindowAdv | The TCP receive window is set to zero from a non-zero value | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPWantZeroWindowAdv | Depending on current memory usage, the TCP stack tries to set the receive window to zero, but the receive window might still be a non-zero value | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPWinProbe | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPWqueueTooBig | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TW | TCP sockets finished time wait in fast timer | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TWKilled | TCP sockets finished time wait in slow timer | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TWRecycled | Time wait sockets recycled by timestamp | count | host,container | proc fs |
|
||||
| network | netstat_Tcp_ActiveOpens | The TCP layer sends a SYN and comes into the SYN-SENT state. Every time TcpActiveOpens increases by 1, TcpOutSegs should also increase by 1 | count | host,container | proc fs |
|
||||
| network | netstat_Tcp_AttemptFails | The number of times TCP connections have made a direct transition to the CLOSED state from either the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition to the LISTEN state from the SYN-RCVD state | count | host,container | proc fs |
|
||||
| network | netstat_Tcp_CurrEstab | The number of TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT | count | host,container | proc fs |
|
||||
| network | netstat_Tcp_EstabResets | The number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state | count | host,container | proc fs |
|
||||
| network | netstat_Tcp_InCsumErrors | Incremented when a TCP checksum failure is detected | count | host,container | proc fs |
|
||||
| network | netstat_Tcp_InErrs | The total number of segments received in error (e.g., bad TCP checksums) | count | host,container | proc fs |
|
||||
| network | netstat_Tcp_InSegs | The number of packets received by the TCP layer. As mentioned in RFC1213, it includes the packets received in error, such as checksum error, invalid TCP header and so on | count | host,container | proc fs |
|
||||
| network | netstat_Tcp_MaxConn | The limit on the total number of TCP connections the entity can support. In entities where the maximum number of connections is dynamic, this object should contain the value -1 | count | host,container | proc fs |
|
||||
| network | netstat_Tcp_OutRsts | The number of TCP segments sent containing the RST flag | count | host,container | proc fs |
|
||||
| network | netstat_Tcp_OutSegs | The total number of segments sent, including those on current connections but excluding those containing only retransmitted octets | count | host,container | proc fs |
|
||||
| network | netstat_Tcp_PassiveOpens | The number of times TCP connections have made a direct transition to the SYN-RCVD state from the LISTEN state | count | host,container | proc fs |
|
||||
| network | netstat_Tcp_RetransSegs | The total number of segments retransmitted - that is, the number of TCP segments transmitted containing one or more previously transmitted octets | count | host,container | proc fs |
|
||||
| network | netstat_Tcp_RtoAlgorithm | The algorithm used to determine the timeout value used for retransmitting unacknowledged octets | count | host,container | proc fs |
|
||||
| network | netstat_Tcp_RtoMax | The maximum value permitted by a TCP implementation for the retransmission timeout, measured in milliseconds. More refined semantics for objects of this type depend upon the algorithm used to determine the retransmission timeout | count | host,container | proc fs |
|
||||
| network | netstat_Tcp_RtoMin | The minimum value permitted by a TCP implementation for the retransmission timeout, measured in milliseconds. More refined semantics for objects of this type depend upon the algorithm used to determine the retransmission timeout | count | host,container | proc fs |
|
||||
| network | sockstat_FRAG_inuse | \- | count | host,container | proc fs |
|
||||
| network | sockstat_FRAG_memory | \- | pages | host,container | proc fs |
|
||||
| network | sockstat_RAW_inuse | Number of RAW sockets in use | count | host,container | proc fs |
|
||||
| network | sockstat_TCP_alloc | The number of TCP sockets that have been allocated | count | host,container | proc fs |
|
||||
| network | sockstat_TCP_inuse | Number of established TCP sockets | count | host,container | proc fs |
|
||||
| network | sockstat_TCP_mem | The total size of TCP memory used by the system | pages | system | proc fs |
|
||||
| network | sockstat_TCP_mem_bytes | The total size of TCP memory used by the system, in bytes (see the sketch after this table) | bytes | system | sockstat_TCP_mem \* page_size |
|
||||
| network | sockstat_TCP_orphan | Number of TCP connections waiting to be closed | count | host,container | proc fs |
|
||||
| network | sockstat_TCP_tw | Number of TCP sockets in the TIME_WAIT state, waiting to be terminated | count | host,container | proc fs |
|
||||
| network | sockstat_UDPLITE_inuse | \- | count | host,container | proc fs |
|
||||
| network | sockstat_UDP_inuse | Number of UDP sockets in use | count | host,container | proc fs |
|
||||
| network | sockstat_UDP_mem | The total size of UDP memory used by the system | pages | system | proc fs |
|
||||
| network | sockstat_UDP_mem_bytes | The total number of bytes of UDP memory used by the system | bytes | system | sockstat_UDP_mem \* page_size |
|
||||
| network | sockstat_sockets_used | The number of sockets used by the system | count | system | proc fs |
|
|
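The sockstat_TCP_mem_bytes and sockstat_UDP_mem_bytes rows above are derived metrics: the page counts reported in /proc/net/sockstat multiplied by the system page size. The Go sketch below only illustrates that derivation under stated assumptions; it is not the collector's actual implementation, the helper name readSockstatPages is made up here, and it assumes the usual /proc/net/sockstat line format (e.g. `TCP: inuse 5 orphan 0 tw 2 alloc 7 mem 3`).

```go
package main

import (
	"bufio"
	"fmt"
	"os"
	"strconv"
	"strings"
)

// readSockstatPages returns the value that follows `field` on the /proc/net/sockstat
// line starting with `proto:` (e.g. proto="TCP", field="mem"). Values for "mem" are
// reported in pages.
func readSockstatPages(proto, field string) (int64, error) {
	f, err := os.Open("/proc/net/sockstat")
	if err != nil {
		return 0, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, proto+":") {
			continue
		}
		// Drop the "TCP:" prefix, leaving alternating key/value tokens.
		fields := strings.Fields(line)[1:]
		for i := 0; i+1 < len(fields); i += 2 {
			if fields[i] == field {
				return strconv.ParseInt(fields[i+1], 10, 64)
			}
		}
	}
	if err := scanner.Err(); err != nil {
		return 0, err
	}
	return 0, fmt.Errorf("%s %s not found in /proc/net/sockstat", proto, field)
}

func main() {
	pages, err := readSockstatPages("TCP", "mem")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	// sockstat_TCP_mem is in pages; the *_bytes variant is pages * page_size.
	fmt.Printf("sockstat_TCP_mem=%d pages, sockstat_TCP_mem_bytes=%d bytes\n",
		pages, pages*int64(os.Getpagesize()))
}
```

Run on a Linux host, this prints both the page count and the byte value, matching the `sockstat_TCP_mem \* page_size` formula given in the table's source column.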
@ -0,0 +1,193 @@
|
|||
module huatuo-bamai
|
||||
|
||||
go 1.22.4
|
||||
|
||||
require (
|
||||
git.xiaojukeji.com/kernel/huatuo v1.3.0
|
||||
github.com/cilium/ebpf v0.16.0
|
||||
github.com/containerd/cgroups/v3 v3.0.3
|
||||
github.com/deckarep/golang-set v1.8.0
|
||||
github.com/docker/docker v27.2.0+incompatible
|
||||
github.com/elastic/go-elasticsearch/v7 v7.17.10
|
||||
github.com/ema/qdisc v1.0.0
|
||||
github.com/gin-contrib/pprof v1.5.1
|
||||
github.com/gin-gonic/gin v1.10.0
|
||||
github.com/go-playground/validator/v10 v10.22.1
|
||||
github.com/google/cadvisor v0.50.0
|
||||
github.com/gopacket/gopacket v1.2.0
|
||||
github.com/grafana/grafana-plugin-sdk-go v0.251.0
|
||||
github.com/grafana/pyroscope v1.7.1
|
||||
github.com/grafana/pyroscope/api v0.4.0
|
||||
github.com/jsimonetti/rtnetlink v1.4.2
|
||||
github.com/mdlayher/netlink v1.7.2
|
||||
github.com/opencontainers/runtime-spec v1.2.0
|
||||
github.com/pelletier/go-toml v1.9.5
|
||||
github.com/pkg/errors v0.9.1
|
||||
github.com/prometheus/client_golang v1.20.3
|
||||
github.com/prometheus/procfs v0.15.1
|
||||
github.com/safchain/ethtool v0.4.1
|
||||
github.com/shirou/gopsutil v2.21.11+incompatible
|
||||
github.com/sirupsen/logrus v1.9.3
|
||||
github.com/tidwall/gjson v1.14.2
|
||||
github.com/urfave/cli/v2 v2.27.4
|
||||
github.com/vishvananda/netlink v1.3.0
|
||||
github.com/vishvananda/netns v0.0.4
|
||||
golang.org/x/net v0.31.0
|
||||
golang.org/x/sys v0.27.0
|
||||
golang.org/x/time v0.6.0
|
||||
gopkg.in/natefinch/lumberjack.v2 v2.2.1
|
||||
k8s.io/api v0.31.3
|
||||
k8s.io/cri-client v0.31.3
|
||||
)
|
||||
|
||||
require (
|
||||
connectrpc.com/connect v1.16.2 // indirect
|
||||
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
|
||||
github.com/Microsoft/go-winio v0.6.2 // indirect
|
||||
github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 // indirect
|
||||
github.com/apache/arrow/go/v15 v15.0.2 // indirect
|
||||
github.com/armon/go-metrics v0.4.1 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/blang/semver/v4 v4.0.0 // indirect
|
||||
github.com/bytedance/sonic v1.12.4 // indirect
|
||||
github.com/bytedance/sonic/loader v0.2.1 // indirect
|
||||
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/cheekybits/genny v1.0.0 // indirect
|
||||
github.com/cloudwego/base64x v0.1.4 // indirect
|
||||
github.com/cloudwego/iasm v0.2.0 // indirect
|
||||
github.com/coreos/go-semver v0.3.0 // indirect
|
||||
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
|
||||
github.com/dennwc/varint v1.0.0 // indirect
|
||||
github.com/distribution/reference v0.6.0 // indirect
|
||||
github.com/docker/go-connections v0.5.0 // indirect
|
||||
github.com/docker/go-units v0.5.0 // indirect
|
||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||
github.com/fatih/color v1.15.0 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
|
||||
github.com/gabriel-vasile/mimetype v1.4.6 // indirect
|
||||
github.com/gin-contrib/sse v0.1.0 // indirect
|
||||
github.com/go-kit/log v0.2.1 // indirect
|
||||
github.com/go-logfmt/logfmt v0.6.0 // indirect
|
||||
github.com/go-logr/logr v1.4.2 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||
github.com/go-playground/locales v0.14.1 // indirect
|
||||
github.com/go-playground/universal-translator v0.18.1 // indirect
|
||||
github.com/goccy/go-json v0.10.3 // indirect
|
||||
github.com/godbus/dbus/v5 v5.0.6 // indirect
|
||||
github.com/gogo/googleapis v1.4.1 // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/gogo/status v1.1.1 // indirect
|
||||
github.com/golang/protobuf v1.5.4 // indirect
|
||||
github.com/golang/snappy v0.0.4 // indirect
|
||||
github.com/google/btree v1.1.2 // indirect
|
||||
github.com/google/flatbuffers v23.5.26+incompatible // indirect
|
||||
github.com/google/go-cmp v0.6.0 // indirect
|
||||
github.com/google/gofuzz v1.2.0 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/gorilla/mux v1.8.1 // indirect
|
||||
github.com/grafana/dskit v0.0.0-20231221015914-de83901bf4d6 // indirect
|
||||
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db // indirect
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
|
||||
github.com/hashicorp/consul/api v1.28.2 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
|
||||
github.com/hashicorp/go-hclog v1.6.3 // indirect
|
||||
github.com/hashicorp/go-immutable-radix v1.3.1 // indirect
|
||||
github.com/hashicorp/go-msgpack v1.1.5 // indirect
|
||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/hashicorp/go-rootcerts v1.0.2 // indirect
|
||||
github.com/hashicorp/go-sockaddr v1.0.6 // indirect
|
||||
github.com/hashicorp/golang-lru v0.6.0 // indirect
|
||||
github.com/hashicorp/memberlist v0.5.0 // indirect
|
||||
github.com/hashicorp/serf v0.10.1 // indirect
|
||||
github.com/josharian/native v1.1.0 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/klauspost/compress v1.17.9 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.2.9 // indirect
|
||||
github.com/leodido/go-urn v1.4.0 // indirect
|
||||
github.com/mattetti/filebuffer v1.0.1 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.14 // indirect
|
||||
github.com/mdlayher/socket v0.4.1 // indirect
|
||||
github.com/miekg/dns v1.1.58 // indirect
|
||||
github.com/mitchellh/go-homedir v1.1.0 // indirect
|
||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
||||
github.com/moby/docker-image-spec v1.3.1 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/olekukonko/tablewriter v0.0.5 // indirect
|
||||
github.com/opencontainers/go-digest v1.0.0 // indirect
|
||||
github.com/opencontainers/image-spec v1.1.0 // indirect
|
||||
github.com/opentracing-contrib/go-stdlib v1.0.0 // indirect
|
||||
github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.2.3 // indirect
|
||||
github.com/pierrec/lz4/v4 v4.1.18 // indirect
|
||||
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
|
||||
github.com/prometheus/client_model v0.6.1 // indirect
|
||||
github.com/prometheus/common v0.55.0 // indirect
|
||||
github.com/prometheus/prometheus v0.51.2 // indirect
|
||||
github.com/rivo/uniseg v0.4.3 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/samber/lo v1.38.1 // indirect
|
||||
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect
|
||||
github.com/spf13/pflag v1.0.5 // indirect
|
||||
github.com/tidwall/match v1.1.1 // indirect
|
||||
github.com/tidwall/pretty v1.2.0 // indirect
|
||||
github.com/tklauser/go-sysconf v0.3.12 // indirect
|
||||
github.com/tklauser/numcpus v0.6.1 // indirect
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
github.com/uber/jaeger-client-go v2.30.0+incompatible // indirect
|
||||
github.com/uber/jaeger-lib v2.4.1+incompatible // indirect
|
||||
github.com/ugorji/go/codec v1.2.12 // indirect
|
||||
github.com/valyala/bytebufferpool v1.0.0 // indirect
|
||||
github.com/x448/float16 v0.8.4 // indirect
|
||||
github.com/xlab/treeprint v1.2.0 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
|
||||
github.com/yusufpapurcu/wmi v1.2.4 // indirect
|
||||
github.com/zeebo/xxh3 v1.0.2 // indirect
|
||||
go.etcd.io/etcd/api/v3 v3.5.7 // indirect
|
||||
go.etcd.io/etcd/client/pkg/v3 v3.5.7 // indirect
|
||||
go.etcd.io/etcd/client/v3 v3.5.7 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 // indirect
|
||||
go.opentelemetry.io/otel v1.29.0 // indirect
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 // indirect
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0 // indirect
|
||||
go.opentelemetry.io/otel/metric v1.29.0 // indirect
|
||||
go.opentelemetry.io/otel/sdk v1.29.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.29.0 // indirect
|
||||
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
|
||||
go.uber.org/atomic v1.11.0 // indirect
|
||||
go.uber.org/multierr v1.11.0 // indirect
|
||||
go.uber.org/zap v1.26.0 // indirect
|
||||
golang.org/x/arch v0.12.0 // indirect
|
||||
golang.org/x/crypto v0.29.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 // indirect
|
||||
golang.org/x/mod v0.20.0 // indirect
|
||||
golang.org/x/oauth2 v0.23.0 // indirect
|
||||
golang.org/x/sync v0.9.0 // indirect
|
||||
golang.org/x/text v0.20.0 // indirect
|
||||
golang.org/x/tools v0.24.0 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 // indirect
|
||||
google.golang.org/grpc v1.66.0 // indirect
|
||||
google.golang.org/protobuf v1.35.2 // indirect
|
||||
gopkg.in/inf.v0 v0.9.1 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
k8s.io/apimachinery v0.31.3 // indirect
|
||||
k8s.io/client-go v0.31.3 // indirect
|
||||
k8s.io/component-base v0.31.3 // indirect
|
||||
k8s.io/cri-api v0.31.3 // indirect
|
||||
k8s.io/klog/v2 v2.130.1 // indirect
|
||||
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect
|
||||
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
|
||||
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
|
||||
)
|
|
@ -0,0 +1,691 @@
|
|||
connectrpc.com/connect v1.16.2 h1:ybd6y+ls7GOlb7Bh5C8+ghA6SvCBajHwxssO2CGFjqE=
|
||||
connectrpc.com/connect v1.16.2/go.mod h1:n2kgwskMHXC+lVqb18wngEpF95ldBHXjZYJussz5FRc=
|
||||
git.xiaojukeji.com/kernel/huatuo v1.3.0 h1:dCtjHnQg+2b2SEhXi3AuEWbdH3sC0j70xaDCvuOdFGs=
|
||||
git.xiaojukeji.com/kernel/huatuo v1.3.0/go.mod h1:oMnjctv7Dp754Vz1cZm5/k/8Eke0I2DijQNzBul3bTc=
|
||||
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
|
||||
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
|
||||
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
|
||||
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
|
||||
github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
|
||||
github.com/HdrHistogram/hdrhistogram-go v1.1.2 h1:5IcZpTvzydCQeHzK4Ef/D5rrSqwxob0t8PQPMybUNFM=
|
||||
github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo=
|
||||
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
|
||||
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 h1:ez/4by2iGztzR4L0zgAOR8lTQK9VlyBVVd7G4omaOQs=
|
||||
github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE=
|
||||
github.com/apache/arrow/go/v15 v15.0.2 h1:60IliRbiyTWCWjERBCkO1W4Qun9svcYoZrSLcyOsMLE=
|
||||
github.com/apache/arrow/go/v15 v15.0.2/go.mod h1:DGXsR3ajT524njufqf95822i+KTh+yea1jass9YXgjA=
|
||||
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
|
||||
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
|
||||
github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA=
|
||||
github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4=
|
||||
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
|
||||
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
|
||||
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
|
||||
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
|
||||
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
|
||||
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
|
||||
github.com/bytedance/sonic v1.12.4 h1:9Csb3c9ZJhfUWeMtpCDCq6BUoH5ogfDFLUgQ/jG+R0k=
|
||||
github.com/bytedance/sonic v1.12.4/go.mod h1:B8Gt/XvtZ3Fqj+iSKMypzymZxw/FVwgIGKzMzT9r/rk=
|
||||
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
|
||||
github.com/bytedance/sonic/loader v0.2.1 h1:1GgorWTqf12TA8mma4DDSbaQigE2wOgQo7iCjjJv3+E=
|
||||
github.com/bytedance/sonic/loader v0.2.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
|
||||
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
|
||||
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
|
||||
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/cheekybits/genny v1.0.0 h1:uGGa4nei+j20rOSeDeP5Of12XVm7TGUd4dJA9RDitfE=
|
||||
github.com/cheekybits/genny v1.0.0/go.mod h1:+tQajlRqAUrPI7DOSpB0XAqZYtQakVtB7wXkRAgjxjQ=
|
||||
github.com/chromedp/cdproto v0.0.0-20220208224320-6efb837e6bc2 h1:XCdvHbz3LhewBHN7+mQPx0sg/Hxil/1USnBmxkjHcmY=
|
||||
github.com/chromedp/cdproto v0.0.0-20220208224320-6efb837e6bc2/go.mod h1:At5TxYYdxkbQL0TSefRjhLE3Q0lgvqKKMSFUglJ7i1U=
|
||||
github.com/cilium/ebpf v0.16.0 h1:+BiEnHL6Z7lXnlGUsXQPPAE7+kenAd4ES8MQ5min0Ok=
|
||||
github.com/cilium/ebpf v0.16.0/go.mod h1:L7u2Blt2jMM/vLAVgjxluxtBKlz3/GWjB0dMOEngfwE=
|
||||
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
|
||||
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
|
||||
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
|
||||
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
|
||||
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
|
||||
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
|
||||
github.com/containerd/cgroups/v3 v3.0.3 h1:S5ByHZ/h9PMe5IOQoN7E+nMc2UcLEM/V48DGDJ9kip0=
|
||||
github.com/containerd/cgroups/v3 v3.0.3/go.mod h1:8HBe7V3aWGLFPd/k03swSIsGjZhHI2WzJmticMgVuz0=
|
||||
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
|
||||
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
|
||||
github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM=
|
||||
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
|
||||
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
|
||||
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/deckarep/golang-set v1.8.0 h1:sk9/l/KqpunDwP7pSjUg0keiOOLEnOBHzykLrsPppp4=
|
||||
github.com/deckarep/golang-set v1.8.0/go.mod h1:5nI87KwE7wgsBU1F4GKAw2Qod7p5kyS383rP6+o6qqo=
|
||||
github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE=
|
||||
github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA=
|
||||
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
|
||||
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
|
||||
github.com/docker/docker v27.2.0+incompatible h1:Rk9nIVdfH3+Vz4cyI/uhbINhEZ/oLmc+CBXmH6fbNk4=
|
||||
github.com/docker/docker v27.2.0+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
|
||||
github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
|
||||
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
|
||||
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
|
||||
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
|
||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
github.com/elastic/go-elasticsearch/v7 v7.17.10 h1:TCQ8i4PmIJuBunvBS6bwT2ybzVFxxUhhltAs3Gyu1yo=
|
||||
github.com/elastic/go-elasticsearch/v7 v7.17.10/go.mod h1:OJ4wdbtDNk5g503kvlHLyErCgQwwzmDtaFC4XyOxXA4=
|
||||
github.com/elazarl/goproxy v0.0.0-20230731152917-f99041a5c027 h1:1L0aalTpPz7YlMxETKpmQoWMBkeiuorElZIXoNmgiPE=
|
||||
github.com/elazarl/goproxy v0.0.0-20230731152917-f99041a5c027/go.mod h1:Ro8st/ElPeALwNFlcTpWmkr6IoMFfkjXAvTHpevnDsM=
|
||||
github.com/ema/qdisc v1.0.0 h1:EHLG08FVRbWLg8uRICa3xzC9Zm0m7HyMHfXobWFnXYg=
|
||||
github.com/ema/qdisc v1.0.0/go.mod h1:FhIc0fLYi7f+lK5maMsesDqwYojIOh3VfRs8EVd5YJQ=
|
||||
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
|
||||
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
|
||||
github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=
|
||||
github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs=
|
||||
github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw=
|
||||
github.com/felixge/fgprof v0.9.4-0.20221116204635-ececf7638e93 h1:S8ZdFFDRXUKs3fHpMDPVh9oWd46hKqEEt/X3oxhtF5Q=
|
||||
github.com/felixge/fgprof v0.9.4-0.20221116204635-ececf7638e93/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw=
|
||||
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
|
||||
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
||||
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
|
||||
github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
|
||||
github.com/gabriel-vasile/mimetype v1.4.6 h1:3+PzJTKLkvgjeTbts6msPJt4DixhT4YtFNf1gtGe3zc=
|
||||
github.com/gabriel-vasile/mimetype v1.4.6/go.mod h1:JX1qVKqZd40hUPpAfiNTe0Sne7hdfKSbOqqmkq8GCXc=
|
||||
github.com/getkin/kin-openapi v0.124.0 h1:VSFNMB9C9rTKBnQ/fpyDU8ytMTr4dWI9QovSKj9kz/M=
|
||||
github.com/getkin/kin-openapi v0.124.0/go.mod h1:wb1aSZA/iWmorQP9KTAS/phLj/t17B5jT7+fS8ed9NM=
|
||||
github.com/gin-contrib/pprof v1.5.1 h1:Mzy+3HHtHbtwr4VewBTXZp/hR7pS6ZuZkueBIrQiLL4=
|
||||
github.com/gin-contrib/pprof v1.5.1/go.mod h1:uwzoF6FxdzJJGyMdcZB+VSuVjOBe1kSH+KMIvKGwvCQ=
|
||||
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
|
||||
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
|
||||
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
|
||||
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
|
||||
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
|
||||
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
|
||||
github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU=
|
||||
github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0=
|
||||
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
|
||||
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
|
||||
github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4=
|
||||
github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs=
|
||||
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
|
||||
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
|
||||
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||
github.com/go-openapi/jsonpointer v0.20.2 h1:mQc3nmndL8ZBzStEo3JYF8wzmeWffDH4VbXz58sAx6Q=
|
||||
github.com/go-openapi/jsonpointer v0.20.2/go.mod h1:bHen+N0u1KEO3YlmqOjTT9Adn1RfD91Ar825/PuiRVs=
|
||||
github.com/go-openapi/swag v0.22.9 h1:XX2DssF+mQKM2DHsbgZK74y/zj4mo9I99+89xUmuZCE=
|
||||
github.com/go-openapi/swag v0.22.9/go.mod h1:3/OXnFfnMAwBD099SwYRk7GD3xOrr1iL7d/XNLXVVwE=
|
||||
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
||||
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
|
||||
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
|
||||
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
|
||||
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
|
||||
github.com/go-playground/validator/v10 v10.22.1 h1:40JcKH+bBNGFczGuoBYgX4I6m/i27HYW8P9FDk5PbgA=
|
||||
github.com/go-playground/validator/v10 v10.22.1/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
|
||||
github.com/go-quicktest/qt v1.101.0 h1:O1K29Txy5P2OK0dGo59b7b0LR6wKfIhttaAhHUyn7eI=
|
||||
github.com/go-quicktest/qt v1.101.0/go.mod h1:14Bz/f7NwaXPtdYEgzsx46kqSxVwTbzVZsDC26tQJow=
|
||||
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
||||
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
|
||||
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
|
||||
github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA=
|
||||
github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/godbus/dbus/v5 v5.0.6 h1:mkgN1ofwASrYnJ5W6U/BxG15eXXXjirgZc7CLqkcaro=
|
||||
github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/gogo/googleapis v0.0.0-20180223154316-0cd9801be74a/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s=
|
||||
github.com/gogo/googleapis v1.4.1 h1:1Yx4Myt7BxzvUr5ldGSbwYiZG6t9wGBZ+8/fX3Wvtq0=
|
||||
github.com/gogo/googleapis v1.4.1/go.mod h1:2lpHqI5OcWCtVElxXnPt+s8oJvMpySlOyM6xDCrzib4=
|
||||
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
|
||||
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
|
||||
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
|
||||
github.com/gogo/status v1.1.1 h1:DuHXlSFHNKqTQ+/ACf5Vs6r4X/dH2EgIzR9Vr+H65kg=
|
||||
github.com/gogo/status v1.1.1/go.mod h1:jpG3dM5QPcqu19Hg8lkUhBFBa3TcLs1DG7+2Jqci7oU=
|
||||
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
|
||||
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
||||
github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU=
|
||||
github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
|
||||
github.com/google/cadvisor v0.50.0 h1:7w/hKIbJKBWqQsRTy+Hpj2vj+fnxrLXcEXFy+LW0Bsg=
|
||||
github.com/google/cadvisor v0.50.0/go.mod h1:VxCDwZalpFyENvmfabFqaIGsqNKLtDzE62a19rfVTB8=
|
||||
github.com/google/flatbuffers v23.5.26+incompatible h1:M9dgRyhJemaM4Sw8+66GHBu8ioaQmyPLg1b8VwK5WJg=
|
||||
github.com/google/flatbuffers v23.5.26+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
|
||||
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
|
||||
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af h1:kmjWCqn2qkEml422C2Rrd27c3VGxi6a/6HNq8QmHRKM=
|
||||
github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/gopacket/gopacket v1.2.0 h1:eXbzFad7f73P1n2EJHQlsKuvIMJjVXK5tXoSca78I3A=
|
||||
github.com/gopacket/gopacket v1.2.0/go.mod h1:BrAKEy5EOGQ76LSqh7DMAr7z0NNPdczWm2GxCG7+I8M=
|
||||
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
|
||||
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
|
||||
github.com/grafana/dskit v0.0.0-20231221015914-de83901bf4d6 h1:Z78JZ7pa6InQ5BcMB27M+NMTZ7LV+MXgOd3dZPfEdG4=
|
||||
github.com/grafana/dskit v0.0.0-20231221015914-de83901bf4d6/go.mod h1:kkWM4WUV230bNG3urVRWPBnSJHs64y/0RmWjftnnn0c=
|
||||
github.com/grafana/grafana-plugin-sdk-go v0.251.0 h1:gnOtxrC/1rqFvpSbQYyoZqkr47oWDlz4Q2L6Ozmsi3w=
|
||||
github.com/grafana/grafana-plugin-sdk-go v0.251.0/go.mod h1:gCGN9kHY3KeX4qyni3+Kead38Q+85pYOrsDcxZp6AIk=
|
||||
github.com/grafana/otel-profiling-go v0.5.1 h1:stVPKAFZSa7eGiqbYuG25VcqYksR6iWvF3YH66t4qL8=
|
||||
github.com/grafana/otel-profiling-go v0.5.1/go.mod h1:ftN/t5A/4gQI19/8MoWurBEtC6gFw8Dns1sJZ9W4Tls=
|
||||
github.com/grafana/pyroscope v1.7.1 h1:aGXOVNwUGXK3dNSpc40/IOtOG/ACvaS2C/mJ7jUxMFg=
|
||||
github.com/grafana/pyroscope v1.7.1/go.mod h1:RuSiNg8N9iufpHbScIFU4kU4LbWHaU7G1knyVDw/V5s=
|
||||
github.com/grafana/pyroscope-go v1.0.3 h1:8WWmItzLfg4m8G+j//ElSjMeMr88Y6Lvblar6qeTyKk=
|
||||
github.com/grafana/pyroscope-go/godeltaprof v0.1.8 h1:iwOtYXeeVSAeYefJNaxDytgjKtUuKQbJqgAIjlnicKg=
|
||||
github.com/grafana/pyroscope-go/godeltaprof v0.1.8/go.mod h1:2+l7K7twW49Ct4wFluZD3tZ6e0SjanjcUUBPVD/UuGU=
|
||||
github.com/grafana/pyroscope/api v0.4.0 h1:J86DxoNeLOvtJhB1Cn65JMZkXe682D+RqeoIUiYc/eo=
|
||||
github.com/grafana/pyroscope/api v0.4.0/go.mod h1:MFnZNeUM4RDsDOnbgKW3GWoLSBpLzMMT9nkvhHHo81o=
|
||||
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db h1:7aN5cccjIqCLTzedH7MZzRZt5/lsAHch6Z3L2ZGn5FA=
|
||||
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db/go.mod h1:M5qHK+eWfAv8VR/265dIuEpL3fNfeC21tXXp9itM24A=
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 h1:qnpSQwGEnkcRpTqNOIR6bJbR0gAorgP9CSALpRcKoAA=
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1/go.mod h1:lXGCsh6c22WGtjr+qGHj1otzZpV/1kwTMAqkwZsnWRU=
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 h1:pRhl55Yx1eC7BZ1N+BBWwnKaMyD8uC+34TLdndZMAKk=
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0/go.mod h1:XKMd7iuf/RGPSMJ/U4HP0zS2Z9Fh8Ps9a+6X26m/tmI=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k=
|
||||
github.com/hashicorp/consul/api v1.28.2 h1:mXfkRHrpHN4YY3RqL09nXU1eHKLNiuAN4kHvDQ16k/8=
|
||||
github.com/hashicorp/consul/api v1.28.2/go.mod h1:KyzqzgMEya+IZPcD65YFoOVAgPpbfERu4I/tzG6/ueE=
|
||||
github.com/hashicorp/consul/sdk v0.16.0 h1:SE9m0W6DEfgIVCJX7xU+iv/hUl4m/nxqMTnCdMxDpJ8=
|
||||
github.com/hashicorp/consul/sdk v0.16.0/go.mod h1:7pxqqhqoaPqnBnzXD1StKed62LqJeClzVsUEy85Zr0A=
|
||||
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
|
||||
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
|
||||
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
|
||||
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
|
||||
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
|
||||
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
|
||||
github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
|
||||
github.com/hashicorp/go-immutable-radix v1.3.1 h1:DKHmCUm2hRBK510BaiZlwvpD40f8bJFeZnpfm2KLowc=
|
||||
github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
|
||||
github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=
|
||||
github.com/hashicorp/go-msgpack v1.1.5 h1:9byZdVjKTe5mce63pRVNP1L7UAmdHOTEMGehn6KvJWs=
|
||||
github.com/hashicorp/go-msgpack v1.1.5/go.mod h1:gWVc3sv/wbDmR3rQsj1CAktEZzoz1YNK9NfGLXJ69/4=
|
||||
github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk=
|
||||
github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA=
|
||||
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
|
||||
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
|
||||
github.com/hashicorp/go-plugin v1.6.1 h1:P7MR2UP6gNKGPp+y7EZw2kOiq4IR9WiqLvp0XOsVdwI=
|
||||
github.com/hashicorp/go-plugin v1.6.1/go.mod h1:XPHFku2tFo3o3QKFgSYo+cghcUhw1NA1hZyMK0PWAw0=
|
||||
github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=
|
||||
github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc=
|
||||
github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8=
|
||||
github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU=
|
||||
github.com/hashicorp/go-sockaddr v1.0.6 h1:RSG8rKU28VTUTvEKghe5gIhIQpv8evvNpnDEyqO4u9I=
|
||||
github.com/hashicorp/go-sockaddr v1.0.6/go.mod h1:uoUUmtwU7n9Dv3O4SNLeFvg0SxQ3lyjsj6+CCykpaxI=
|
||||
github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4=
|
||||
github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
|
||||
github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
|
||||
github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8=
|
||||
github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
|
||||
github.com/hashicorp/go-version v1.6.0 h1:feTTfFNnjP967rlCxM/I9g701jU+RN74YKx2mOkIeek=
|
||||
github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
|
||||
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
||||
github.com/hashicorp/golang-lru v0.6.0 h1:uL2shRDx7RTrOrTCUZEGP/wJUFiUI8QT6E7z5o8jga4=
|
||||
github.com/hashicorp/golang-lru v0.6.0/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
|
||||
github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=
|
||||
github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc=
|
||||
github.com/hashicorp/memberlist v0.5.0 h1:EtYPN8DpAURiapus508I4n9CzHs2W+8NZGbmmR/prTM=
|
||||
github.com/hashicorp/memberlist v0.5.0/go.mod h1:yvyXLpo0QaGE59Y7hDTsTzDD25JYBZ4mHgHUZ8lrOI0=
|
||||
github.com/hashicorp/serf v0.10.1 h1:Z1H2J60yRKvfDYAOZLd2MU0ND4AH/WDz7xYHDWQsIPY=
|
||||
github.com/hashicorp/serf v0.10.1/go.mod h1:yL2t6BqATOLGc5HF7qbFkTfXoPIY0WZdWHfEvMqbG+4=
|
||||
github.com/hashicorp/yamux v0.1.1 h1:yrQxtgseBDrq9Y652vSRDvsKCJKOUD+GzTS4Y0Y8pvE=
|
||||
github.com/hashicorp/yamux v0.1.1/go.mod h1:CtWFDAQgb7dxtzFs4tWbplKIe2jSi3+5vKbgIO0SLnQ=
|
||||
github.com/invopop/yaml v0.2.0 h1:7zky/qH+O0DwAyoobXUqvVBwgBFRxKoQ/3FjcVpjTMY=
|
||||
github.com/invopop/yaml v0.2.0/go.mod h1:2XuRLgs/ouIrW3XNzuNj7J3Nvu/Dig5MXvbCEdiBN3Q=
|
||||
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
|
||||
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
|
||||
github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA=
|
||||
github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w=
|
||||
github.com/jsimonetti/rtnetlink v1.4.2 h1:Df9w9TZ3npHTyDn0Ev9e1uzmN2odmXd0QX+J5GTEn90=
|
||||
github.com/jsimonetti/rtnetlink v1.4.2/go.mod h1:92s6LJdE+1iOrw+F2/RO7LYI2Qd8pPpFNNUYW06gcoM=
|
||||
github.com/jsimonetti/rtnetlink/v2 v2.0.1 h1:xda7qaHDSVOsADNouv7ukSuicKZO7GgVUCXxpaIEIlM=
|
||||
github.com/jsimonetti/rtnetlink/v2 v2.0.1/go.mod h1:7MoNYNbb3UaDHtF8udiJo/RH6VsTKP1pqKLUTVCvToE=
|
||||
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
||||
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
|
||||
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
|
||||
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
|
||||
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||
github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY=
|
||||
github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8=
|
||||
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
||||
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
||||
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
||||
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
|
||||
github.com/magefile/mage v1.15.0 h1:BvGheCMAsG3bWUDbZ8AyXXpCNwU9u5CB6sM+HNb9HYg=
|
||||
github.com/magefile/mage v1.15.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
|
||||
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
|
||||
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
|
||||
github.com/mattetti/filebuffer v1.0.1 h1:gG7pyfnSIZCxdoKq+cPa8T0hhYtD9NxCdI4D7PTjRLM=
|
||||
github.com/mattetti/filebuffer v1.0.1/go.mod h1:YdMURNDOttIiruleeVr6f56OrMc+MydEnTcXwtkxNVs=
|
||||
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
|
||||
github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
|
||||
github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
|
||||
github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
|
||||
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
|
||||
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
||||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||
github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
|
||||
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
|
||||
github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE=
|
||||
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
|
||||
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
|
||||
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
|
||||
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
|
||||
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
|
||||
github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g=
|
||||
github.com/mdlayher/netlink v1.7.2/go.mod h1:xraEF7uJbxLhc5fpHL4cPe221LI2bdttWlU+ZGLfQSw=
|
||||
github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U=
|
||||
github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA=
|
||||
github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
|
||||
github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI=
|
||||
github.com/miekg/dns v1.1.58 h1:ca2Hdkz+cDg/7eNF6V56jjzuZ4aCAE+DbVkILdQWG/4=
|
||||
github.com/miekg/dns v1.1.58/go.mod h1:Ypv+3b/KadlvW9vJfXOTf300O4UqaHFzFCuHz+rPkBY=
|
||||
github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI=
|
||||
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
|
||||
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
|
||||
github.com/mitchellh/go-testing-interface v1.14.1 h1:jrgshOhYAUVNMAJiKbEu7EqAwgJJ2JqpQmpLJOu07cU=
|
||||
github.com/mitchellh/go-testing-interface v1.14.1/go.mod h1:gfgS7OtZj6MA4U1UrDRp04twqAjfvlZyCfX3sDjEym8=
|
||||
github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
|
||||
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
|
||||
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
|
||||
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
|
||||
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
|
||||
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
|
||||
github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw=
|
||||
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8=
|
||||
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
|
||||
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
||||
github.com/oklog/run v1.1.0 h1:GEenZ1cK0+q0+wsJew9qUg/DyD8k3JzYsZAi5gYi2mA=
|
||||
github.com/oklog/run v1.1.0/go.mod h1:sVPdnTZT1zYwAJeCMu2Th4T21pA3FPOQRfWjQlk7DVU=
|
||||
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
|
||||
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
|
||||
github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA=
|
||||
github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To=
|
||||
github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk=
|
||||
github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0=
|
||||
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
|
||||
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
|
||||
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
|
||||
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
|
||||
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
|
||||
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
github.com/opentracing-contrib/go-stdlib v1.0.0 h1:TBS7YuVotp8myLon4Pv7BtCBzOTo1DeZCld0Z63mW2w=
|
||||
github.com/opentracing-contrib/go-stdlib v1.0.0/go.mod h1:qtI1ogk+2JhVPIXVc6q+NHziSmy2W5GbdQZFUHADCBU=
|
||||
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
|
||||
github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b h1:FfH+VrHHk6Lxt9HdVS0PXzSXFyS2NbZKXv33FYPol0A=
|
||||
github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b/go.mod h1:AC62GU6hc0BrNm+9RK9VSiwa/EUe1bkIeFORAMcHvJU=
|
||||
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
|
||||
github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY=
|
||||
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
|
||||
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
|
||||
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
|
||||
github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M=
|
||||
github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc=
|
||||
github.com/perimeterx/marshmallow v1.1.5 h1:a2LALqQ1BlHM8PZblsDdidgv1mWi1DgC2UmX50IvK2s=
|
||||
github.com/perimeterx/marshmallow v1.1.5/go.mod h1:dsXbUu8CRzfYP5a87xpp0xq9S3u0Vchtcl8we9tYaXw=
|
||||
github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ=
|
||||
github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
|
||||
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
|
||||
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
|
||||
github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s=
|
||||
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
|
||||
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
|
||||
github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
|
||||
github.com/prometheus/client_golang v1.20.3 h1:oPksm4K8B+Vt35tUhw6GbSNSgVlVSBH0qELP/7u83l4=
|
||||
github.com/prometheus/client_golang v1.20.3/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
|
||||
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
|
||||
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
|
||||
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
|
||||
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
|
||||
github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4=
|
||||
github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
|
||||
github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
|
||||
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
|
||||
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
|
||||
github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
|
||||
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
|
||||
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
|
||||
github.com/prometheus/prometheus v0.51.2 h1:U0faf1nT4CB9DkBW87XLJCBi2s8nwWXdTbyzRUAkX0w=
|
||||
github.com/prometheus/prometheus v0.51.2/go.mod h1:yv4MwOn3yHMQ6MZGHPg/U7Fcyqf+rxqiZfSur6myVtc=
|
||||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||
github.com/rivo/uniseg v0.4.3 h1:utMvzDsuh3suAEnhH0RdHmoPbU648o6CvXxTx4SBMOw=
|
||||
github.com/rivo/uniseg v0.4.3/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
|
||||
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
|
||||
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
|
||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
|
||||
github.com/safchain/ethtool v0.4.1 h1:S6mEleTADqgynileXoiapt/nKnatyR6bmIHoF+h2ADo=
|
||||
github.com/safchain/ethtool v0.4.1/go.mod h1:XLLnZmy4OCRTkksP/UiMjij96YmIsBfmBQcs7H6tA48=
|
||||
github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM=
|
||||
github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
|
||||
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I=
|
||||
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
|
||||
github.com/shirou/gopsutil v2.21.11+incompatible h1:lOGOyCG67a5dv2hq5Z1BLDUqqKp3HkbjPcz5j6XMS0U=
|
||||
github.com/shirou/gopsutil v2.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
|
||||
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
|
||||
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
|
||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
|
||||
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
|
||||
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/tidwall/gjson v1.14.2 h1:6BBkirS0rAHjumnjHF6qgy5d2YAJ1TLIaFE2lzfOLqo=
|
||||
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
||||
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
|
||||
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
|
||||
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
|
||||
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
|
||||
github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
|
||||
github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
|
||||
github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
|
||||
github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
|
||||
github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
||||
github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaOOb6ThwMmTEbhRwtKR97o=
|
||||
github.com/uber/jaeger-client-go v2.30.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk=
|
||||
github.com/uber/jaeger-lib v2.4.1+incompatible h1:td4jdvLcExb4cBISKIpHuGoVXh+dVKhn2Um6rjCsSsg=
|
||||
github.com/uber/jaeger-lib v2.4.1+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U=
|
||||
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
|
||||
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
|
||||
github.com/unknwon/bra v0.0.0-20200517080246-1e3013ecaff8 h1:aVGB3YnaS/JNfOW3tiHIlmNmTDg618va+eT0mVomgyI=
|
||||
github.com/unknwon/bra v0.0.0-20200517080246-1e3013ecaff8/go.mod h1:fVle4kNr08ydeohzYafr20oZzbAkhQT39gKK/pFQ5M4=
|
||||
github.com/unknwon/com v1.0.1 h1:3d1LTxD+Lnf3soQiD4Cp/0BRB+Rsa/+RTvz8GMMzIXs=
|
||||
github.com/unknwon/com v1.0.1/go.mod h1:tOOxU81rwgoCLoOVVPHb6T/wt8HZygqH5id+GNnlCXM=
|
||||
github.com/unknwon/log v0.0.0-20150304194804-e617c87089d3 h1:4EYQaWAatQokdji3zqZloVIW/Ke1RQjYw2zHULyrHJg=
|
||||
github.com/unknwon/log v0.0.0-20150304194804-e617c87089d3/go.mod h1:1xEUf2abjfP92w2GZTV+GgaRxXErwRXcClbUwrNJffU=
|
||||
github.com/urfave/cli v1.22.15 h1:nuqt+pdC/KqswQKhETJjo7pvn/k4xMUxgW6liI7XpnM=
|
||||
github.com/urfave/cli v1.22.15/go.mod h1:wSan1hmo5zeyLGBjRJbzRTNk8gwoYa2B9n4q9dmRIc0=
|
||||
github.com/urfave/cli/v2 v2.27.4 h1:o1owoI+02Eb+K107p27wEX9Bb8eqIoZCfLXloLUSWJ8=
|
||||
github.com/urfave/cli/v2 v2.27.4/go.mod h1:m4QzxcD2qpra4z7WhzEGn74WZLViBnMpb1ToCAKdGRQ=
|
||||
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
|
||||
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
||||
github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk=
|
||||
github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs=
|
||||
github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8=
|
||||
github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
|
||||
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
|
||||
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
|
||||
github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ=
|
||||
github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0=
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
|
||||
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
|
||||
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
|
||||
github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
|
||||
github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
|
||||
github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
|
||||
github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
|
||||
go.etcd.io/etcd/api/v3 v3.5.7 h1:sbcmosSVesNrWOJ58ZQFitHMdncusIifYcrBfwrlJSY=
|
||||
go.etcd.io/etcd/api/v3 v3.5.7/go.mod h1:9qew1gCdDDLu+VwmeG+iFpL+QlpHTo7iubavdVDgCAA=
|
||||
go.etcd.io/etcd/client/pkg/v3 v3.5.7 h1:y3kf5Gbp4e4q7egZdn5T7W9TSHUvkClN6u+Rq9mEOmg=
|
||||
go.etcd.io/etcd/client/pkg/v3 v3.5.7/go.mod h1:o0Abi1MK86iad3YrWhgUsbGx1pmTS+hrORWc2CamuhY=
|
||||
go.etcd.io/etcd/client/v3 v3.5.7 h1:u/OhpiuCgYY8awOHlhIhmGIGpxfBU/GZBUP3m/3/Iz4=
|
||||
go.etcd.io/etcd/client/v3 v3.5.7/go.mod h1:sOWmj9DZUMyAngS7QQwCyAXXAL6WhgTOPLNS/NabQgw=
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0 h1:9G6E0TXzGFVfTnawRzrPl83iHOAV7L8NJiR8RSGYV1g=
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0/go.mod h1:azvtTADFQJA8mX80jIH/akaE7h+dbm/sVuaHqN13w74=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.53.0 h1:IVtyPth4Rs5P8wIf0mP2KVKFNTJ4paX9qQ4Hkh5gFdc=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.53.0/go.mod h1:ImRBLMJv177/pwiLZ7tU7HDGNdBv7rS0HQ99eN/zBl8=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 h1:4K4tsIXefpVJtvA/8srF4V4y0akAoPHkIslgAkjixJA=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0/go.mod h1:jjdQuTGVsXV4vSs+CJ2qYDeDPf9yIJV23qlIzBm73Vg=
|
||||
go.opentelemetry.io/contrib/propagators/jaeger v1.29.0 h1:+YPiqF5rR6PqHBlmEFLPumbSP0gY0WmCGFayXRcCLvs=
|
||||
go.opentelemetry.io/contrib/propagators/jaeger v1.29.0/go.mod h1:6PD7q7qquWSp3Z4HeM3e/2ipRubaY1rXZO8NIHVDZjs=
|
||||
go.opentelemetry.io/contrib/samplers/jaegerremote v0.23.0 h1:qKi9ntCcronqWqfuKxqrxZlZd82jXJEgGiAWH1+phxo=
|
||||
go.opentelemetry.io/contrib/samplers/jaegerremote v0.23.0/go.mod h1:1kbAgQa5lgYC3rC6cE3jSxQ/Q13l33wv/WI8U+htwag=
|
||||
go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw=
|
||||
go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 h1:dIIDULZJpgdiHz5tXrTgKIMLkus6jEFa7x5SOKcyR7E=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0/go.mod h1:jlRVBe7+Z1wyxFSUs48L6OBQZ5JwH2Hg/Vbl+t9rAgI=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0 h1:R3X6ZXmNPRR8ul6i3WgFURCHzaXjHdm0karRG/+dj3s=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0/go.mod h1:QWFXnDavXWwMx2EEcZsf3yxgEKAqsxQ+Syjp+seyInw=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.27.0 h1:QY7/0NeRPKlzusf40ZE4t1VlMKbqSNT7cJRYzWuja0s=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.27.0/go.mod h1:HVkSiDhTM9BoUJU8qE6j2eSWLLXvi1USXjyd2BXT8PY=
|
||||
go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc=
|
||||
go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8=
|
||||
go.opentelemetry.io/otel/sdk v1.29.0 h1:vkqKjk7gwhS8VaWb0POZKmIEDimRCMsopNYnriHyryo=
|
||||
go.opentelemetry.io/otel/sdk v1.29.0/go.mod h1:pM8Dx5WKnvxLCb+8lG1PRNIDxu9g9b9g59Qr7hfAAok=
|
||||
go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4=
|
||||
go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ=
|
||||
go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0=
|
||||
go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8=
|
||||
go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
|
||||
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
|
||||
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
|
||||
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
|
||||
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
|
||||
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
|
||||
go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo=
|
||||
go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so=
|
||||
golang.org/x/arch v0.12.0 h1:UsYJhbzPYGsT0HbEdmYcqtCv8UNGvnaL561NnIUvaKg=
|
||||
golang.org/x/arch v0.12.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
||||
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY=
|
||||
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ=
|
||||
golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg=
|
||||
golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 h1:kx6Ds3MlpiUHKj7syVnbp57++8WpuKPcR5yjLBjvLEA=
|
||||
golang.org/x/exp v0.0.0-20240823005443-9b4947da3948/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ=
|
||||
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
|
||||
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8=
|
||||
golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo=
|
||||
golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM=
|
||||
golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs=
|
||||
golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
|
||||
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
|
||||
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s=
|
||||
golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug=
|
||||
golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4=
|
||||
golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U=
|
||||
golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20190424220101-1e8e1cfdf96b/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
|
||||
golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
||||
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
|
||||
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk=
|
||||
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8=
|
||||
gonum.org/v1/gonum v0.12.0 h1:xKuo6hzt+gMav00meVPUlXwSdoEJP46BR+wdxQEFK2o=
|
||||
gonum.org/v1/gonum v0.12.0/go.mod h1:73TDxJfAAHeA8Mk9mf8NlIppyhQNo5GLTcYeqgo2lvY=
|
||||
google.golang.org/genproto v0.0.0-20180518175338-11a468237815/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd h1:BBOTEWLuuEGQy9n1y9MhVJ9Qt0BDu21X8qZs71/uPZo=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd/go.mod h1:fO8wJzT2zbQbAjbIoos1285VfEIYKDDY+Dt+WpTkh6g=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 h1:e7S5W7MGGLaSu8j3YjdezkZ+m1/Nm0uRVRMEMGk26Xs=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
|
||||
google.golang.org/grpc v1.12.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw=
|
||||
google.golang.org/grpc v1.66.0 h1:DibZuoBznOxbDQxRINckZcUvnCEvrW9pcWIE2yF9r1c=
|
||||
google.golang.org/grpc v1.66.0/go.mod h1:s3/l6xSSCURdVfAnL+TqCNMyTDAGN6+lZeVxnZR128Y=
|
||||
google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io=
|
||||
google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
gopkg.in/fsnotify/fsnotify.v1 v1.4.7 h1:XNNYLJHt73EyYiCZi6+xjupS9CpvmiDgjPTAjrBlQbo=
|
||||
gopkg.in/fsnotify/fsnotify.v1 v1.4.7/go.mod h1:Fyux9zXlo4rWoMSIzpn9fDAYjalPqJ/K1qJ27s+7ltE=
|
||||
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
|
||||
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
|
||||
gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc=
|
||||
gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc=
|
||||
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gotest.tools/v3 v3.5.1 h1:EENdUnS3pdur5nybKYIh2Vfgc8IUNBjxDPSjtiJcOzU=
|
||||
gotest.tools/v3 v3.5.1/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU=
|
||||
k8s.io/api v0.31.3 h1:umzm5o8lFbdN/hIXbrK9oRpOproJO62CV1zqxXrLgk8=
|
||||
k8s.io/api v0.31.3/go.mod h1:UJrkIp9pnMOI9K2nlL6vwpxRzzEX5sWgn8kGQe92kCE=
|
||||
k8s.io/apimachinery v0.31.3 h1:6l0WhcYgasZ/wk9ktLq5vLaoXJJr5ts6lkaQzgeYPq4=
|
||||
k8s.io/apimachinery v0.31.3/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo=
|
||||
k8s.io/client-go v0.31.3 h1:CAlZuM+PH2cm+86LOBemaJI/lQ5linJ6UFxKX/SoG+4=
|
||||
k8s.io/client-go v0.31.3/go.mod h1:2CgjPUTpv3fE5dNygAr2NcM8nhHzXvxB8KL5gYc3kJs=
|
||||
k8s.io/component-base v0.31.3 h1:DMCXXVx546Rfvhj+3cOm2EUxhS+EyztH423j+8sOwhQ=
|
||||
k8s.io/component-base v0.31.3/go.mod h1:xME6BHfUOafRgT0rGVBGl7TuSg8Z9/deT7qq6w7qjIU=
|
||||
k8s.io/cri-api v0.31.3 h1:dsZXzrGrCEwHjsTDlAV7rutEplpMLY8bfNRMIqrtXjo=
|
||||
k8s.io/cri-api v0.31.3/go.mod h1:Po3TMAYH/+KrZabi7QiwQI4a692oZcUOUThd/rqwxrI=
|
||||
k8s.io/cri-client v0.31.3 h1:9ZwddaNJomqkTBYQqSmB+Ccns3beY4HyYDwmRtWTCJM=
|
||||
k8s.io/cri-client v0.31.3/go.mod h1:klbWiYkOatOQOkXOYZMZMGSTM8q9eC/efsYGuXcgPes=
|
||||
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
|
||||
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
|
||||
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A=
|
||||
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
|
||||
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
|
||||
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
|
||||
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
|
||||
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
|
||||
sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
|
||||
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
|
||||
sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
@ -0,0 +1,224 @@
# log-level: Debug, Info, Warn, Error, Panic
LogLevel = "Info"
# logging file path
# LogFile = ""

[APIServer]
# TCPAddr is the TCP listen address of the huatuo-bamai server
TCPAddr = ":19704"

[HuaTuoConf]
UserName = "huatuo-bamai"
PassWord = "huatuo-bamai"
UnixAddr = "/home/xiaoju/didicloud/huatuo/shared/huatuo.unix"
ServerIP = "127.0.0.1:12735"
KeepaliveTimeout = 300
APIVersion = "v1.3"
ReqTimeout = 15


[RuntimeCgroup]
LimitInitCPU = 0.5
LimitCPU = 2.0
# memory limit (MB)
LimitMem = 2048

# storage configurations
[Storage]
# ES & Kibana configurations
[Storage.ES]
# ES storage is disabled if any of Address, Username or Password is empty.
Address = "http://10.88.128.149:30963"
Username = "20416"
Password = "E4haxbsIUPiUZES"
Index = "cn_huatuo_relay_cases"

# tracer's record data
# Path: the parent directory of the record files for each tracer
# RotationSize: the maximum size (MB) of a record file before it is rotated, per subsystem
# MaxRotation: the maximum number of old record files to retain, per subsystem
[Storage.LocalFile]
Path = "./record"
RotationSize = 100
MaxRotation = 10

[TaskConfig]
MaxRunningTask = 10

[Tracing]
# blacklist
BlackList = ["softlockup"]

[Tracing.Cpuidle]
CgUserth = 75 #75%
CgDeltaUserth = 30 #30%
CgSysth = 45 #45%
CgDeltaSysth = 0 #0
CgUsageth = 90 #90%
CgDeltaUsageth = 30 #30%
CgStep = 10 #10s
CgGrace = 1800 #1800s
CgUsageToolduration = 10 #10s
[Tracing.Cpusys]
CPUSysth = 50 #50%
CPUSysDelta = 30 #30%
CPUSysStep = 1 #1s
CPUSysToolduration = 10 #10s
[Tracing.Waitrate]
[Tracing.Waitrate.SpikeThreshold]
"0" = 50.0
"101" = 80.0
"102" = 120.0
"103" = 170.0
"1" = 220.0
"2" = 270.0
"3" = 320.0
"4" = 370.0
[Tracing.Waitrate.SlopeThreshold]
"0" = 0.05
"101" = 0.1
"102" = 0.2
"103" = 0.3
"1" = 0.4
"2" = 0.5
"3" = 0.6
"4" = 0.7
[Tracing.Waitrate.SampleConfig]
# DataSetCapability * SampleInterval is the covered time span in seconds (e.g. 360 * 5s = 1800s of history)
DataSetCapability = 360
# Seconds
OnceCaptureTime = 15
SampleInterval = 5
[Tracing.Softirq]
ThresholdTime = 100000000
[Tracing.Dload]
ThresholdLoad = 5.0
MonitorGap = 180
[Tracing.IOTracing]
IOScheduleThreshold = 100 #100ms
ReadThreshold = 2000 #MB/s
WriteThreshold = 1500 #MB/s
IOutilThreshold = 90 #90%
IOwaitThreshold = 100 #100ms
PeriodSecond = 8
MaxStackNumber = 16
TopProcessCount = 15
TopFilesPerProcess = 10
[Tracing.MemoryReclaim]
Deltath = 900000000 #900ms
[Tracing.MemoryBurst]
HistoryWindowLength = 60
SampleInterval = 5 # seconds
SilencePeriod = 300 # seconds
TopNProcesses = 10
BurstRatio = 2.0
AnonThreshold = 70 # percent
# the latency thresholds for packet receive
[Tracing.NetRecvLat]
ToNetIf = 5 # ms, from the driver to the core network stack receive
ToTCPV4 = 10 # ms, from the driver to TCP receive; includes ToNetIf
ToUserCopy = 115 # ms, from the driver to the user-space copy; includes ToNetIf + ToTCPV4
IgnoreHost = true # whether to ignore host processes
IgnoreContainerLevel = [103, 3, 4]
[Tracing.Dropwatch]
IgnoreNeighInvalidate = true # ignore drops reported by `neigh_invalidate`
[Tracing.Netdev]
Whitelist = ["eth0", "eth1", "bond4", "lo"]
[Tracing.Fastfork]
RedisInfoCollectionInterval = 3600 # interval (seconds) of redis process information collection
EnableForkProbe = 1 # enable fork kprobe and kretprobe
EnablePtsepProbe = 1
EnableWaitptsepProbe = 1

# Collector Configurations.
[MetricCollector]
# blacklist
BlackList = ["ethtool"]

# Netdev Configurations.
[MetricCollector.Netdev]
# Use `netlink` instead of `procfs net/dev` to get netdev statistics.
# Currently `netlink` is only supported in the host environment.
EnableNetlink = false
# IgnoredDevices: ignore the matched devices in the netdev statistics.
# AcceptDevices: accept the matched devices in the netdev statistics.
# These configurations are `Regexp` patterns.
# 'IgnoredDevices' has higher priority than 'AcceptDevices'.
IgnoredDevices = "^(lo)|(docker\\w*)|(veth\\w*)$"
#AcceptDevices = ""
# Qdisc Configurations.
[MetricCollector.Qdisc]
# IgnoredDevices: ignore the matched devices in the qdisc statistics.
# AcceptDevices: accept the matched devices in the qdisc statistics.
# These configurations are `Regexp` patterns.
# 'IgnoredDevices' has higher priority than 'AcceptDevices'.
IgnoredDevices = "^(lo)|(docker\\w*)|(veth\\w*)$"
#AcceptDevices = ""
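To make the IgnoredDevices/AcceptDevices precedence concrete, here is a minimal Go sketch of how a collector could apply the two patterns to an interface name. The helper name and the choice to default to "collect" when no AcceptDevices is configured are assumptions for illustration, not the collector's actual code.

package main

import (
    "fmt"
    "regexp"
)

// shouldCollect reports whether a device's statistics are collected,
// giving the ignore pattern priority over the accept pattern (hypothetical helper).
func shouldCollect(dev string, ignored, accepted *regexp.Regexp) bool {
    if ignored != nil && ignored.MatchString(dev) {
        return false // IgnoredDevices wins, even if the device is also accepted
    }
    if accepted != nil {
        return accepted.MatchString(dev)
    }
    return true // no AcceptDevices configured: keep everything not ignored
}

func main() {
    ignored := regexp.MustCompile(`^(lo)|(docker\w*)|(veth\w*)$`)
    fmt.Println(shouldCollect("eth0", ignored, nil)) // true
    fmt.Println(shouldCollect("lo", ignored, nil))   // false
}
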
[MetricCollector.Vmstat]
IncludedMetrics = "allocstall|nr_active_anon|nr_active_file|nr_boost_pages|nr_dirty|nr_free_pages|nr_inactive_anon|nr_inactive_file|nr_kswapd_boost|nr_mlock|nr_shmem|nr_slab_reclaimable|nr_slab_unreclaimable|nr_unevictable|nr_writeback|numa_pages_migrated|pgdeactivate|pgrefill|pgscan_direct|pgscan_kswapd|pgsteal_direct|pgsteal_kswapd"
ExcludedMetrics = "total"
[MetricCollector.MemoryStat]
IncludedMetrics = "active_anon|active_file|dirty|inactive_anon|inactive_file|pgdeactivate|pgrefill|pgscan_direct|pgscan_kswapd|pgsteal_direct|pgsteal_kswapd|shmem|unevictable|writeback|pgscan_globaldirect|pgscan_globalkswapd|pgscan_cswapd|pgsteal_cswapd|pgsteal_globaldirect|pgsteal_globalkswapd"
ExcludedMetrics = "total"
[MetricCollector.MemoryEvents]
IncludedMetrics = "watermark_inc|watermark_dec"
# ExcludedMetrics = ""
# Netstat Configurations.
[MetricCollector.Netstat]
# ExcludedMetrics: ignore the matched keys in the netstat statistics.
# IncludedMetrics: accept the matched keys in the netstat statistics.
# The 'key' format: protocol + '_' + netstat_name, e.g. TcpExt_TCPSynRetrans.
# These configurations are `Regexp` patterns.
# 'ExcludedMetrics' has higher priority than 'IncludedMetrics'.
#ExcludedMetrics = ""
#IncludedMetrics = ""
[MetricCollector.MountPointStat]
IncludedMountPoints = "(^/home$)|(^/$)|(^/boot$)"

# Known warning pattern filter
#
# array[0] - the name of the known issue
# array[1] - regex pattern that helps identify the known issue
# array[2] & array[3] - regexes of known clusters or containers that hit the issue
#
# Example:
# ["ep_poll", "ep_scan_ready_list.constprop.21\\+0x217", "athena-predict", ""]
# issue name: ep_poll
# ep_scan_ready_list.constprop.21\\+0x217 is used to identify the issue based on
# the stack backtrace
# athena-predict is part of the name of the clusters that are known to hit this
# issue frequently
[WarningFilter]
PatternList = [
    [
        "coredump",
        "do_exit\\+0x1c9",
        "",
        ""
    ],
    [
        "ep_poll",
        "ep_scan_ready_list.constprop.21\\+0x217",
        "athena-predict",
        ""
    ],
    [
        "php_parallel_exit",
        "unlink_anon_vmas\\+0x76"
    ],
    [
        "futex",
        "futex_wait_queue_me\\+0xc1"
    ],
    [
        "netrecvlat",
        "comm=gundam_client:"
    ],
    [
        "", "", "", ""
    ]
]
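The four-element format above reads as: issue name, backtrace regex, cluster regex, container regex, where an empty field matches anything and entries may omit trailing fields. A hedged Go sketch of evaluating a single padded entry; the helper and its exact matching rules are illustrative, not the filter's actual implementation.

package main

import (
    "fmt"
    "regexp"
)

// matchesKnownIssue reports whether a warning matches one PatternList entry:
// entry[1] is matched against the stack backtrace, entry[2] against the
// cluster name and entry[3] against the container name (empty = match all).
func matchesKnownIssue(entry [4]string, backtrace, cluster, container string) bool {
    match := func(pattern, s string) bool {
        if pattern == "" {
            return true
        }
        return regexp.MustCompile(pattern).MatchString(s)
    }
    return match(entry[1], backtrace) && match(entry[2], cluster) && match(entry[3], container)
}

func main() {
    entry := [4]string{"ep_poll", `ep_scan_ready_list.constprop.21\+0x217`, "athena-predict", ""}
    hit := matchesKnownIssue(entry, "ep_scan_ready_list.constprop.21+0x217/0x2a0", "athena-predict-123", "")
    fmt.Println(hit) // true
}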

[Pod]
KubeletPodListURL = "http://127.0.0.1:10255/pods"
DockerAPIVersion = "1.24"
@ -0,0 +1,129 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package bpf

import (
    "context"
)

// The BPF APIs
//
// The bpf manager has the following APIs:
//
//  // InitBpfManager initializes the bpf manager.
//  InitBpfManager() error
//
//  // CloseBpfManager closes the bpf manager.
//  CloseBpfManager()
//
//  // LoadBpf loads the bpf object and returns it.
//  LoadBpf(objName string, consts map[string]any) (BPF, error)

// AttachOption is an option for attaching a program.
type AttachOption struct {
    ProgramName string
    Symbol      string // symbol for kprobe/kretprobe/tracepoint/raw_tracepoint
    PerfEvent   struct { // BPF_PROG_TYPE_PERF_EVENT
        SamplePeriod, SampleFreq uint64
    }
}

// Info is the info of a bpf.
type Info struct {
    MapsInfo     []MapInfo
    ProgramsInfo []ProgramInfo
}

// MapInfo is the info of a map.
type MapInfo struct {
    ID   uint32
    Name string
}

// ProgramInfo is the info of a program.
type ProgramInfo struct {
    ID          uint32
    Name        string
    SectionName string
}

// MapItem describes a map element as a key-value pair.
type MapItem struct {
    Key   []byte
    Value []byte
}

// BPF is the interface implemented by a loaded bpf object.
type BPF interface {
    // Name returns the bpf name.
    Name() string

    // MapIDByName gets the map ID by name.
    MapIDByName(name string) uint32

    // ProgIDByName gets the program ID by name.
    ProgIDByName(name string) uint32

    // String returns a string representation of the bpf.
    String() string

    // Info gets bpf information.
    Info() (*Info, error)

    // Close closes the bpf.
    Close() error

    // AttachWithOptions attaches programs with options.
    AttachWithOptions(opts []AttachOption) error

    // Attach attaches the default programs.
    Attach() error

    // Detach detaches all programs.
    Detach() error

    // Loaded checks whether the bpf is still loaded.
    Loaded() (bool, error)

    // EventPipe opens the event pipe of a map and returns a PerfEventReader.
    EventPipe(ctx context.Context, mapID, perCPUBuffer uint32) (PerfEventReader, error)

    // EventPipeByName opens the event pipe by map name and returns a PerfEventReader.
    EventPipeByName(ctx context.Context, mapName string, perCPUBuffer uint32) (PerfEventReader, error)

    // AttachAndEventPipe attaches the default programs, opens the event pipe
    // by map name, and returns a PerfEventReader.
    AttachAndEventPipe(ctx context.Context, mapName string, perCPUBuffer uint32) (PerfEventReader, error)

    // ReadMap reads the value corresponding to a key from a map.
    //
    // NOTICE: The key needs to be converted to bytes, and the returned value
    // is raw bytes that the caller must convert back to the corresponding type.
    ReadMap(mapID uint32, key []byte) ([]byte, error)

    // WriteMapItems writes key-value items to a map.
    WriteMapItems(mapID uint32, items []MapItem) error

    // DeleteMapItems deletes multiple items from a BPF map by keys.
    DeleteMapItems(mapID uint32, keys [][]byte) error

    // DumpMap dumps all items of the map.
    DumpMap(mapID uint32) ([]MapItem, error)

    // DumpMapByName dumps all items of the map identified by name.
    DumpMapByName(mapName string) ([]MapItem, error)

    // WaitDetachByBreaker checks the bpf's status.
    WaitDetachByBreaker(ctx context.Context, cancel context.CancelFunc)
}
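For orientation, a minimal usage sketch of this interface from inside the same package. The object name "oom.o", the rewritten constant "filter_pid", the map name "perf_events" and the buffer size are illustrative assumptions, and consuming the returned PerfEventReader is omitted because its API is not part of this excerpt.

// exampleLoadAndAttach is a sketch, not part of the package API.
func exampleLoadAndAttach(ctx context.Context) error {
    if err := InitBpfManager(); err != nil {
        return err
    }
    defer CloseBpfManager()

    // Load an object shipped in the bpf directory and rewrite one constant.
    b, err := LoadBpf("oom.o", map[string]any{"filter_pid": uint32(0)})
    if err != nil {
        return err
    }
    defer b.Close()

    // Attach the default programs and open the perf event pipe in one call.
    reader, err := b.AttachAndEventPipe(ctx, "perf_events", 8192)
    if err != nil {
        return err
    }
    _ = reader // consume perf events here
    return nil
}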
@ -0,0 +1,595 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !didi

package bpf

import (
    "bytes"
    "context"
    "fmt"
    "io"
    "os"
    "path/filepath"
    "runtime"
    "strconv"
    "strings"

    "huatuo-bamai/internal/log"

    "github.com/cilium/ebpf"
    "github.com/cilium/ebpf/link"
    "golang.org/x/sys/unix"
)

const (
    bpfFileDirectory = "./bpf"
)

// InitBpfManager initializes the bpf manager.
func InitBpfManager() error {
    // lift RLIMIT_MEMLOCK so bpf maps and programs can be created
    return unix.Setrlimit(unix.RLIMIT_MEMLOCK, &unix.Rlimit{
        Cur: unix.RLIM_INFINITY,
        Max: unix.RLIM_INFINITY,
    })
}

// CloseBpfManager closes the bpf manager.
func CloseBpfManager() {}

type mapSpec struct {
    name string
    bMap *ebpf.Map
}

type programSpec struct {
    name          string
    specType      ebpf.ProgramType
    sectionName   string
    sectionPrefix string
    bProg         *ebpf.Program
    links         map[string]link.Link
}

type defaultBPF struct {
    name            string
    mapSpecs        map[uint32]mapSpec
    programSpecs    map[uint32]programSpec
    mapName2IDs     map[string]uint32
    programName2IDs map[string]uint32
}

// compile-time assertion that defaultBPF implements the BPF interface
var _ BPF = (*defaultBPF)(nil)

// LoadBpfFromBytes loads the bpf from bytes.
func LoadBpfFromBytes(bpfName string, bpfBytes []byte, consts map[string]any) (BPF, error) {
    return loadBpfFromReader(bpfName, bytes.NewReader(bpfBytes), consts)
}

// LoadBpf loads the bpf object file from the bpf directory and returns it.
func LoadBpf(bpfName string, consts map[string]any) (BPF, error) {
    f, err := os.Open(filepath.Join(bpfFileDirectory, bpfName))
    if err != nil {
        return nil, err
    }
    defer f.Close()

    return loadBpfFromReader(bpfName, f, consts)
}

// loadBpfFromReader loads the bpf from reader.
func loadBpfFromReader(bpfName string, rd io.ReaderAt, consts map[string]any) (BPF, error) {
    specs, err := ebpf.LoadCollectionSpecFromReader(rd)
    if err != nil {
        return nil, fmt.Errorf("can't parse the bpf file %s: %w", bpfName, err)
    }

    // rewrite the constants declared in the object before loading
    if consts != nil {
        if err := specs.RewriteConstants(consts); err != nil {
            return nil, fmt.Errorf("can't rewrite constants: %w", err)
        }
    }

    // load maps and programs into the kernel.
    coll, err := ebpf.NewCollection(specs)
    if err != nil {
        return nil, fmt.Errorf("can't new the bpf collection: %w", err)
    }
    defer coll.Close()

    b := &defaultBPF{
        name:         bpfName,
        mapSpecs:     make(map[uint32]mapSpec),
        programSpecs: make(map[uint32]programSpec),
    }

    // maps
    for name, spec := range specs.Maps {
        m, ok := coll.Maps[name]
        if !ok {
            continue
        }

        info, err := m.Info()
        if err != nil {
            return nil, fmt.Errorf("can't get map info: %w", err)
        }

        id, ok := info.ID()
        if !ok {
            return nil, fmt.Errorf("invalid map ID: %v", id)
        }

        bMap, err := m.Clone()
        if err != nil {
            return nil, fmt.Errorf("can't clone map: %w", err)
        }

        b.mapSpecs[uint32(id)] = mapSpec{
            name: spec.Name,
            bMap: bMap,
        }
    }

    // programs
    for name, spec := range specs.Programs {
        p, ok := coll.Programs[name]
        if !ok {
            continue
        }

        info, err := p.Info()
        if err != nil {
            return nil, fmt.Errorf("can't get program info: %w", err)
        }

        id, ok := info.ID()
        if !ok {
            return nil, fmt.Errorf("invalid program ID: %v", id)
        }

        bProg, err := p.Clone()
        if err != nil {
            return nil, fmt.Errorf("can't clone program: %w", err)
        }

        b.programSpecs[uint32(id)] = programSpec{
            name:          spec.Name,
            specType:      spec.Type,
            sectionName:   spec.SectionName,
            sectionPrefix: strings.SplitN(spec.SectionName, "/", 2)[0],
            bProg:         bProg,
            links:         make(map[string]link.Link),
        }
    }

    // mapName2IDs
    b.mapName2IDs = make(map[string]uint32, len(b.mapSpecs))
    for id, m := range b.mapSpecs {
        b.mapName2IDs[m.name] = id
    }

    // programName2IDs
    b.programName2IDs = make(map[string]uint32, len(b.programSpecs))
    for id, p := range b.programSpecs {
        b.programName2IDs[p.name] = id
    }

    log.Infof("loaded bpf: %s", b)

    // auto clean: close the bpf once it becomes unreachable
    runtime.SetFinalizer(b, (*defaultBPF).Close)
    return b, nil
}
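As a usage note for the loaders above: RewriteConstants fails if a named constant is not present in the compiled object, so callers pass exactly the constants their bpf C source exposes. A minimal same-package sketch with a hypothetical object and constant name follows.

// exampleLoadFromBytes is a sketch only; "example.o" and "filter_pid" are
// illustrative names, not objects or constants shipped by this repository.
func exampleLoadFromBytes(obj []byte) (BPF, error) {
    return LoadBpfFromBytes("example.o", obj, map[string]any{
        "filter_pid": uint32(0), // 0 means "trace every process" in this sketch
    })
}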

// Name returns the name of the bpf.
func (b *defaultBPF) Name() string {
    return b.name
}

// MapIDByName gets the map ID by name.
func (b *defaultBPF) MapIDByName(name string) uint32 {
    return b.mapName2IDs[name]
}

// ProgIDByName gets the program ID by name.
func (b *defaultBPF) ProgIDByName(name string) uint32 {
    return b.programName2IDs[name]
}

// String returns a string representation of the bpf.
func (b *defaultBPF) String() string {
    return fmt.Sprintf("%s#%d#%d", b.name, len(b.mapSpecs), len(b.programSpecs))
}

// Info gets defaultBPF information.
func (b *defaultBPF) Info() (*Info, error) {
    info := &Info{
        MapsInfo:     make([]MapInfo, 0, len(b.mapSpecs)),
        ProgramsInfo: make([]ProgramInfo, 0, len(b.programSpecs)),
    }

    // maps
    for id, m := range b.mapSpecs {
        info.MapsInfo = append(info.MapsInfo, MapInfo{
            ID:   id,
            Name: m.name,
        })
    }

    // programs
    for id, p := range b.programSpecs {
        info.ProgramsInfo = append(info.ProgramsInfo, ProgramInfo{
            ID:          id,
            Name:        p.name,
            SectionName: p.sectionName,
        })
    }

    return info, nil
}

// Close closes the bpf and releases its maps, programs and links.
func (b *defaultBPF) Close() error {
    for _, m := range b.mapSpecs {
        m.bMap.Close()
    }

    for _, p := range b.programSpecs {
        for _, l := range p.links {
            l.Close()
        }
        p.bProg.Close()
    }

    return nil
}

// AttachWithOptions attaches programs with options.
func (b *defaultBPF) AttachWithOptions(opts []AttachOption) error {
    var err error

    defer func() {
        if err != nil { // detach all programs on error.
            _ = b.Detach()
        }
    }()

    for _, opt := range opts {
        progID := b.ProgIDByName(opt.ProgramName)
        spec := b.programSpecs[progID]
        switch spec.specType {
        case ebpf.TracePoint:
            // opt.Symbol: <system>/<symbol>
            symbols := strings.SplitN(opt.Symbol, "/", 2)
            if len(symbols) != 2 {
                return fmt.Errorf("bpf %s: invalid symbol: %s", b, opt.Symbol)
            }

            if err = b.attachTracepoint(progID, symbols[0], symbols[1]); err != nil {
                return fmt.Errorf("attach tracepoint with options %v: %w", opt, err)
            }
        case ebpf.Kprobe:
            // opt.Symbol: <symbol>[+<offset>]
            // opt.Symbol: <symbol>
            if err = b.attachKprobe(progID, opt.Symbol, spec.sectionPrefix == "kretprobe"); err != nil {
                return fmt.Errorf("attach kprobe with options %v: %w", opt, err)
            }
        case ebpf.RawTracepoint:
            // opt.Symbol: <symbol>
            if err = b.attachRawTracepoint(progID, opt.Symbol); err != nil {
                return fmt.Errorf("attach raw tracepoint with options %v: %w", opt, err)
            }
        case ebpf.PerfEvent:
            // opt.PerfEvent: SamplePeriod/SampleFreq
            if err = b.attachPerfEvent(progID, opt.PerfEvent.SamplePeriod, opt.PerfEvent.SampleFreq); err != nil {
                return fmt.Errorf("attach perf event with options %v: %w", opt, err)
            }
        default:
            return fmt.Errorf("bpf %s: unsupported program type: %s", b, spec.specType)
        }
    }

    return nil
}
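A sketch of driving AttachWithOptions explicitly instead of relying on Attach()'s section-name defaults; the program names and symbols below are illustrative assumptions and must correspond to programs of the matching type in the loaded object.

// exampleAttachWithOptions is a sketch, not part of the package API.
func exampleAttachWithOptions(b BPF) error {
    return b.AttachWithOptions([]AttachOption{
        {
            // kprobe program: Symbol is "<symbol>" or "<symbol>+<offset>"
            ProgramName: "kprobe_tcp_v4_do_rcv",
            Symbol:      "tcp_v4_do_rcv",
        },
        {
            // tracepoint program: Symbol is "<system>/<symbol>"
            ProgramName: "tp_sched_switch",
            Symbol:      "sched/sched_switch",
        },
    })
}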

// Attach attaches the default programs according to their section names.
func (b *defaultBPF) Attach() error {
    var err error

    defer func() {
        if err != nil { // detach all programs on error.
            _ = b.Detach()
        }
    }()

    for progID, spec := range b.programSpecs {
        switch spec.specType {
        case ebpf.TracePoint:
            // section: tracepoint/<system>/<symbol>
            symbols := strings.SplitN(spec.sectionName, "/", 3)
            if len(symbols) != 3 {
                return fmt.Errorf("bpf %s: invalid section name: %s", b, spec.sectionName)
            }

            if err = b.attachTracepoint(progID, symbols[1], symbols[2]); err != nil {
                return fmt.Errorf("attach tracepoint: %w", err)
            }
        case ebpf.Kprobe:
            // section: kprobe/<symbol>[+<offset>]
            // section: kretprobe/<symbol>
            symbols := strings.SplitN(spec.sectionName, "/", 2)
            if len(symbols) != 2 {
                return fmt.Errorf("bpf %s: invalid section name: %s", b, spec.sectionName)
            }

            if err = b.attachKprobe(progID, symbols[1], symbols[0] == "kretprobe"); err != nil {
                return fmt.Errorf("attach kprobe: %w", err)
            }
        case ebpf.RawTracepoint:
            // section: raw_tracepoint/<symbol>
            symbols := strings.SplitN(spec.sectionName, "/", 2)
            if len(symbols) != 2 {
                return fmt.Errorf("bpf %s: invalid section name: %s", b, spec.sectionName)
            }

            if err = b.attachRawTracepoint(progID, symbols[1]); err != nil {
                return fmt.Errorf("attach raw tracepoint: %w", err)
            }
        default:
            return fmt.Errorf("bpf %s: unsupported program type: %s", b, spec.specType)
        }
    }

    return nil
}
|
||||
|
||||
func (b *defaultBPF) attachKprobe(progID uint32, symbol string, isRetprobe bool) error {
|
||||
spec := b.programSpecs[progID]
|
||||
|
||||
if !isRetprobe { // kprobe
|
||||
// : <symbol>[+<offset>]
|
||||
// : <symbol>
|
||||
var (
|
||||
err error
|
||||
offset uint64
|
||||
)
|
||||
|
||||
symOffsets := strings.Split(symbol, "+")
|
||||
if len(symOffsets) > 2 {
|
||||
return fmt.Errorf("bpf %s: invalid symbol: %s", b, symbol)
|
||||
} else if len(symOffsets) == 2 {
|
||||
offset, err = strconv.ParseUint(symOffsets[1], 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("bpf %s: invalid symbol: %s", b, symbol)
|
||||
}
|
||||
}
|
||||
|
||||
linkKey := fmt.Sprintf("%s+%d", symOffsets[0], offset)
|
||||
if _, ok := spec.links[linkKey]; ok {
|
||||
return fmt.Errorf("bpf %s: duplicate symbol: %s", b, symbol)
|
||||
}
|
||||
|
||||
opts := link.KprobeOptions{
|
||||
Offset: offset,
|
||||
}
|
||||
l, err := link.Kprobe(symOffsets[0], spec.bProg, &opts)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't attach kprobe %s in %v: %w", symbol, spec.bProg, err)
|
||||
}
|
||||
|
||||
spec.links[linkKey] = l
|
||||
log.Infof("attach kprobe %s in %v, links: %v", symbol, spec.bProg, spec.links)
|
||||
} else { // kretprobe
|
||||
linkKey := symbol
|
||||
if _, ok := spec.links[linkKey]; ok {
|
||||
return fmt.Errorf("bpf %s: duplicate symbol: %s", b, symbol)
|
||||
}
|
||||
|
||||
l, err := link.Kretprobe(symbol, spec.bProg, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't attach kretprobe %s in %v: %w", symbol, spec.bProg, err)
|
||||
}
|
||||
|
||||
spec.links[linkKey] = l
|
||||
log.Infof("attach kretprobe %s in %v, links: %v", symbol, spec.bProg, spec.links)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *defaultBPF) attachTracepoint(progID uint32, system, symbol string) error {
|
||||
spec := b.programSpecs[progID]
|
||||
|
||||
linkKey := fmt.Sprintf("%s/%s", system, symbol)
|
||||
if _, ok := spec.links[linkKey]; ok {
|
||||
return fmt.Errorf("bpf %s: duplicate symbol: %s", b, symbol)
|
||||
}
|
||||
|
||||
l, err := link.Tracepoint(system, symbol, spec.bProg, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't attach tracepoint %s/%s in %v: %w", system, symbol, spec.bProg, err)
|
||||
}
|
||||
|
||||
spec.links[linkKey] = l
|
||||
log.Infof("attach tracepoint %s/%s in %v, links: %v", system, symbol, spec.bProg, spec.links)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *defaultBPF) attachRawTracepoint(progID uint32, symbol string) error {
|
||||
spec := b.programSpecs[progID]
|
||||
|
||||
linkKey := symbol
|
||||
if _, ok := spec.links[linkKey]; ok {
|
||||
return fmt.Errorf("bpf %s: duplicate symbol: %s", b, symbol)
|
||||
}
|
||||
|
||||
l, err := link.AttachRawTracepoint(link.RawTracepointOptions{
|
||||
Name: symbol,
|
||||
Program: spec.bProg,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't attach raw tracepoint %s in %v: %w", symbol, spec.bProg, err)
|
||||
}
|
||||
|
||||
spec.links[linkKey] = l
|
||||
log.Infof("attach raw tracepoint %s in %v, links: %v", symbol, spec.bProg, spec.links)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *defaultBPF) attachPerfEvent(progID uint32, samplePeriod, sampleFrequency uint64) error {
|
||||
// TODO implement
|
||||
return fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
// Detach all programs.
|
||||
func (b *defaultBPF) Detach() error {
|
||||
for _, spec := range b.programSpecs {
|
||||
for _, l := range spec.links {
|
||||
err := l.Close()
|
||||
log.Infof("detach %s in %v: %v", spec.sectionName, spec.bProg, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Loaded checks bpf is still loaded.
|
||||
func (b *defaultBPF) Loaded() (bool, error) {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// EventPipe gets event-pipe and returns a PerfEventReader.
|
||||
func (b *defaultBPF) EventPipe(ctx context.Context, mapID, perCPUBuffer uint32) (PerfEventReader, error) {
|
||||
reader, err := newPerfEventReader(ctx, b.mapSpecs[mapID].bMap, int(perCPUBuffer))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Infof("event-pipe %d, perCPUBuffer %d", mapID, perCPUBuffer)
|
||||
return reader, nil
|
||||
}
|
||||
|
||||
// EventPipeByName gets event-pipe by the mapName and returns a PerfEventReader.
|
||||
func (b *defaultBPF) EventPipeByName(ctx context.Context, mapName string, perCPUBuffer uint32) (PerfEventReader, error) {
|
||||
return b.EventPipe(ctx, b.MapIDByName(mapName), perCPUBuffer)
|
||||
}
|
||||
|
||||
// AttachAndEventPipe attaches and event-pipe and returns a PerfEventReader.
|
||||
func (b *defaultBPF) AttachAndEventPipe(ctx context.Context, mapName string, perCPUBuffer uint32) (PerfEventReader, error) {
|
||||
reader, err := b.EventPipeByName(ctx, mapName, perCPUBuffer)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := b.Attach(); err != nil {
|
||||
reader.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Infof("attach and event-pipe %s, perCPUBuffer %d", mapName, perCPUBuffer)
|
||||
return reader, nil
|
||||
}
|
||||
|
||||
// ReadMap read the value content corresponding to a key from a map
|
||||
//
|
||||
// NOTICE: The content of the key needs to be converted to byte type, and the
|
||||
// obtained value is of byte type, which also needs to be converted to the
|
||||
// corresponding type.
|
||||
func (b *defaultBPF) ReadMap(mapID uint32, key []byte) ([]byte, error) {
|
||||
val, err := b.mapSpecs[mapID].bMap.LookupBytes(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Debugf("read map %d, key %v, value %v", mapID, key, val)
|
||||
return val, nil
|
||||
}
|
||||
|
||||
// WriteMapItems write the value content corresponding to a key to a map.
|
||||
func (b *defaultBPF) WriteMapItems(mapID uint32, items []MapItem) error {
|
||||
m := b.mapSpecs[mapID].bMap
|
||||
|
||||
for _, item := range items {
|
||||
if err := m.Update(item.Key, item.Value, ebpf.UpdateAny); err != nil {
|
||||
return fmt.Errorf("map %d, key %v: update: %w", mapID, item.Key, err)
|
||||
}
|
||||
log.Infof("write map %d, key %v, value %v", mapID, item.Key, item.Value)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteMapItems deletes multiple items from a BPF map by keys.
|
||||
func (b *defaultBPF) DeleteMapItems(mapID uint32, keys [][]byte) error {
|
||||
m := b.mapSpecs[mapID].bMap
|
||||
|
||||
for _, k := range keys {
|
||||
if err := m.Delete(k); err != nil {
|
||||
return fmt.Errorf("map %d, key %v: delete: %w", mapID, k, err)
|
||||
}
|
||||
log.Infof("delete map %d, key %v", mapID, k)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DumpMap dump all the context of the map
|
||||
func (b *defaultBPF) DumpMap(mapID uint32) ([]MapItem, error) {
|
||||
m := b.mapSpecs[mapID].bMap
|
||||
|
||||
var prevKey any
|
||||
items := []MapItem{}
|
||||
for i := 0; i < int(m.MaxEntries()); i++ {
|
||||
nextKey, err := m.NextKeyBytes(prevKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("map %d, prevKey %v: next key: %w", mapID, prevKey, err)
|
||||
}
|
||||
|
||||
// last key
|
||||
if len(nextKey) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
value, err := m.LookupBytes(nextKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("map %d, key %v: value: %w", mapID, nextKey, err)
|
||||
}
|
||||
|
||||
if value == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
prevKey = nextKey
|
||||
items = append(items, MapItem{
|
||||
Key: nextKey,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
|
||||
log.Debugf("dump map %d, items %v", mapID, items)
|
||||
return items, nil
|
||||
}
|
||||
|
||||
// DumpMapByName dump all the context of the map.
|
||||
func (b *defaultBPF) DumpMapByName(mapName string) ([]MapItem, error) {
|
||||
return b.DumpMap(b.MapIDByName(mapName))
|
||||
}
|
||||
|
||||
// WaitDetachByBreaker check the bpf's status.
|
||||
func (b *defaultBPF) WaitDetachByBreaker(ctx context.Context, cancel context.CancelFunc) {
|
||||
// TODO: implement
|
||||
}
|
|
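// Illustrative sketch (not part of the original change): how a caller might
// encode a key and decode a value for ReadMap, assuming a hypothetical map
// named "demo_map" keyed by a uint32 pid that stores a uint64 counter; the
// map name and layout are assumptions for illustration only, and the caller
// needs "encoding/binary" imported.
//
//	func readCounter(b *defaultBPF, pid uint32) (uint64, error) {
//		key := make([]byte, 4)
//		binary.NativeEndian.PutUint32(key, pid) // the key is passed as raw bytes
//
//		val, err := b.ReadMap(b.MapIDByName("demo_map"), key)
//		if err != nil {
//			return 0, err
//		}
//		// the returned value is raw bytes; convert it back to its real type
//		return binary.NativeEndian.Uint64(val), nil
//	}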
@ -0,0 +1,24 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package bpf

// PerfEventReader reads the eBPF perf_event.
type PerfEventReader interface {
	// ReadInto reads the eBPF perf_event into pdata.
	ReadInto(pdata any) error

	// Close the PerfEventReader.
	Close() error
}
@ -0,0 +1,96 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !didi

package bpf

import (
	"bytes"
	"context"
	"encoding/binary"
	"fmt"
	"os"
	"time"

	"huatuo-bamai/pkg/types"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/perf"
	"github.com/pkg/errors"
)

// perfEventReader reads the eBPF perf_event_array.
type perfEventReader struct {
	ctx       context.Context
	rd        *perf.Reader
	cancelCtx context.CancelFunc
}

// _ is a type assertion
var _ PerfEventReader = (*perfEventReader)(nil)

// newPerfEventReader creates a new perfEventReader.
func newPerfEventReader(ctx context.Context, array *ebpf.Map, perCPUBuffer int) (PerfEventReader, error) {
	rd, err := perf.NewReader(array, perCPUBuffer)
	if err != nil {
		return nil, fmt.Errorf("can't create the perf event reader: %w", err)
	}

	readerCtx, cancel := context.WithCancel(ctx)
	return &perfEventReader{ctx: readerCtx, rd: rd, cancelCtx: cancel}, nil
}

// Close the perfEventReader.
func (r *perfEventReader) Close() error {
	r.cancelCtx()
	r.rd.Close()

	return nil
}

// ReadInto reads the eBPF perf_event into pdata.
func (r *perfEventReader) ReadInto(pdata any) error {
	for {
		select {
		case <-r.ctx.Done():
			return types.ErrExitByCancelCtx
		default:
			// set the poll deadline to 100ms
			r.rd.SetDeadline(time.Now().Add(100 * time.Millisecond))

			// read the event
			record, err := r.rd.Read()
			if err != nil {
				if errors.Is(err, perf.ErrClosed) { // Close
					return fmt.Errorf("perfEventReader is closed: %w", types.ErrExitByCancelCtx)
				} else if errors.Is(err, os.ErrDeadlineExceeded) { // poll deadline
					continue
				}
				return fmt.Errorf("failed to read the event: %w", err)
			}

			if record.LostSamples != 0 {
				continue
			}

			// parse the event
			if err := binary.Read(bytes.NewBuffer(record.RawSample), binary.NativeEndian, pdata); err != nil {
				return fmt.Errorf("failed to parse the event: %w", err)
			}

			return nil
		}
	}
}
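// Illustrative sketch (not part of the original change): ReadInto decodes the
// raw perf sample with binary.Read and binary.NativeEndian, so pdata should be
// a pointer to a struct containing only fixed-size fields that mirror the
// C-side event layout. The event type below is a hypothetical example, not a
// real event emitted by this repository's BPF programs.
//
//	type demoEvent struct {
//		Pid   uint32
//		_     uint32 // explicit padding to match the C struct alignment
//		Nsecs uint64
//		Comm  [16]byte
//	}
//
//	var ev demoEvent
//	if err := reader.ReadInto(&ev); err != nil {
//		// types.ErrExitByCancelCtx is returned when the context is cancelled
//		return err
//	}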
@ -0,0 +1,340 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package conf

import (
	"fmt"
	"os"
	"reflect"
	"regexp"
	"strings"
	"sync"

	"huatuo-bamai/internal/log"

	"github.com/pelletier/go-toml"
)

// CommonConf global common configuration
type CommonConf struct {
	LogLevel string `default:"Info"`
	LogFile  string

	// APIServer addr
	APIServer struct {
		TCPAddr string `default:":19704"`
	}

	// HuaTuo config
	HuaTuoConf struct {
		UserName         string
		PassWord         string
		UnixAddr         string
		ServerIP         string
		APIVersion       string
		ReqTimeout       int
		OnlyOneSession   bool `default:"true"`
		KeepaliveEnable  bool `default:"true"`
		KeepaliveTimeout int
	}

	// RuntimeCgroup for huatuo-bamai resource
	RuntimeCgroup struct {
		// limit cpu num 0.5 2.0
		// limit memory (MB)
		LimitInitCPU float64 `default:"0.5"`
		LimitCPU     float64 `default:"2.0"`
		LimitMem     int64   `default:"2048"`
	}

	// Storage for huatuo-bamai tracer storage
	Storage struct {
		// ES configurations
		ES struct {
			Address, Username, Password, Index string
		}

		// LocalFile record file configuration
		LocalFile struct {
			Path         string `default:"record"`
			RotationSize int    `default:"100"`
			MaxRotation  int    `default:"10"`
		}
	}

	TaskConfig struct {
		MaxRunningTask int `default:"10"`
	}

	Tracing struct {
		// blacklist
		BlackList []string

		// Cpuidle for cpuidle configuration
		Cpuidle struct {
			CgUserth            uint64
			CgDeltaUserth       int64
			CgSysth             uint64
			CgDeltaSysth        int64
			CgUsageth           uint64
			CgDeltaUsageth      int64
			CgStep              int64
			CgGrace             int64
			CgUsageToolduration int64
		}

		// Cpusys for cpusys configuration
		Cpusys struct {
			CPUSysth           uint64
			CPUSysDelta        int64
			CPUSysStep         int64
			CPUSysToolduration int64
		}

		// Waitrate for waitrate.go
		Waitrate struct {
			SpikeThreshold map[string]float64
			SlopeThreshold map[string]float64
			SampleConfig   map[string]int
		}

		// Softirq for softirq threshold configuration
		Softirq struct {
			ThresholdTime uint64
		}

		// Dload for dload threshold configuration
		Dload struct {
			ThresholdLoad float64
			MonitorGap    int
		}

		// IOTracing for iotracer threshold configuration
		IOTracing struct {
			IOScheduleThreshold uint64
			ReadThreshold       uint64
			WriteThreshold      uint64
			IOutilThreshold     uint64
			IOwaitThreshold     uint64
			PeriodSecond        uint64
			MaxStackNumber      int
			TopProcessCount     int
			TopFilesPerProcess  int
		}

		// MemoryReclaim for MemoryReclaim configuration
		MemoryReclaim struct {
			Deltath uint64
		}

		// MemoryBurst configuration
		MemoryBurst struct {
			HistoryWindowLength int
			SampleInterval      int
			SilencePeriod       int
			TopNProcesses       int
			BurstRatio          float64
			AnonThreshold       int
		}

		// NetRecvLat configuration
		NetRecvLat struct {
			ToNetIf              uint64
			ToTCPV4              uint64
			ToUserCopy           uint64
			IgnoreHost           bool
			IgnoreContainerLevel []int
		}

		// Dropwatch configuration
		Dropwatch struct {
			IgnoreNeighInvalidate bool
		}

		// Netdev configuration
		Netdev struct {
			Whitelist []string
		}

		Fastfork struct {
			RedisInfoCollectionInterval uint32 `default:"3600"`
			EnableForkProbe             uint32 `default:"1"`
			EnablePtsepProbe            uint32 `default:"1"`
			EnableWaitptsepProbe        uint32 `default:"1"`
		}
	}

	MetricCollector struct {
		// blacklist
		BlackList []string

		Netdev struct {
			// Use `netlink` instead of `procfs net/dev` to get netdev statistics.
			// Only the host environment supports `netlink` for now.
			EnableNetlink bool
			// IgnoredDevices: ignore matching devices in the netdev statistics.
			// AcceptDevices: accept matching devices in the netdev statistics.
			// These configurations use `Regexp`.
			// 'IgnoredDevices' has higher priority than 'AcceptDevices'.
			IgnoredDevices, AcceptDevices string
		}
		Qdisc struct {
			// IgnoredDevices: ignore matching devices in the qdisc statistics.
			// AcceptDevices: accept matching devices in the qdisc statistics.
			// These configurations use `Regexp`.
			// 'IgnoredDevices' has higher priority than 'AcceptDevices'.
			IgnoredDevices, AcceptDevices string
		}
		Vmstat struct {
			IncludedMetrics, ExcludedMetrics string
		}
		MemoryStat struct {
			IncludedMetrics, ExcludedMetrics string
		}
		MemoryEvents struct {
			IncludedMetrics, ExcludedMetrics string
		}
		Netstat struct {
			// ExcludedMetrics: ignore keys in the netstat statistics.
			// IncludedMetrics: accept keys in the netstat statistics.
			// The 'key' format: protocol + '_' + netstat_name, e.g. TcpExt_TCPSynRetrans.
			// These configurations use `Regexp`.
			// 'ExcludedMetrics' has higher priority than 'IncludedMetrics'.
			ExcludedMetrics, IncludedMetrics string
		}
		MountPointStat struct {
			IncludedMountPoints string
		}
	}

	// WarningFilter for filtering the known issues
	WarningFilter struct {
		PatternList [][]string
	}

	// Pod configuration
	Pod struct {
		KubeletPodListURL string `default:"http://127.0.0.1:10255/pods"`
		DockerAPIVersion  string `default:"1.24"`
	}
}

var (
	lock       = sync.Mutex{}
	configFile = ""
	config     = &CommonConf{}

	// Region is the region the host and containers belong to.
	Region string
)

// LoadConfig loads the conf file.
func LoadConfig(path string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	// defaults.SetDefaults(config)
	d := toml.NewDecoder(f)
	if err := d.Strict(true).Decode(config); err != nil {
		return err
	}

	// MB
	config.RuntimeCgroup.LimitMem *= 1024 * 1024
	configFile = path

	log.Infof("Loadconfig:\n%+v\n", config)
	return nil
}

// Get returns the global configuration obj.
func Get() *CommonConf {
	return config
}

// Set modifies the configuration obj.
//
// @key: supported keys
//   - "Key1"
//   - "Key1.Key2"
func Set(key string, val any) {
	lock.Lock()
	defer lock.Unlock()

	// find key
	c := reflect.ValueOf(config)
	for _, k := range strings.Split(key, ".") {
		elem := c.Elem().FieldByName(k)
		if !elem.IsValid() || !elem.CanAddr() {
			panic(fmt.Errorf("invalid elem %s: %v", key, elem))
		}
		c = elem.Addr()
	}

	// assign
	rc := reflect.Indirect(c)
	rval := reflect.ValueOf(val)
	if rc.Kind() != rval.Kind() {
		panic(fmt.Errorf("%s type %s is not assignable to type %s", key, rc.Kind(), rval.Kind()))
	}

	rc.Set(rval)
	log.Infof("Config: set %s = %v", key, val)
}

// Sync writes the config data to the file.
func Sync() error {
	f, err := os.Create(configFile)
	if err != nil {
		return err
	}
	defer f.Close()

	encoder := toml.NewEncoder(f)
	return encoder.Encode(config)
}

// KnownIssueSearch searches the known issue patterns in
// the stack and returns the pattern name if found.
func KnownIssueSearch(srcPattern, srcMatching1, srcMatching2 string) (issueName string, inKnownList uint64) {
	for _, p := range config.WarningFilter.PatternList {
		if len(p) < 2 {
			log.Infof("Invalid configuration, please check the config file!")
			return "", 0
		}

		rePattern := regexp.MustCompile(p[1])
		if rePattern.MatchString(srcPattern) {
			if srcMatching1 != "" && len(p) >= 3 && p[2] != "" {
				re1 := regexp.MustCompile(p[2])
				if re1.MatchString(srcMatching1) {
					return p[0], 1
				}
			}

			if srcMatching2 != "" && len(p) >= 4 && p[3] != "" {
				re2 := regexp.MustCompile(p[3])
				if re2.MatchString(srcMatching2) {
					return p[0], 1
				}
			}

			return p[0], 0
		}
	}
	return "", 0
}
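// Illustrative sketch (not part of the original change): Set walks the struct
// fields named in the dotted key with reflection, so each key segment must
// match an exported field name and the value's reflect.Kind must match the
// field's kind; otherwise Set panics, as the code above shows.
//
//	conf.Set("LogLevel", "Debug")              // top-level string field
//	conf.Set("TaskConfig.MaxRunningTask", 20)  // nested int field
//	conf.Set("TaskConfig.MaxRunningTask", "20") // panics: string kind vs int kind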
@ -0,0 +1,235 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package flamegraph

import (
	"github.com/grafana/grafana-plugin-sdk-go/data"
)

// Level is a depth array of flame graph data
type Level struct {
	Values []int64
}

// Flamebearer is pyroscope flame graph data
type Flamebearer struct {
	Names   []string
	Levels  []*Level
	Total   int64
	MaxSelf int64
}

// StartOffest is the offset of the bar relative to the previous sibling
const StartOffest = 0

// ValueOffest is the value (width) of the bar
const ValueOffest = 1

// SelfOffest is the self value of the bar
const SelfOffest = 2

// NameOffest is the index into the names array
const NameOffest = 3

// ItemOffest is the stride to the next bar; each bar of the profile is represented by 4 numbers in a flat array.
const ItemOffest = 4

// ProfileTree grafana tree struct
type ProfileTree struct {
	Start int64
	Value int64
	Self  int64
	Level int
	Name  string
	Nodes []*ProfileTree
}

// LevelsToTree converts the flamebearer format into a tree. This is needed to then convert it into nested set format.
func LevelsToTree(levels []*Level, names []string) *ProfileTree {
	if len(levels) == 0 {
		return nil
	}

	tree := &ProfileTree{
		Start: 0,
		Value: levels[0].Values[ValueOffest],
		Self:  levels[0].Values[SelfOffest],
		Level: 0,
		Name:  names[levels[0].Values[0]],
	}

	parentsStack := []*ProfileTree{tree}
	currentLevel := 1

	// Cycle through each level
	for {
		if currentLevel >= len(levels) {
			break
		}

		// If we still have levels to go, this should not happen. Something is probably wrong with the flamebearer data.
		if len(parentsStack) == 0 {
			break
		}

		var nextParentsStack []*ProfileTree
		currentParent := parentsStack[:1][0]
		parentsStack = parentsStack[1:]
		itemIndex := 0
		// cumulative offset, as items in the flamebearer format are only relative to the previous item
		offset := int64(0)

		// Cycle through the bars in a level
		for {
			if itemIndex >= len(levels[currentLevel].Values) {
				break
			}

			itemStart := levels[currentLevel].Values[itemIndex+StartOffest] + offset
			itemValue := levels[currentLevel].Values[itemIndex+ValueOffest]
			selfValue := levels[currentLevel].Values[itemIndex+SelfOffest]
			itemEnd := itemStart + itemValue
			parentEnd := currentParent.Start + currentParent.Value

			if itemStart >= currentParent.Start && itemEnd <= parentEnd {
				// We have an item that is in the bounds of the current parent item, so it should be its child
				treeItem := &ProfileTree{
					Start: itemStart,
					Value: itemValue,
					Self:  selfValue,
					Level: currentLevel,
					Name:  names[levels[currentLevel].Values[itemIndex+NameOffest]],
				}
				// Add to parent
				currentParent.Nodes = append(currentParent.Nodes, treeItem)
				// Add this item as a parent for the next level
				nextParentsStack = append(nextParentsStack, treeItem)
				itemIndex += ItemOffest

				// Update the offset for the next item. This changes the relative offset to an absolute one.
				offset = itemEnd
			} else {
				// We went out of the parent's bounds, so let's move to the next parent. We will evaluate the same item
				// again, but we will check if it is a child of the next parent item in line.
				if len(parentsStack) == 0 {
					break
				}
				currentParent = parentsStack[:1][0]
				parentsStack = parentsStack[1:]
				continue
			}
		}
		parentsStack = nextParentsStack
		currentLevel++
	}

	return tree
}

// TreeToNestedSetDataFrame walks the tree depth first and adds items into the dataframe. This is a nested set format.
func TreeToNestedSetDataFrame(tree *ProfileTree, unit string) (*data.Frame, *EnumField) {
	frame := data.NewFrame("response")
	frame.Meta = &data.FrameMeta{PreferredVisualization: "flamegraph"}

	levelField := data.NewField("level", nil, []int64{})
	valueField := data.NewField("value", nil, []int64{})
	selfField := data.NewField("self", nil, []int64{})

	// profileTypeID should encode the type of the profile with unit being the 3rd part
	valueField.Config = &data.FieldConfig{Unit: unit}
	selfField.Config = &data.FieldConfig{Unit: unit}
	frame.Fields = data.Fields{levelField, valueField, selfField}

	labelField := NewEnumField("label", nil)

	// Tree can be nil if the profile was empty; we can still send an empty frame in that case
	if tree != nil {
		walkTree(tree, func(tree *ProfileTree) {
			levelField.Append(int64(tree.Level))
			valueField.Append(tree.Value)
			selfField.Append(tree.Self)
			labelField.Append(tree.Name)
		})
	}
	frame.Fields = append(frame.Fields, labelField.GetField())
	return frame, labelField
}

// EnumField label struct
type EnumField struct {
	field     *data.Field
	valuesMap map[string]data.EnumItemIndex
	counter   data.EnumItemIndex
}

// NewEnumField creates a new label field
func NewEnumField(name string, labels data.Labels) *EnumField {
	return &EnumField{
		field:     data.NewField(name, labels, []data.EnumItemIndex{}),
		valuesMap: make(map[string]data.EnumItemIndex),
	}
}

// GetValuesMap returns label.valuesMap
func (e *EnumField) GetValuesMap() map[string]data.EnumItemIndex {
	return e.valuesMap
}

// Append appends a value
func (e *EnumField) Append(value string) {
	if valueIndex, ok := e.valuesMap[value]; ok {
		e.field.Append(valueIndex)
	} else {
		e.valuesMap[value] = e.counter
		e.field.Append(e.counter)
		e.counter++
	}
}

// GetField returns the field with its enum config set
func (e *EnumField) GetField() *data.Field {
	s := make([]string, len(e.valuesMap))
	for k, v := range e.valuesMap {
		s[v] = k
	}

	e.field.SetConfig(&data.FieldConfig{
		TypeConfig: &data.FieldTypeConfig{
			Enum: &data.EnumFieldConfig{
				Text: s,
			},
		},
	})

	return e.field
}

func walkTree(tree *ProfileTree, fn func(tree *ProfileTree)) {
	fn(tree)
	stack := tree.Nodes

	for {
		if len(stack) == 0 {
			break
		}

		fn(stack[0])
		if stack[0].Nodes != nil {
			stack = append(stack[0].Nodes, stack[1:]...)
		} else {
			stack = stack[1:]
		}
	}
}
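// Illustrative sketch (not part of the original change): each bar in a Level is
// four consecutive numbers [start, value, self, nameIndex], with start relative
// to the previous sibling. A minimal two-level profile and its conversion,
// using made-up names and values:
//
//	names := []string{"total", "foo", "bar"}
//	levels := []*Level{
//		{Values: []int64{0, 10, 0, 0}},            // root: total, width 10
//		{Values: []int64{0, 6, 6, 1, 0, 4, 4, 2}}, // children: foo (width 6), bar (width 4)
//	}
//	tree := LevelsToTree(levels, names)
//	frame, _ := TreeToNestedSetDataFrame(tree, "ns")
//	_ = frame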
@ -0,0 +1,23 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package flamegraph

// FrameData Flamegraph json data
type FrameData struct {
	Level int64  `json:"level"`
	Value int64  `json:"value"`
	Self  int64  `json:"self"`
	Label string `json:"label"`
}
@ -0,0 +1,184 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package log

import (
	"fmt"
	"io"
	"os"
	"path/filepath"
	"runtime"

	"github.com/sirupsen/logrus"
)

var logger *logrus.Logger

const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"

func init() {
	logger = logrus.New()

	logger.SetFormatter(&logrus.TextFormatter{
		DisableColors:   true,
		ForceQuote:      true,
		FullTimestamp:   true,
		TimestampFormat: rfc3339NanoFixed,
		DisableSorting:  true,
	})

	logger.SetOutput(os.Stdout)
	logger.SetLevel(logrus.InfoLevel)
	logger.SetReportCaller(false)
}

func newLogrusEntry(callerSkip int) *logrus.Entry {
	var function string

	pc, file, line, ok := runtime.Caller(callerSkip)
	if !ok {
		file = "<???>"
		function = "<???>"
		line = 1
	} else {
		file = filepath.Base(file)
		function = runtime.FuncForPC(pc).Name()
	}

	return logger.WithFields(logrus.Fields{
		logrus.FieldKeyFunc: function,
		logrus.FieldKeyFile: fmt.Sprintf("%s:%d", file, line),
	})
}

// SetLevel sets the log level.
func SetLevel(lvl string) {
	level, err := logrus.ParseLevel(lvl)
	if err != nil {
		Errorf("invalid lvl: %v", err)
		return
	}

	logger.SetLevel(level)
}

// GetLevel returns the standard logger level.
func GetLevel() logrus.Level {
	return logger.GetLevel()
}

// SetOutput sets the standard logger output.
func SetOutput(out io.Writer) {
	logger.SetOutput(out)
}

// AddHook adds a hook to the standard logger hooks.
func AddHook(hook logrus.Hook) {
	logger.AddHook(hook)
}

// WithError creates an entry from the standard logger and adds an error to it, using the value defined in ErrorKey as key.
func WithError(err error) *logrus.Entry {
	return newLogrusEntry(2).WithError(err)
}

// Debug logs a message at level Debug on the standard logger.
func Debug(args ...any) {
	if logger.IsLevelEnabled(logrus.DebugLevel) {
		newLogrusEntry(2).Debug(args...)
	}
}

// Info logs a message at level Info on the standard logger.
func Info(args ...any) {
	if logger.IsLevelEnabled(logrus.InfoLevel) {
		newLogrusEntry(2).Info(args...)
	}
}

// Warn logs a message at level Warn on the standard logger.
func Warn(args ...any) {
	if logger.IsLevelEnabled(logrus.WarnLevel) {
		newLogrusEntry(2).Warn(args...)
	}
}

// Error logs a message at level Error on the standard logger.
func Error(args ...any) {
	if logger.IsLevelEnabled(logrus.ErrorLevel) {
		newLogrusEntry(2).Error(args...)
	}
}

// Panic logs a message at level Panic on the standard logger.
func Panic(args ...any) {
	if logger.IsLevelEnabled(logrus.PanicLevel) {
		newLogrusEntry(2).Panic(args...)
	}
}

// Fatal logs a message at level Fatal on the standard logger then the process will exit with status set to 1.
func Fatal(args ...any) {
	if logger.IsLevelEnabled(logrus.FatalLevel) {
		newLogrusEntry(2).Fatal(args...)
	}
}

// Debugf logs a message at level Debug on the standard logger.
func Debugf(format string, args ...any) {
	if logger.IsLevelEnabled(logrus.DebugLevel) {
		newLogrusEntry(2).Debugf(format, args...)
	}
}

// Infof logs a message at level Info on the standard logger.
func Infof(format string, args ...any) {
	if logger.IsLevelEnabled(logrus.InfoLevel) {
		newLogrusEntry(2).Infof(format, args...)
	}
}

// Warnf logs a message at level Warn on the standard logger.
func Warnf(format string, args ...any) {
	if logger.IsLevelEnabled(logrus.WarnLevel) {
		newLogrusEntry(2).Warnf(format, args...)
	}
}

// Errorf logs a message at level Error on the standard logger.
func Errorf(format string, args ...any) {
	if logger.IsLevelEnabled(logrus.ErrorLevel) {
		newLogrusEntry(2).Errorf(format, args...)
	}
}

// Panicf logs a message at level Panic on the standard logger.
func Panicf(format string, args ...any) {
	if logger.IsLevelEnabled(logrus.PanicLevel) {
		newLogrusEntry(2).Panicf(format, args...)
	}
}

// Fatalf logs a message at level Fatal on the standard logger then the process will exit with status set to 1.
func Fatalf(format string, args ...any) {
	if logger.IsLevelEnabled(logrus.FatalLevel) {
		newLogrusEntry(2).Fatalf(format, args...)
	}
}

// WithCallerSkip creates an entry from the caller skip.
func WithCallerSkip(skip int) *logrus.Entry {
	return newLogrusEntry(2 + skip)
}
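// Illustrative sketch (not part of the original change): the wrappers above
// resolve the caller two frames up (newLogrusEntry itself plus the wrapper),
// so the "file" and "func" fields point at the call site. A hypothetical
// wrapper around this package needs one extra frame of skip:
//
//	func logWarn(format string, args ...any) {
//		// skip the extra frame introduced by logWarn itself
//		log.WithCallerSkip(1).Warnf(format, args...)
//	}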
@ -0,0 +1,207 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pod

import (
	"errors"
	"fmt"
	"sync"
	"syscall"
	"time"

	"huatuo-bamai/internal/log"
)

var (
	// all containers, map: ContainerID -> *Container
	containers = map[string]*Container{}

	// updated
	lastUpdatedAt = time.Now()
	updatedStep   = 5 * time.Second
	updatedLock   sync.Mutex
)

// Container object
type Container struct {
	ID                string            `json:"id"`
	Name              string            `json:"name"`
	Hostname          string            `json:"hostname"`
	Type              ContainerType     `json:"type"`
	Qos               ContainerQos      `json:"qos"`
	IPAddress         string            `json:"ip_address"`
	NetNamespaceInode uint64            `json:"net_namespace_inode"`
	InitPid           int               `json:"init_pid"` // the pid-1 of the container
	CgroupSuffix      string            `json:"cgroup_suffix"`
	CSS               map[string]uint64 `json:"css"`        // map: Name -> Address
	StartedAt         time.Time         `json:"started_at"` // started time
	SyncedAt          time.Time         `json:"synced_at"`  // synced time
	lifeResouces      map[string]any
	Labels            map[string]any `json:"labels"` // custom labels
}

func (c *Container) String() string {
	return fmt.Sprintf("%s:%s/%s/%s:%s/%s", c.ID, c.Hostname, c.Name, c.Type, c.Qos, c.IPAddress)
}

// LifeResouces returns the life resources of the container.
func (c *Container) LifeResouces(key string) any {
	return c.lifeResouces[key]
}

// LabelHostNamespace returns the host namespace label.
func (c *Container) LabelHostNamespace() string {
	return c.Labels[labelHostNamespace].(string)
}

// getContainers returns the containers by type and level.
func getContainers(typeMask ContainerType, minLevel ContainerQos) (map[string]*Container, error) {
	updatedLock.Lock()
	defer updatedLock.Unlock()

	res := make(map[string]*Container)

	if time.Since(lastUpdatedAt) > updatedStep {
		if err := kubeletSyncContainers(); err != nil {
			if errors.Is(err, syscall.ECONNREFUSED) { // ignore error of no connections
				log.Debugf("failed to sync containers by ECONNREFUSED, err: %v", err)
				return res, nil
			}
			return res, err
		}
		lastUpdatedAt = time.Now()
	}

	log.Debugf("sync latest containers: %+v", containers)
	for _, c := range containers {
		// check Type
		if c.Type&typeMask == 0 {
			continue
		}

		// check Level
		if c.Qos < minLevel {
			continue
		}

		res[c.ID] = c
	}

	return res, nil
}

// GetContainersByType returns the containers by type.
func GetContainersByType(typeMask ContainerType) (map[string]*Container, error) {
	return getContainers(typeMask, ContainerQosLevelMin)
}

// GetNormalContainers returns the normal containers.
func GetNormalContainers() (map[string]*Container, error) {
	return GetContainersByType(ContainerTypeNormal)
}

// GetNormalAndSidecarContainers returns the normal and sidecar containers.
func GetNormalAndSidecarContainers() (map[string]*Container, error) {
	return GetContainersByType(ContainerTypeNormal | ContainerTypeSidecar)
}

// GetAllContainers returns all containers.
func GetAllContainers() (map[string]*Container, error) {
	return getContainers(ContainerTypeAll, ContainerQosLevelMin)
}

// GetContainerByID returns the container with the given id.
func GetContainerByID(id string) (*Container, error) {
	all, err := GetAllContainers()
	if err != nil {
		return nil, err
	}

	if c, ok := all[id]; ok {
		return c, nil
	}
	return nil, nil
}

// GetContainerByIPAddress returns the container with the given container ip address.
func GetContainerByIPAddress(ip string) (*Container, error) {
	// only for normal
	all, err := GetNormalContainers()
	if err != nil {
		return nil, err
	}

	for _, c := range all {
		if c.IPAddress == ip {
			return c, nil
		}
	}

	return nil, nil
}

// GetContainerByNetNamespaceInode returns the container with the given net namespace inode.
func GetContainerByNetNamespaceInode(inode uint64) (*Container, error) {
	// only for normal
	all, err := GetNormalContainers()
	if err != nil {
		return nil, err
	}

	for _, c := range all {
		if c.NetNamespaceInode == inode {
			return c, nil
		}
	}

	return nil, nil
}

// GetContainerByCSS returns the container with the given css address.
func GetContainerByCSS(css uint64, subsys string) (*Container, error) {
	all, err := GetAllContainers()
	if err != nil {
		return nil, err
	}

	for _, c := range all {
		if addr, ok := c.CSS[subsys]; ok {
			if addr == css {
				return c, nil
			}
		}
	}

	return nil, nil
}

// GetCSSToContainerID builds the mapping from css address to container id.
// Usage:
//
//	cssToID, _ := GetCSSToContainerID("cpu")
//	containerID := cssToID[0xffffffffc0601000]
func GetCSSToContainerID(subsys string) (map[uint64]string, error) {
	containers, err := GetAllContainers()
	if err != nil {
		return nil, err
	}
	cssToContainerMap := make(map[uint64]string)
	for _, container := range containers {
		if addr, ok := container.CSS[subsys]; ok {
			cssToContainerMap[addr] = container.ID
		}
	}

	return cssToContainerMap, nil
}
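// Illustrative sketch (not part of the original change): resolving a cgroup
// css address reported by a BPF event back to a container. The css address
// below is a made-up value for illustration only.
//
//	cssToID, err := GetCSSToContainerID("cpu")
//	if err != nil {
//		return err
//	}
//	if id, ok := cssToID[0xffffffffc0601000]; ok {
//		c, _ := GetContainerByID(id)
//		log.Infof("css belongs to container %s", c)
//	}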
@ -0,0 +1,313 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !didi

package pod

import (
	"bufio"
	"context"
	"errors"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"sync"
	"syscall"
	"time"

	"huatuo-bamai/internal/bpf"
	"huatuo-bamai/internal/log"
	"huatuo-bamai/internal/utils/cgrouputil"
	"huatuo-bamai/pkg/types"

	mapset "github.com/deckarep/golang-set"
)

// XXX go:generate go run -mod=mod github.com/cilium/ebpf/cmd/bpf2go -target amd64 cgroupCssGather $BPF_DIR/cgroup_css_gather.c -- $BPF_INCLUDE
// use the huatuo bpf framework:
//
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/cgroup_css_gather.c -o $BPF_DIR/cgroup_css_gather.o
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/cgroup_css_events.c -o $BPF_DIR/cgroup_css_events.o

func parseContainerCSS(containerID string) (map[string]uint64, error) {
	msg := make(map[string]uint64)
	cssList := cgroupListCssDataByKnode(containerID)
	for _, css := range cssList {
		msg[css.SubSys] = css.CSS
	}

	return msg, nil
}

const (
	cgroupSubsysCount             = 13
	kubeletContainerIDKnodeMaxlen = 64
)

var (
	// FIXME:
	// 1. cgroupv1 supported only
	// 2. cgroup dir name is containerID
	kubeletContainerIDRegexp  = regexp.MustCompile(`[^a-zA-Z0-9]+`)
	cgroupv1SubSysName        = []string{"cpu", "cpuacct", "cpuset", "memory", "blkio"}
	cgroupv1NotifyCgroupFile  = "cgroup.clone_children"
	cgroupCssID2SubSysNameMap = map[int]string{}
	cgroupCssMetaDataMap      sync.Map

	// avoid GC
	_cgroupCssBpfInternal *bpf.BPF
)

func isValidKnodeName(name string) bool {
	return !kubeletContainerIDRegexp.MatchString(name)
}

type containerCssMetaData struct {
	CSS         uint64
	SubSys      string
	Cgroup      uint64
	CgroupRoot  int32
	CgroupLevel int32
	ContainerID string
}

type containerCssPerfEvent struct {
	Cgroup      uint64
	OpsType     uint64
	CgroupRoot  int32
	CgroupLevel int32
	CSS         [cgroupSubsysCount]uint64
	KnodeName   [kubeletContainerIDKnodeMaxlen + 2]byte
}

func cgroupListCssDataByKnode(containerID string) []*containerCssMetaData {
	res := []*containerCssMetaData{}
	cgroupCssMetaDataMap.Range(func(k, v any) bool {
		if m, ok := v.(*containerCssMetaData); ok {
			if m.ContainerID == containerID {
				res = append(res, m)
			}
		}
		return true
	})
	return res
}

func cgroupUpdateOrCreateCssData(data *containerCssPerfEvent) error {
	knodeName := strings.TrimRight(string(data.KnodeName[:]), "\x00")
	if !isValidKnodeName(knodeName) {
		return fmt.Errorf("knode name is not a containerID")
	}

	for index, css := range data.CSS {
		if css == 0 {
			continue
		}

		if sysName, ok := cgroupCssID2SubSysNameMap[index]; ok {
			m := &containerCssMetaData{
				CSS:         css,
				Cgroup:      data.Cgroup,
				CgroupRoot:  data.CgroupRoot,
				CgroupLevel: data.CgroupLevel,
				ContainerID: knodeName,
				SubSys:      sysName,
			}
			log.Debugf("update container css data: %+v", m)
			cgroupCssMetaDataMap.Store(css, m)
		}
	}

	return nil
}

func cgroupDeleteCssData(data *containerCssPerfEvent) error {
	knodeName := strings.TrimRight(string(data.KnodeName[:]), "\x00")
	if !isValidKnodeName(knodeName) {
		return fmt.Errorf("knode name is not a containerID")
	}

	for index, css := range data.CSS {
		if css == 0 {
			continue
		}

		if _, ok := cgroupCssID2SubSysNameMap[index]; ok {
			m, loaded := cgroupCssMetaDataMap.LoadAndDelete(css)
			if loaded {
				log.Debugf("delete container css data: %+v", m)
			}
		}
	}

	return nil
}

func cgroupCssEventSync(ctx context.Context, reader bpf.PerfEventReader) {
	go func() {
		for {
			select {
			case <-ctx.Done():
				return
			default:
				var data containerCssPerfEvent
				if err := reader.ReadInto(&data); err != nil {
					if !errors.Is(err, types.ErrExitByCancelCtx) {
						log.Errorf("cgroup css sync read events: %v", err)
					}
					return
				}

				log.Debugf("sync container css data: %+v", data)

				switch data.OpsType {
				case 0: // mkdir cgroup
					_ = cgroupUpdateOrCreateCssData(&data)
				case 1: // rmdir cgroup
					_ = cgroupDeleteCssData(&data)
				default:
					log.Errorf("css event opstype not supported: %+v", data)
				}
			}
		}
	}()
}

func cgroupCssNotify() {
	rootSet := mapset.NewSet()

	for _, subsys := range cgroupv1SubSysName {
		root := cgrouputil.CgroupRootFsFilePath(subsys)
		realRoot, err := filepath.EvalSymlinks(root)
		if err != nil {
			continue
		}

		if rootSet.Contains(realRoot) {
			continue
		}

		rootSet.Add(realRoot)

		if err := filepath.WalkDir(realRoot, func(path string, d fs.DirEntry, err error) error {
			if err != nil {
				return err
			}

			if !d.IsDir() || len(d.Name()) != kubeletContainerIDKnodeMaxlen {
				return nil
			}

			notifyPath := filepath.Join(path, cgroupv1NotifyCgroupFile)
			_, _ = os.ReadFile(notifyPath)

			log.Debugf("read cgroup path: %s", notifyPath)
			return filepath.SkipDir
		}); err != nil {
			var e *os.PathError
			if errors.As(err, &e) && errors.Is(e.Err, syscall.ENOENT) {
				continue
			}

			return
		}
	}
}

func cgroupInitSubSysIDs() error {
	file, err := os.Open("/proc/cgroups")
	if err != nil {
		return err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	scanner.Split(bufio.ScanLines)

	// skip the first header line
	scanner.Scan()

	ssid := 0
	for scanner.Scan() {
		arr := strings.SplitN(scanner.Text(), "\t", 2)
		cgroupCssID2SubSysNameMap[ssid] = arr[0]
		ssid++
	}

	return nil
}

func cgroupInitEventCssWithoutCleanup() error {
	cssBpf, err := bpf.LoadBpf("cgroup_css_events.o", nil)
	if err != nil {
		return fmt.Errorf("LoadBpf: %w", err)
	}
	_cgroupCssBpfInternal = &cssBpf

	childCtx := context.Background()
	reader, err := cssBpf.AttachAndEventPipe(childCtx, "cgroup_perf_events", 8192)
	if err != nil {
		log.Infof("AttachAndEventPipe: %v", err)
		return err
	}

	cgroupCssEventSync(childCtx, reader)
	return nil
}

func cgroupInitGatherCss() error {
	cssBpf, err := bpf.LoadBpf("cgroup_css_gather.o", nil)
	if err != nil {
		return fmt.Errorf("LoadBpf: %w", err)
	}
	defer cssBpf.Close()

	childCtx, cancel := context.WithCancel(context.Background())
	defer cancel()

	reader, err := cssBpf.AttachAndEventPipe(childCtx, "cgroup_perf_events", 8192)
	if err != nil {
		log.Infof("AttachAndEventPipe: %v", err)
		return err
	}
	defer reader.Close()

	cgroupCssEventSync(childCtx, reader)
	time.Sleep(100 * time.Millisecond)

	cgroupCssNotify()

	// wait for the sync to finish
	time.Sleep(1 * time.Second)
	return nil
}

// ContainerCgroupCssInit initializes the container cgroup css tracking.
func ContainerCgroupCssInit() error {
	if err := cgroupInitSubSysIDs(); err != nil {
		panic("only support cgroupv1 now")
	}

	if err := cgroupInitGatherCss(); err != nil {
		return err
	}
	if err := cgroupInitEventCssWithoutCleanup(); err != nil {
		return err
	}

	return nil
}
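// Illustrative sketch (not part of the original change): the intended start-up
// order is a one-shot gather pass (cgroup_css_gather.o, driven by the
// clone_children reads in cgroupCssNotify above) followed by the long-lived
// event program (cgroup_css_events.o) that tracks cgroup mkdir/rmdir. After
// init, the per-container css map can be queried; containerID below stands for
// the 64-character cgroup directory name created by kubelet and is hypothetical.
//
//	if err := ContainerCgroupCssInit(); err != nil {
//		return err
//	}
//	css, _ := parseContainerCSS(containerID) // map: subsystem name -> css address
//	log.Debugf("css for container: %+v", css)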