Compare commits
1 Commits
Author | SHA1 | Date |
---|---|---|
|
334c032be0 |
34
Dockerfile
|
@ -1,34 +0,0 @@
|
|||
FROM golang:1.22.4-alpine AS base
|
||||
RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories
|
||||
RUN apk add --no-cache \
|
||||
make \
|
||||
clang15 \
|
||||
libbpf-dev \
|
||||
bpftool \
|
||||
curl \
|
||||
git
|
||||
ENV PATH=$PATH:/usr/lib/llvm15/bin
|
||||
|
||||
|
||||
FROM base AS build
|
||||
ARG BUILD_PATH=${BUILD_PATH:-/go/huatuo-bamai}
|
||||
ARG RUN_PATH=${RUN_PATH:-/home/huatuo-bamai}
|
||||
WORKDIR ${BUILD_PATH}
|
||||
COPY . .
|
||||
RUN make && \
|
||||
mkdir -p ${RUN_PATH}/bpf && \
|
||||
mkdir -p ${RUN_PATH}/tracer && \
|
||||
cp ${BUILD_PATH}/_output/bin/huatuo-bamai ${RUN_PATH}/huatuo-bamai && \
|
||||
cp ${BUILD_PATH}/huatuo-bamai.conf ${RUN_PATH}/huatuo-bamai.conf && \
|
||||
cp ${BUILD_PATH}/bpf/*.o ${RUN_PATH}/bpf/ && \
|
||||
find ${BUILD_PATH}/cmd -type f -name "*.bin" -exec cp {} ${RUN_PATH}/tracer/ \;
|
||||
# Comment following line if elasticsearch is needed and repalce the ES configs in huatuo-bamai.conf
|
||||
RUN sed -i 's/"http:\/\/127.0.0.1:9200"/""/' ${RUN_PATH}/huatuo-bamai.conf
|
||||
|
||||
|
||||
FROM alpine:3.22.0 AS run
|
||||
ARG RUN_PATH=${RUN_PATH:-/home/huatuo-bamai}
|
||||
RUN apk add --no-cache curl
|
||||
COPY --from=build ${RUN_PATH} ${RUN_PATH}
|
||||
WORKDIR ${RUN_PATH}
|
||||
CMD ["./huatuo-bamai", "--region", "example", "--config", "huatuo-bamai.conf"]
|
13
NEWS
|
@ -1,13 +0,0 @@
|
|||
v2.0 - WIP
|
||||
---------------------
|
||||
- 支持指标、事件、 追踪 region 字段
|
||||
- 支持 softirq percpu 指标
|
||||
- 支持 golangci 静态检查
|
||||
- 支持组件的 cgroupv2 资源限制
|
||||
- 支持独立的 cgroup package, 应用无感知 cgroup 运行时类型
|
||||
- 支持根据 kubelet cgroupdriver 配置,实现 cgroupfs, systemd cgroup 路径转换
|
||||
- 若干代码优化和 BUG 修复
|
||||
|
||||
v1.0 - 2025-07-13
|
||||
---------------------
|
||||
- 初始版本发布,主要涉及指标,事件,自动化追踪
|
115
README.md
|
@ -1,9 +1,7 @@
|
|||
简体中文 | [English](./README_EN.md)
|
||||
|
||||

|
||||
|
||||
# 什么是 HUATUO
|
||||
**HUATUO(华佗)**是由**滴滴**开源并依托 **CCF 开源发展委员会**孵化的云原生操作系统可观测性项目,专注于为复杂云原生环境提供操作系统内核级深度观测能力。该项目基于 [eBPF](https://docs.kernel.org/userspace-api/ebpf/syscall.html) 技术,通过整合 [kprobe](https://www.kernel.org/doc/html/latest/trace/kprobes.html)、 [tracepoint](https://www.kernel.org/doc/html/latest/trace/tracepoints.html)、 [ftrace](https://www.kernel.org/doc/html/latest/trace/ftrace.html) 等内核动态追踪技术,实现了多维度的内核观测能力:**1.** 更精细化的内核子系统埋点指标 Metric **2.** 异常事件驱动的内核运行时上下文捕获 Events **3.** 针对系统突发毛刺的自动追踪 AutoTracing、AutoProfiling。该项目逐步构建了完整的 Linux 内核深度可观测体系架构。目前,HUATUO 已在滴滴生产环境中实现规模化部署,在诸多故障场景中发挥关键作用,有效保障了云原生操作系统的高可用性和性能优化。通过持续的技术演进,希望 HUATUO 能够推动 eBPF 技术在云原生可观测领域向更细粒度、更低开销、更高时效性的方向发展。更多信息访问官网 [https://huatuo.tech](https://huatuo.tech/)。
|
||||
**HUATUO(华佗)**是由**滴滴**开源并依托 **CCF 开源发展委员会**孵化的云原生操作系统可观测性项目,专注于为复杂云原生环境提供操作系统内核级深度观测能力。该项目基于 [eBPF](https://docs.kernel.org/userspace-api/ebpf/syscall.html) 技术,通过整合 [kprobe](https://www.kernel.org/doc/html/latest/trace/kprobes.html)、 [tracepoint](https://www.kernel.org/doc/html/latest/trace/tracepoints.html)、 [ftrace](https://www.kernel.org/doc/html/latest/trace/ftrace.html) 等内核动态追踪技术,实现了多维度的内核观测能力:**1.** 更精细化的内核子系统埋点指标 Metric **2.** 异常事件驱动的内核运行时上下文捕获 Events **3.** 针对系统突发毛刺的自动追踪 AutoTracing、AutoProfiling。该项目逐步构建了完整的 Linux 内核深度可观测体系架构。目前,HUATUO 已在滴滴生产环境中实现规模化部署,在诸多故障场景中发挥关键作用,有效保障了云原生操作系统的高可用性和性能优化。通过持续的技术演进,希望 HUATUO 能够推动 eBPF 技术在云原生可观测领域向更细粒度、更低开销、更高时效性的方向发展。
|
||||
|
||||
|
||||
# 核心特性
|
||||
|
@ -14,57 +12,80 @@
|
|||
- **分布式链路追踪 Tracing**:以网络为中心的面向服务请求的分布式链路追踪,能够清晰的划分系统调用层级关系,节点关联关系,耗时记账等,支持在大规模分布式系统中的跨节点追踪,提供微服务调用的全景视图,保障系统在复杂场景下的稳定性。
|
||||
- **开源技术生态融合**:无缝对接主流开源可观测技术栈,如 Prometheus、Grafana、Pyroscope、Elasticsearch等。支持独立物理机和云原生部署,自动感知 K8S 容器资源/标签/注解,自动关联操作系统内核事件指标,消除数据孤岛。通过零侵扰、内核可编程方式兼容主流硬件平台和内核版本,确保其适应性、应用性。
|
||||
|
||||
|
||||
|
||||
# 快速上手
|
||||
为用户开发者快速体验 HUATUO, 我们提供容器编译镜像的便捷方式,一键运行 docker compose 即可启动。该命令会启动 elasticsearch, prometheus, grafana 以及编译的 huatuo-bamai 组件。上述命令执行成功后,打开浏览器访问 http://localhost:3000 即可浏览监控大盘。
|
||||
|
||||
- **极速体验**
|
||||
如果你只关心底层原理,不关心存储、前端展示等,我们提供了编译好的镜像,已包含 HUATUO 底层运行的必要组件,直接运行即可:
|
||||
```bash
|
||||
$ docker run --privileged --cgroupns=host --network=host -v /sys:/sys -v /run:/run huatuo/huatuo-bamai:latest
|
||||
```
|
||||
|
||||
- **快速搭建**
|
||||
如果你想更进一步了解 HUATUO 运行机制,架构设计等,可在本地很方便地搭建 HUATUO 完整运行的所有组件,我们提供容器镜像以及简单配置,方便用户开发者快速了解 HUATUO。
|
||||

|
||||
<div style="text-align: center; margin: 8px 0 20px 0; color: #777;">
|
||||
<small>
|
||||
HUATUO 组件运行示意图<br>
|
||||
</small>
|
||||
</div>
|
||||
|
||||
为快速搭建运行环境,我们提供一键运行的方式,该命令会启动 [elasticsearch](https://www.elastic.co), [prometheus](https://prometheus.io), [grafana](https://grafana.com) 以及 huatuo-bamai 组件。命令执行成功后,打开浏览器访问 [http://localhost:3000](http://localhost:3000) 即可浏览监控大盘。
|
||||
|
||||
```bash
|
||||
$ docker compose --project-directory ./build/docker up
|
||||
```
|
||||
```bash
|
||||
$ docker compose --project-directory ./build/docker up
|
||||
```
|
||||
|
||||
# 软件架构
|
||||

|
||||
|
||||
# 开源协议
|
||||
该项目采用 Apache License 2.0 协议开源,BPF 代码采用 GPL 协议。
|
||||
|
||||
# 内核版本
|
||||
理论支持 4.18 之后的所有版本,主要测试内核、和操作系统发行版如下:
|
||||
|
||||
| HUATUO | 内核版本 | 操作系统发行版 |
|
||||
| :--- | :---- | :--- |
|
||||
| 1.0 | 4.18.x | Centos 8.5 |
|
||||
| 1.0 | 5.10.x | OpenEuler 22.03/Anolis OS 8.10 |
|
||||
| 1.0 | 6.6.x | OpenEuler 24.03/Anolis OS 23.3 |
|
||||
| 1.0 | 6.8.x | Ubuntu 24.04 |
|
||||
| 1.0 | 6.14.x | Fedora 42 |
|
||||

|
||||
|
||||
|
||||
# 文档
|
||||
# 功能列表
|
||||
## Autotracing
|
||||
| 追踪名称 | 核心功能 | 场景 |
|
||||
| ---------------| --------------------- |-------------------------------------- |
|
||||
| cpu sys | 宿主 sys 增高检测 | 由于系统负载异常导致业务毛刺问题 |
|
||||
| cpu idle | 容器 cpu idle 掉底检测,提供调用栈,火焰图,进程上下文信息等 | 容器 cpu 使用异常,帮助业务判断进程热点是否异常 |
|
||||
| dload | 跟踪 D 状态进程,提供容器运行情况、D 状态进程调用栈信息等 | 由于系统 D 或 R 状态进程数量突增导致负载升高的问题。系统 D 状态突增通常和资源不可用或者锁被长期持有相关,R 状态进程数量突增往往是业务代码设计不合理导致 |
|
||||
| waitrate | 容器 cpu 外部争抢检测,提供发生争抢时的容器信息等 | 容器 cpu 争抢可能会引起业务毛刺,已存在争抢指标缺乏具体争抢容器信息,通过 waitrate 追踪可以获取参与争抢的容器信息,给混部资源隔离提供参考 |
|
||||
| memburst | 记录突发内存分配上下文 | 宿主机短时间内大量分配内存时,检测宿主机上短时间内大量分配内存的事件,突发性内存分配可能引发直接回收或者 oom 等 |
|
||||
| iotracer | 检测宿主磁盘满、IO 延迟异常时,输出异常时 IO 访问的文件名和路径、磁盘设备、inode 号、容器等上下文信息 | 频繁出现磁盘 IO 带宽打满、磁盘访问突增,进而导致应用请求延迟或者系统性能抖动 |
|
||||
|
||||
更多信息访问官网 [https://huatuo.tech](https://huatuo.tech/)
|
||||
## Events
|
||||
| 事件名称 | 核心功能 | 场景 |
|
||||
| ---------------| --------------------- |----------------------------------------|
|
||||
| softirq | 宿主软中断延迟响应或长期关闭,输出长时间关闭软中断的调用栈,进程信息等 | 该类问题会严重影响网络收发,进而导致业务毛刺或者超时等其他问题 |
|
||||
| dropwatch | TCP 数据包丢包检测,输出发生丢包时主机、网络上下文信息等 | 该类问题主要会引起业务毛刺和延迟 |
|
||||
| netrecvlat | 在网络收方向获取数据包从驱动、协议栈、到用户主动收过程的延迟事件 | 网络延迟问题中有一类是数据传输阶段收方向存在延迟,但不清楚是延迟位置,netrecvlat case 根据 skb 入网卡时间戳依次在驱动、协议栈和用户 copy 数据的路径计算延迟,通过预先设定的阈值过滤超时的数据包,已定位延迟位置 |
|
||||
| oom | 检测宿主或容器内 oom 事件 | 当宿主机层面或者容器维度发生 oom 事件时,能够获取触发 oom 的进程信息、被 kill 的进程信息以及容器信息,便于定位进程内存泄漏、异常退出等问题 |
|
||||
| softlockup | 当系统上发生 softlockup 时,收集目标进程信息以及 cpu 信息,同时获取各个 cpu 上的内核栈信息 | 系统发生 softlockup |
|
||||
| hungtask | 提供系统内所有 D 状态进程数量、内核栈信息 | 用于定位瞬时出现 D 进程的场景,能及时保留现场便于后期问题跟踪 |
|
||||
| memreclaim | 进程进入直接回收的耗时,超过时间阈值,记录进程信息 | 内存压力过大时,如果此时进程申请内存,有可能进入直接回收,此时处于同步回收阶段,可能会造成业务进程的卡顿,此时记录进程进入直接回收的时间,有助于我们判断此进程被直接回收影响的剧烈程度 |
|
||||
|
||||
## Metrics
|
||||
metrics 采集包括各子系统的众多指标,包括 cpu, memory, io, network 等,metrics 主要来源 procfs, eBPF, 计算聚合等,以下为部分 Metrics 的简介。[详细参考](docs/metrics.md)
|
||||
|
||||
| 子系统 | Metric | 描述 | 维度 |
|
||||
| ----------- | --------------- | ----------------------------------- | ------------------ |
|
||||
| cpu | sys, usr, util | cpu 占用百分比 | 宿主、容器 |
|
||||
| cpu | burst, throttled | cpu burst 时长, throttled/limited 的次数 | 容器 |
|
||||
| cpu | inner, exter_wait_rate | 容器内外部争抢指数 | 容器 |
|
||||
| cpu | nr_running, nr_uninterruptible | 对应状态的任务数 | 容器 |
|
||||
| cpu | load 1, 5, 15 | 宿主的 1、5、15 分钟平滑负载值 | 宿主 |
|
||||
| cpu | softirq_latency | NET_RX/NET_TX 中断延迟在指定区间内的次数 | 宿主 |
|
||||
| cpu | runqlat_nlat | 调度延迟在指定时间段内的出现的次数 | 宿主、容器 |
|
||||
| cpu | reschedipi_oversell_probability | VM 宿主机可能发生 cpu 超卖 | 宿主 |
|
||||
| memory | direct_reclaim | 内存直接回收相关指标 | 容器 |
|
||||
| memory | asyncreclaim | 内存异步回收相关指标 | 容器 |
|
||||
| memory | vmstat, memory_stat | 其他内存状态指标 | 宿主、容器 |
|
||||
| memory | hungtask, oom, softlockup | 事件计数统计 | 宿主、容器 |
|
||||
| IO | d2c | 统计 IO 的延迟,只包括驱动和磁盘硬件处理部分 | 宿主、容器 |
|
||||
| IO | q2c | 统计 IO 的延迟,包括整个 IO 生命周期 | 宿主、容器 |
|
||||
| IO | disk_freeze | 统计磁盘 freeze 的事件次数 | 宿主 |
|
||||
| IO | disk_flush | 统计 RAI 设备的 flush 操作延迟 | 宿主、容器 |
|
||||
| network | arp | ARP 缓存数量 | 系统、宿主、容器 |
|
||||
| network | tcp, udp mem | Socket 使用、socket 内存使用等 | 系统、宿主、容器 |
|
||||
| network | qdisc | 网络出向队列状态统计 | 宿主 |
|
||||
| network | netdev | 设备指标统计 | 宿主、容器 |
|
||||
| network | netstat | 网络指标统计 | 宿主、容器 |
|
||||
| network | sockstat | Socket 指标统计 | 宿主、容器 |
|
||||
|
||||
|
||||
# 前端展示
|
||||
## 机房内核事件总览
|
||||

|
||||

|
||||
## AutoProfiling
|
||||

|
||||
## Dropwatch
|
||||

|
||||
## net_rx_latency
|
||||

|
||||
|
||||
|
||||
# 联系我们
|
||||
|
||||
@[hao022](https://github.com/hao022)
|
||||
@[nashuiliang](https://github.com/nashuiliang)
|
||||
@[fanzu8](https://github.com/fanzuba)
|
||||
|
||||
@[hao022](https://github.com/hao022)
|
||||
@[nashuiliang](https://github.com/nashuiliang)
|
26
README_EN.md
|
@ -12,25 +12,15 @@ HuaTuo also integrates core technologies such as automated tracing, profiling, a
|
|||
- **Smooth Transition** to Popular Observability Stacks: Provides standard data sources for Prometheus and Pyroscope, integrates with Kubernetes container resources, and automatically correlates Kubernetes labels/annotations with kernel event metrics, eliminating data silos, ensuring seamless integration and analysis across various data sources for comprehensive system monitoring.
|
||||
|
||||
# Getting Started
|
||||
- **Instant Experience**
|
||||
If you only care about the underlying principles and not about storage backends or frontend display, we provide a pre-built image containing all necessary components for HUATO's core operation. Just run:
|
||||
## run
|
||||
HuaTuo provides a convenient way for quick getting started, all in one command as below:
|
||||
```bash
|
||||
$ docker compose --project-directory ./build/docker up
|
||||
```
|
||||
Run it in the project root directory, then open [http://localhost:3000](http://localhost:3000) to view the panels on your browser.
|
||||
|
||||
```bash
|
||||
$ docker run --privileged --cgroupns=host --network=host -v /sys:/sys -v /run:/run huatuo/huatuo-bamai:latest
|
||||
```
|
||||
|
||||
- **Quick Setup**
|
||||
If you want to dive deeper into HUATO's operation mechanisms and architecture, you can easily set up all components locally. We provide container images and simple configurations for developers to quickly understand HUATO.
|
||||

|
||||
<div style="text-align: center; margin: 8px 0 20px 0; color: #777;">
|
||||
<small>
|
||||
HUATUO Component Workflow<br>
|
||||
</small>
|
||||
</div>
|
||||
|
||||
For a quick setup, we provide a one-command solution to launch [elasticsearch](https://www.elastic.co), [prometheus](https://prometheus.io), [grafana](https://grafana.com) and huatuo-bamai. Once executed, click [http://localhost:3000](http://localhost:3000) to view the monitoring dashboards on your browser.
|
||||
|
||||
- Data related to event-driven operations Autotracing and Events, are stored in elasticsearch
|
||||
The upper command starts three dependencies containers: [elasticsearch](https://www.elastic.co), [prometheus](https://prometheus.io), [grafana](https://grafana.com), then compiles and starts huatuo-bamai.
|
||||
- Data related to event-driven operations, such as Autotracing and Events, are stored in elasticsearch
|
||||
- Metrics-related data is actively collected and stored by prometheus
|
||||
- elasticsearch data reporting port: 9200
|
||||
- prometheus data source port: 9090
|
||||
|
|
|
@ -48,7 +48,7 @@ bpf_cgroup_event_class_prog(struct bpf_raw_tracepoint_args *ctx, u64 type)
|
|||
bpf_probe_read(&data.css, sizeof(u64) * CGROUP_SUBSYS_COUNT,
|
||||
BPF_CORE_READ(cgrp, subsys));
|
||||
|
||||
bpf_perf_event_output(ctx, &cgroup_perf_events, COMPAT_BPF_F_CURRENT_CPU,
|
||||
bpf_perf_event_output(ctx, &cgroup_perf_events, BPF_F_CURRENT_CPU,
|
||||
&data, sizeof(data));
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -50,7 +50,7 @@ int bpf_cgroup_clone_children_read_prog(struct pt_regs *ctx)
|
|||
BPF_CORE_READ(cgrp, subsys));
|
||||
|
||||
/* output */
|
||||
bpf_perf_event_output(ctx, &cgroup_perf_events, COMPAT_BPF_F_CURRENT_CPU,
|
||||
bpf_perf_event_output(ctx, &cgroup_perf_events, BPF_F_CURRENT_CPU,
|
||||
&data, sizeof(data));
|
||||
return 0;
|
||||
}
|
||||
|
|
378
bpf/dropwatch.c
|
@ -1,181 +1,287 @@
|
|||
#include "vmlinux.h"
|
||||
|
||||
#include "vmlinux_net.h"
|
||||
#include "bpf_common.h"
|
||||
#include "bpf_ratelimit.h"
|
||||
#include "vmlinux_net.h"
|
||||
|
||||
#define TYPE_TCP_COMMON_DROP 1
|
||||
#define TYPE_TCP_SYN_FLOOD 2
|
||||
#define TYPE_TCP_LISTEN_OVERFLOW_HANDSHAKE1 3
|
||||
#define TYPE_TCP_LISTEN_OVERFLOW_HANDSHAKE3 4
|
||||
#define TYPE_TCP_COMMON_DROP 1
|
||||
#define TYPE_TCP_SYN_FLOOD 2
|
||||
#define TYPE_TCP_LISTEN_OVERFLOW_HANDSHAKE1 3
|
||||
#define TYPE_TCP_LISTEN_OVERFLOW_HANDSHAKE3 4
|
||||
|
||||
#define SK_FL_PROTO_SHIFT 8
|
||||
#define SK_FL_PROTO_MASK 0x0000ff00
|
||||
#define SK_FL_TYPE_SHIFT 16
|
||||
#define SK_FL_TYPE_MASK 0xffff0000
|
||||
#define SK_FL_PROTO_SHIFT 8
|
||||
#define SK_FL_PROTO_MASK 0x0000ff00
|
||||
#define SK_FL_TYPE_SHIFT 16
|
||||
#define SK_FL_TYPE_MASK 0xffff0000
|
||||
|
||||
struct perf_event_t {
|
||||
u64 tgid_pid;
|
||||
u32 saddr;
|
||||
u32 daddr;
|
||||
u16 sport;
|
||||
u16 dport;
|
||||
u32 seq;
|
||||
u32 ack_seq;
|
||||
u32 queue_mapping;
|
||||
u64 pkt_len;
|
||||
s64 stack_size;
|
||||
u64 stack[PERF_MAX_STACK_DEPTH];
|
||||
u32 sk_max_ack_backlog;
|
||||
u8 state;
|
||||
u8 type;
|
||||
char comm[COMPAT_TASK_COMM_LEN];
|
||||
u64 tgid_pid;
|
||||
u32 saddr;
|
||||
u32 daddr;
|
||||
u16 sport;
|
||||
u16 dport;
|
||||
u32 seq;
|
||||
u32 ack_seq;
|
||||
u32 queue_mapping;
|
||||
u64 pkt_len;
|
||||
s64 stack_size;
|
||||
u64 stack[PERF_MAX_STACK_DEPTH];
|
||||
u32 sk_max_ack_backlog;
|
||||
u8 state;
|
||||
u8 type;
|
||||
char comm[TASK_COMM_LEN];
|
||||
};
|
||||
|
||||
/* format: /sys/kernel/debug/tracing/events/skb/kfree_skb/format */
|
||||
struct kfree_skb_args {
|
||||
unsigned long long pad;
|
||||
|
||||
void *skbaddr;
|
||||
void *location;
|
||||
u16 protocol;
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} perf_events SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__uint(max_entries, 1);
|
||||
__uint(key_size, sizeof(u32));
|
||||
__uint(value_size, sizeof(struct perf_event_t));
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__uint(max_entries, 1);
|
||||
__uint(key_size, sizeof(u32));
|
||||
__uint(value_size, sizeof(struct perf_event_t));
|
||||
} dropwatch_stackmap SEC(".maps");
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
static const struct perf_event_t zero_data = {};
|
||||
static const u32 stackmap_key = 0;
|
||||
static const u32 stackmap_key = 0;
|
||||
|
||||
BPF_RATELIMIT(rate, 1, 100); // 100/s
|
||||
|
||||
struct sock___5_10 {
|
||||
u16 sk_type;
|
||||
u16 sk_protocol;
|
||||
} __attribute__((preserve_access_index));
|
||||
u16 sk_type;
|
||||
u16 sk_protocol;
|
||||
}__attribute__((preserve_access_index));
|
||||
|
||||
static void sk_get_type_and_protocol(struct sock *sk, u16 *protocol, u16 *type)
|
||||
{
|
||||
// kernel version <= 4.18
|
||||
//
|
||||
// struct sock {
|
||||
// unsigned int __sk_flags_offset[0];
|
||||
// #ifdef __BIG_ENDIAN_BITFIELD
|
||||
// #define SK_FL_PROTO_SHIFT 16
|
||||
// #define SK_FL_PROTO_MASK 0x00ff0000
|
||||
// #
|
||||
// #define SK_FL_TYPE_SHIFT 0
|
||||
// #define SK_FL_TYPE_MASK 0x0000ffff
|
||||
// #else
|
||||
// #define SK_FL_PROTO_SHIFT 8
|
||||
// #define SK_FL_PROTO_MASK 0x0000ff00
|
||||
// #
|
||||
// #define SK_FL_TYPE_SHIFT 16
|
||||
// #define SK_FL_TYPE_MASK 0xffff0000
|
||||
// #endif
|
||||
//
|
||||
// unsigned int sk_padding : 1,
|
||||
// sk_kern_sock : 1,
|
||||
// sk_no_check_tx : 1,
|
||||
// sk_no_check_rx : 1,
|
||||
// sk_userlocks : 4,
|
||||
// sk_protocol : 8,
|
||||
// sk_type : 16;
|
||||
// }
|
||||
if (bpf_core_field_exists(sk->__sk_flags_offset)) {
|
||||
u32 sk_flags;
|
||||
// kernel version <= 4.18
|
||||
//
|
||||
// struct sock {
|
||||
// unsigned int __sk_flags_offset[0];
|
||||
// #ifdef __BIG_ENDIAN_BITFIELD
|
||||
// #define SK_FL_PROTO_SHIFT 16
|
||||
// #define SK_FL_PROTO_MASK 0x00ff0000
|
||||
// #
|
||||
// #define SK_FL_TYPE_SHIFT 0
|
||||
// #define SK_FL_TYPE_MASK 0x0000ffff
|
||||
// #else
|
||||
// #define SK_FL_PROTO_SHIFT 8
|
||||
// #define SK_FL_PROTO_MASK 0x0000ff00
|
||||
// #
|
||||
// #define SK_FL_TYPE_SHIFT 16
|
||||
// #define SK_FL_TYPE_MASK 0xffff0000
|
||||
// #endif
|
||||
//
|
||||
// unsigned int sk_padding : 1,
|
||||
// sk_kern_sock : 1,
|
||||
// sk_no_check_tx : 1,
|
||||
// sk_no_check_rx : 1,
|
||||
// sk_userlocks : 4,
|
||||
// sk_protocol : 8,
|
||||
// sk_type : 16;
|
||||
// }
|
||||
if (bpf_core_field_exists(sk->__sk_flags_offset)) {
|
||||
u32 sk_flags;
|
||||
|
||||
bpf_probe_read(&sk_flags, sizeof(sk_flags),
|
||||
&sk->__sk_flags_offset);
|
||||
*protocol = sk_flags >> SK_FL_PROTO_SHIFT;
|
||||
*type = sk_flags >> SK_FL_TYPE_SHIFT;
|
||||
return;
|
||||
}
|
||||
bpf_probe_read(&sk_flags, sizeof(sk_flags), &sk->__sk_flags_offset);
|
||||
*protocol = sk_flags >> SK_FL_PROTO_SHIFT;
|
||||
*type = sk_flags >> SK_FL_TYPE_SHIFT;
|
||||
return;
|
||||
}
|
||||
|
||||
// struct sock {
|
||||
// u16 sk_type;
|
||||
// u16 sk_protocol;
|
||||
// }
|
||||
struct sock___5_10 *sk_new = (struct sock___5_10 *)sk;
|
||||
// kernel version >= 5.10
|
||||
//
|
||||
// struct sock {
|
||||
// u16 sk_type;
|
||||
// u16 sk_protocol;
|
||||
// }
|
||||
struct sock___5_10 *sk_new = (struct sock___5_10 *)sk;
|
||||
|
||||
*protocol = BPF_CORE_READ(sk_new, sk_protocol);
|
||||
*type = BPF_CORE_READ(sk_new, sk_type);
|
||||
return;
|
||||
*protocol = BPF_CORE_READ(sk_new, sk_protocol);
|
||||
*type = BPF_CORE_READ(sk_new, sk_type);
|
||||
return;
|
||||
}
|
||||
|
||||
SEC("tracepoint/skb/kfree_skb")
|
||||
int bpf_kfree_skb_prog(struct trace_event_raw_kfree_skb *ctx)
|
||||
int bpf_kfree_skb_prog(struct kfree_skb_args *ctx)
|
||||
{
|
||||
struct sk_buff *skb = ctx->skbaddr;
|
||||
struct perf_event_t *data = NULL;
|
||||
struct sock_common *sk_common;
|
||||
struct tcphdr tcphdr;
|
||||
struct iphdr iphdr;
|
||||
struct sock *sk;
|
||||
u16 protocol = 0;
|
||||
u16 type = 0;
|
||||
u8 state = 0;
|
||||
struct sk_buff *skb = ctx->skbaddr;
|
||||
struct perf_event_t *data = NULL;
|
||||
struct sock_common *sk_common;
|
||||
struct tcphdr tcphdr;
|
||||
struct iphdr iphdr;
|
||||
struct sock *sk;
|
||||
u16 protocol = 0;
|
||||
u16 type = 0;
|
||||
u8 state = 0;
|
||||
|
||||
/* only for IP && TCP */
|
||||
if (ctx->protocol != ETH_P_IP)
|
||||
return 0;
|
||||
/* only for IP && TCP */
|
||||
if (ctx->protocol != ETH_P_IP)
|
||||
return 0;
|
||||
|
||||
bpf_probe_read(&iphdr, sizeof(iphdr), skb_network_header(skb));
|
||||
if (iphdr.protocol != IPPROTO_TCP)
|
||||
return 0;
|
||||
bpf_probe_read(&iphdr, sizeof(iphdr), skb_network_header(skb));
|
||||
if (iphdr.protocol != IPPROTO_TCP)
|
||||
return 0;
|
||||
|
||||
sk = BPF_CORE_READ(skb, sk);
|
||||
if (!sk)
|
||||
return 0;
|
||||
sk = BPF_CORE_READ(skb, sk);
|
||||
if (!sk)
|
||||
return 0;
|
||||
sk_common = (struct sock_common *)sk;
|
||||
|
||||
sk_common = (struct sock_common *)sk;
|
||||
// filter the sock by AF_INET, SOCK_STREAM, IPPROTO_TCP
|
||||
if (BPF_CORE_READ(sk_common, skc_family) != AF_INET)
|
||||
return 0;
|
||||
|
||||
// filter the sock by AF_INET, SOCK_STREAM, IPPROTO_TCP
|
||||
if (BPF_CORE_READ(sk_common, skc_family) != AF_INET)
|
||||
return 0;
|
||||
sk_get_type_and_protocol(sk, &protocol, &type);
|
||||
if ((u8)protocol != IPPROTO_TCP || type != SOCK_STREAM)
|
||||
return 0;
|
||||
|
||||
sk_get_type_and_protocol(sk, &protocol, &type);
|
||||
if ((u8)protocol != IPPROTO_TCP || type != SOCK_STREAM)
|
||||
return 0;
|
||||
// filter not CLOSE
|
||||
state = BPF_CORE_READ(sk_common, skc_state);
|
||||
if (state == TCP_CLOSE || state == 0)
|
||||
return 0;
|
||||
|
||||
state = BPF_CORE_READ(sk_common, skc_state);
|
||||
if (state == TCP_CLOSE || state == 0)
|
||||
return 0;
|
||||
// ratelimit
|
||||
if (bpf_ratelimited(&rate))
|
||||
return 0;
|
||||
|
||||
if (bpf_ratelimited(&rate))
|
||||
return 0;
|
||||
data = bpf_map_lookup_elem(&dropwatch_stackmap, &stackmap_key);
|
||||
if (!data) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
data = bpf_map_lookup_elem(&dropwatch_stackmap, &stackmap_key);
|
||||
if (!data) {
|
||||
return 0;
|
||||
}
|
||||
bpf_probe_read(&tcphdr, sizeof(tcphdr), skb_transport_header(skb));
|
||||
|
||||
bpf_probe_read(&tcphdr, sizeof(tcphdr), skb_transport_header(skb));
|
||||
/* event */
|
||||
data->tgid_pid = bpf_get_current_pid_tgid();
|
||||
bpf_get_current_comm(&data->comm, sizeof(data->comm));
|
||||
data->type = TYPE_TCP_COMMON_DROP;
|
||||
data->state = state;
|
||||
data->saddr = iphdr.saddr;
|
||||
data->daddr = iphdr.daddr;
|
||||
data->sport = tcphdr.source;
|
||||
data->dport = tcphdr.dest;
|
||||
data->seq = tcphdr.seq;
|
||||
data->ack_seq = tcphdr.ack_seq;
|
||||
data->pkt_len = BPF_CORE_READ(skb, len);
|
||||
data->queue_mapping = BPF_CORE_READ(skb, queue_mapping);
|
||||
data->stack_size = bpf_get_stack(ctx, data->stack, sizeof(data->stack), 0);
|
||||
data->sk_max_ack_backlog = 0; // ignore sk_max_ack_backlog in dropwatch case.
|
||||
|
||||
/* event */
|
||||
data->tgid_pid = bpf_get_current_pid_tgid();
|
||||
bpf_get_current_comm(&data->comm, sizeof(data->comm));
|
||||
data->type = TYPE_TCP_COMMON_DROP;
|
||||
data->state = state;
|
||||
data->saddr = iphdr.saddr;
|
||||
data->daddr = iphdr.daddr;
|
||||
data->sport = tcphdr.source;
|
||||
data->dport = tcphdr.dest;
|
||||
data->seq = tcphdr.seq;
|
||||
data->ack_seq = tcphdr.ack_seq;
|
||||
data->pkt_len = BPF_CORE_READ(skb, len);
|
||||
data->queue_mapping = BPF_CORE_READ(skb, queue_mapping);
|
||||
data->stack_size =
|
||||
bpf_get_stack(ctx, data->stack, sizeof(data->stack), 0);
|
||||
data->sk_max_ack_backlog = 0;
|
||||
// output
|
||||
bpf_perf_event_output(ctx, &perf_events, BPF_F_CURRENT_CPU, data, sizeof(*data));
|
||||
|
||||
bpf_perf_event_output(ctx, &perf_events, COMPAT_BPF_F_CURRENT_CPU, data,
|
||||
sizeof(*data));
|
||||
|
||||
bpf_map_update_elem(&dropwatch_stackmap, &stackmap_key, &zero_data,
|
||||
COMPAT_BPF_EXIST);
|
||||
return 0;
|
||||
// clean
|
||||
bpf_map_update_elem(&dropwatch_stackmap, &stackmap_key, &zero_data, BPF_EXIST);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// The current kernel does not support kprobe+offset very well, waiting for kpatch to come online.
|
||||
#if 0
|
||||
static int fill_overflow_event(void *ctx, u8 type, struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
|
||||
struct perf_event_t *data = NULL;
|
||||
struct iphdr iphdr;
|
||||
struct tcphdr tcphdr;
|
||||
|
||||
data = bpf_map_lookup_elem(&dropwatch_stackmap, &stackmap_key);
|
||||
if (!data) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
bpf_probe_read(&iphdr, sizeof(iphdr), skb_network_header(skb));
|
||||
bpf_probe_read(&tcphdr, sizeof(tcphdr), skb_transport_header(skb));
|
||||
|
||||
/* event */
|
||||
data->tgid_pid = bpf_get_current_pid_tgid();
|
||||
bpf_get_current_comm(&data->comm, sizeof(data->comm));
|
||||
data->type = type;
|
||||
data->state = 0;
|
||||
data->saddr = iphdr.saddr;
|
||||
data->daddr = iphdr.daddr;
|
||||
data->sport = tcphdr.source;
|
||||
data->dport = tcphdr.dest;
|
||||
data->seq = tcphdr.seq;
|
||||
data->ack_seq = tcphdr.ack_seq;
|
||||
data->pkt_len = BPF_CORE_READ(skb, len);
|
||||
data->queue_mapping = BPF_CORE_READ(skb, queue_mapping);
|
||||
data->stack_size = 0; // ignore stack in not-overflow.
|
||||
data->sk_max_ack_backlog = BPF_CORE_READ(sk, sk_max_ack_backlog);
|
||||
|
||||
// output
|
||||
bpf_perf_event_output(ctx, &perf_events, BPF_F_CURRENT_CPU, data, sizeof(*data));
|
||||
|
||||
// clean
|
||||
bpf_map_update_elem(&dropwatch_stackmap, &stackmap_key, &zero_data, BPF_EXIST);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// the dropwatch case: syn_flood.
|
||||
SEC("kprobe/tcp_conn_request+1290")
|
||||
int bpf_tcp_syn_flood_action_prog(struct pt_regs *ctx)
|
||||
{
|
||||
// the function of `tcp_syn_flood_action` arguments:
|
||||
// %r15: struct sock *sk
|
||||
// %r13: struct sk_buff *skb
|
||||
struct sock *sk = (void *)ctx->r15;
|
||||
struct sk_buff *skb= (void *)ctx->r13;
|
||||
|
||||
// ratelimit
|
||||
if (bpf_ratelimited(ctx, rate))
|
||||
return 0;
|
||||
|
||||
// fill
|
||||
return fill_overflow_event(ctx, TYPE_TCP_SYN_FLOOD, sk, skb);
|
||||
}
|
||||
|
||||
// the dropwatch case: listen-overflow in the TCP_CLOSE state(client: TCP_SYN_SENT).
|
||||
SEC("kprobe/tcp_conn_request+167")
|
||||
int bpf_tcp_listen_overflow_handshake1_prog(struct pt_regs *ctx)
|
||||
{
|
||||
// this position has registers as follows:
|
||||
// %r15: struct sock *sk
|
||||
// %r13: struct sk_buff *skb
|
||||
struct sock *sk = (void *)ctx->r15;
|
||||
struct sk_buff *skb= (void *)ctx->r13;
|
||||
|
||||
// ratelimit
|
||||
if (bpf_ratelimited(ctx, rate))
|
||||
return 0;
|
||||
|
||||
// fill
|
||||
return fill_overflow_event(ctx, TYPE_TCP_LISTEN_OVERFLOW_HANDSHAKE1, sk, skb);
|
||||
}
|
||||
|
||||
// the dropwatch case: listen-overflow in the TCP_NEW_SYN_RECV state(client: TCP_ESTABLISHED).
|
||||
SEC("kprobe/tcp_v4_syn_recv_sock+700")
|
||||
int bpf_tcp_listen_overflow_handshake3_prog(struct pt_regs *ctx)
|
||||
{
|
||||
// this position has registers as follows:
|
||||
// %rdi: struct sock *sk
|
||||
// %rsi: struct sk_buff *skb
|
||||
// %r15: struct request_sock *req
|
||||
struct sock *sk = (void *)ctx->di;
|
||||
struct sk_buff *skb= (void *)ctx->si;
|
||||
|
||||
// ratelimit
|
||||
if (bpf_ratelimited(ctx, rate))
|
||||
return 0;
|
||||
|
||||
// fill
|
||||
return fill_overflow_event(ctx, TYPE_TCP_LISTEN_OVERFLOW_HANDSHAKE3, sk, skb);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1,38 +1,43 @@
|
|||
#include "vmlinux.h"
|
||||
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
#include "bpf_common.h"
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include "bpf_ratelimit.h"
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
BPF_RATELIMIT_IN_MAP(rate, 1, COMPAT_CPU_NUM * 10000, 0);
|
||||
#define CPU_NUM 128
|
||||
BPF_RATELIMIT_IN_MAP(rate, 1, CPU_NUM * 10000, 0);
|
||||
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} hungtask_perf_events SEC(".maps");
|
||||
|
||||
struct hungtask_info {
|
||||
int32_t pid;
|
||||
char comm[COMPAT_TASK_COMM_LEN];
|
||||
int32_t pid;
|
||||
char comm[TASK_COMM_LEN];
|
||||
};
|
||||
|
||||
struct tracepoint_args {
|
||||
unsigned long pad;
|
||||
char comm[TASK_COMM_LEN];
|
||||
int pid;
|
||||
};
|
||||
|
||||
SEC("tracepoint/sched/sched_process_hang")
|
||||
int tracepoint_sched_process_hang(struct trace_event_raw_sched_process_hang *ctx)
|
||||
int tracepoint_sched_process_hang(struct tracepoint_args *ctx)
|
||||
{
|
||||
struct hungtask_info info = {};
|
||||
struct hungtask_info info = {};
|
||||
|
||||
if (bpf_ratelimited_in_map(ctx, rate))
|
||||
return 0;
|
||||
|
||||
info.pid = ctx->pid;
|
||||
bpf_probe_read_str(&info.comm, COMPAT_TASK_COMM_LEN, ctx->comm);
|
||||
bpf_perf_event_output(ctx, &hungtask_perf_events,
|
||||
COMPAT_BPF_F_CURRENT_CPU, &info, sizeof(info));
|
||||
return 0;
|
||||
if (bpf_ratelimited_in_map(ctx, rate))
|
||||
return 0;
|
||||
info.pid = ctx->pid;
|
||||
// custom defined struct can't use BPF_CORE_READ_STR_INTO()
|
||||
bpf_probe_read_str(&info.comm, TASK_COMM_LEN, ctx->comm);
|
||||
bpf_perf_event_output(ctx, &hungtask_perf_events, BPF_F_CURRENT_CPU, &info, sizeof(info));
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -5,29 +5,38 @@
|
|||
#define NULL ((void *)0)
|
||||
#endif
|
||||
|
||||
/* define COMPAT_XXX for compat old kernel vmlinux.h */
|
||||
#define COMPAT_BPF_F_CURRENT_CPU 0xffffffffULL
|
||||
/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
|
||||
* BPF_FUNC_perf_event_read_value flags.
|
||||
*/
|
||||
enum {
|
||||
BPF_F_INDEX_MASK = 0xffffffffULL,
|
||||
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
|
||||
/* BPF_FUNC_perf_event_output for sk_buff input context. */
|
||||
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
|
||||
};
|
||||
|
||||
#define COMPAT_TASK_COMM_LEN 16
|
||||
/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
|
||||
enum {
|
||||
BPF_F_SKIP_FIELD_MASK = 0xffULL,
|
||||
BPF_F_USER_STACK = (1ULL << 8),
|
||||
/* flags used by BPF_FUNC_get_stackid only. */
|
||||
BPF_F_FAST_STACK_CMP = (1ULL << 9),
|
||||
BPF_F_REUSE_STACKID = (1ULL << 10),
|
||||
/* flags used by BPF_FUNC_get_stack only. */
|
||||
BPF_F_USER_BUILD_ID = (1ULL << 11),
|
||||
};
|
||||
|
||||
#define TASK_COMM_LEN 16
|
||||
#define PATH_MAX 4096 /* # chars in a path name including nul */
|
||||
#define COMPAT_CPU_NUM 128
|
||||
|
||||
/* include/uapi/linux/perf_event.h */
|
||||
#define PERF_MAX_STACK_DEPTH 127
|
||||
#define PERF_MIN_STACK_DEPTH 16
|
||||
|
||||
/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
|
||||
#define COMPAT_BPF_F_USER_STACK 256
|
||||
|
||||
/* flags for BPF_MAP_UPDATE_ELEM command */
|
||||
#define COMPAT_BPF_ANY 0 /* create new element or update existing */
|
||||
#define COMPAT_BPF_NOEXIST 1 /* create new element if it didn't exist */
|
||||
#define COMPAT_BPF_EXIST 2 /* update existing element */
|
||||
#define COMPAT_BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */
|
||||
|
||||
#define NR_SOFTIRQS_MAX 16
|
||||
|
||||
#define NSEC_PER_MSEC 1000000UL
|
||||
#define NSEC_PER_USEC 1000UL
|
||||
#define BPF_ANY 0 /* create new element or update existing */
|
||||
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
|
||||
#define BPF_EXIST 2 /* update existing element */
|
||||
#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */
|
||||
|
||||
#endif /* __BPF_COMMON_H__ */
|
||||
|
|
|
@ -23,7 +23,7 @@ static __always_inline void func_trace_begain(u64 id)
|
|||
.id = id,
|
||||
};
|
||||
|
||||
bpf_map_update_elem(&func_trace_map, &id, &entry, COMPAT_BPF_ANY);
|
||||
bpf_map_update_elem(&func_trace_map, &id, &entry, BPF_ANY);
|
||||
}
|
||||
|
||||
static __always_inline struct trace_entry_ctx *func_trace_end(u64 id)
|
||||
|
|
|
@ -105,7 +105,7 @@ bpf_ratelimited_core_in_map(void *ctx, void *map, void *perf_map,
|
|||
// the threshold/max_burst is exceeded, notify once in a cycle
|
||||
if (old_nmissed == 0 || (rate->max_burst > 0 &&
|
||||
rate->nmissed > rate->max_burst - rate->burst))
|
||||
bpf_perf_event_output(ctx, perf_map, COMPAT_BPF_F_CURRENT_CPU, rate,
|
||||
bpf_perf_event_output(ctx, perf_map, BPF_F_CURRENT_CPU, rate,
|
||||
sizeof(struct bpf_ratelimit));
|
||||
return true;
|
||||
}
|
||||
|
|
98986
bpf/include/vmlinux.h
|
@ -17,8 +17,8 @@ int ad_disable(struct pt_regs *ctx)
|
|||
// ko module and CO-RE relocation is not supported directly at old
|
||||
// kernel
|
||||
u64 nothing = 0;
|
||||
bpf_perf_event_output(ctx, &ad_event_map, COMPAT_BPF_F_CURRENT_CPU,
|
||||
¬hing, sizeof(nothing));
|
||||
bpf_perf_event_output(ctx, &ad_event_map, BPF_F_CURRENT_CPU, ¬hing,
|
||||
sizeof(nothing));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
#include "bpf_common.h"
|
||||
#include "vmlinux.h"
|
||||
#include "vmlinux_sched.h"
|
||||
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
struct mem_cgroup_metric {
|
||||
/* cg: direct reclaim count caused by try_charge */
|
||||
unsigned long directstall_count;
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__type(key, unsigned long);
|
||||
__type(value, struct mem_cgroup_metric);
|
||||
__uint(max_entries, 10240);
|
||||
} mem_cgroup_map SEC(".maps");
|
||||
|
||||
SEC("tracepoint/vmscan/mm_vmscan_memcg_reclaim_begin")
|
||||
int tracepoint_vmscan_mm_vmscan_memcg_reclaim_begin(struct pt_regs *ctx)
|
||||
{
|
||||
struct cgroup_subsys_state *mm_subsys;
|
||||
struct mem_cgroup_metric *valp;
|
||||
struct task_struct *task;
|
||||
|
||||
task = (struct task_struct *)bpf_get_current_task();
|
||||
if (BPF_CORE_READ(task, flags) & PF_KSWAPD)
|
||||
return 0;
|
||||
|
||||
mm_subsys = BPF_CORE_READ(task, cgroups, subsys[memory_cgrp_id]);
|
||||
valp = bpf_map_lookup_elem(&mem_cgroup_map, &mm_subsys);
|
||||
if (!valp) {
|
||||
struct mem_cgroup_metric new_metrics = {
|
||||
.directstall_count = 1,
|
||||
};
|
||||
bpf_map_update_elem(&mem_cgroup_map, &mm_subsys, &new_metrics,
|
||||
BPF_ANY);
|
||||
return 0;
|
||||
}
|
||||
|
||||
__sync_fetch_and_add(&valp->directstall_count, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("kprobe/mem_cgroup_css_released")
|
||||
int kprobe_mem_cgroup_css_released(struct pt_regs *ctx)
|
||||
{
|
||||
u64 css = PT_REGS_PARM1(ctx);
|
||||
bpf_map_delete_elem(&mem_cgroup_map, &css);
|
||||
return 0;
|
||||
}
|
|
@ -1,11 +1,10 @@
|
|||
#include "vmlinux.h"
|
||||
#include "bpf_common.h"
|
||||
#include "bpf_func_trace.h"
|
||||
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
#include "bpf_common.h"
|
||||
#include "bpf_func_trace.h"
|
||||
|
||||
struct mm_free_compact_entry {
|
||||
/* host: compaction latency */
|
||||
unsigned long compaction_stat;
|
||||
|
@ -35,7 +34,7 @@ update_metric_map(u64 free_delta_ns, u64 compact_delta_ns)
|
|||
.compaction_stat = compact_delta_ns,
|
||||
};
|
||||
bpf_map_update_elem(&mm_free_compact_map, &key, &new_metrics,
|
||||
COMPAT_BPF_ANY);
|
||||
BPF_ANY);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -5,52 +5,60 @@
|
|||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
#include "bpf_common.h"
|
||||
#include "vmlinux_sched.h"
|
||||
#include "bpf_func_trace.h"
|
||||
#include "bpf_ratelimit.h"
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
struct mem_cgroup_metric {
|
||||
/* cg: direct reclaim count caused by try_charge */
|
||||
unsigned long directstall_count;
|
||||
};
|
||||
volatile const unsigned long deltath = 0;
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__type(key, unsigned long);
|
||||
__type(value, struct mem_cgroup_metric);
|
||||
__uint(max_entries, 10240);
|
||||
} mem_cgroup_map SEC(".maps");
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} reclaim_perf_events SEC(".maps");
|
||||
|
||||
SEC("tracepoint/vmscan/mm_vmscan_memcg_reclaim_begin")
|
||||
int tracepoint_vmscan_mm_vmscan_memcg_reclaim_begin(struct pt_regs *ctx)
|
||||
struct reclaim_entry {
|
||||
char comm[TASK_COMM_LEN];
|
||||
u64 delta_time;
|
||||
u64 css;
|
||||
u64 pid;
|
||||
};
|
||||
|
||||
SEC("kprobe/try_to_free_pages")
|
||||
int kprobe_try_to_free_pages(struct pt_regs *ctx)
|
||||
{
|
||||
struct cgroup_subsys_state *mm_subsys;
|
||||
struct mem_cgroup_metric *valp;
|
||||
func_trace_begain(bpf_get_current_pid_tgid());
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("kretprobe/try_to_free_pages")
|
||||
int kretprobe_try_to_free_pages(struct pt_regs *ctx)
|
||||
{
|
||||
struct trace_entry_ctx *entry;
|
||||
struct task_struct *task;
|
||||
|
||||
task = (struct task_struct *)bpf_get_current_task();
|
||||
if (BPF_CORE_READ(task, flags) & PF_KSWAPD)
|
||||
entry = func_trace_end(bpf_get_current_pid_tgid());
|
||||
if (!entry)
|
||||
return 0;
|
||||
|
||||
mm_subsys = BPF_CORE_READ(task, cgroups, subsys[memory_cgrp_id]);
|
||||
valp = bpf_map_lookup_elem(&mem_cgroup_map, &mm_subsys);
|
||||
if (!valp) {
|
||||
struct mem_cgroup_metric new_metrics = {
|
||||
.directstall_count = 1,
|
||||
if (entry->delta_ns > deltath) {
|
||||
task = (struct task_struct *)bpf_get_current_task();
|
||||
|
||||
struct reclaim_entry data = {
|
||||
.pid = entry->id,
|
||||
.css = (u64)BPF_CORE_READ(task, cgroups,
|
||||
subsys[cpu_cgrp_id]),
|
||||
.delta_time = entry->delta_ns,
|
||||
};
|
||||
bpf_map_update_elem(&mem_cgroup_map, &mm_subsys, &new_metrics,
|
||||
COMPAT_BPF_ANY);
|
||||
return 0;
|
||||
|
||||
bpf_get_current_comm(data.comm, sizeof(data.comm));
|
||||
|
||||
bpf_perf_event_output(ctx, &reclaim_perf_events,
|
||||
BPF_F_CURRENT_CPU, &data,
|
||||
sizeof(struct reclaim_entry));
|
||||
}
|
||||
|
||||
__sync_fetch_and_add(&valp->directstall_count, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("kprobe/mem_cgroup_css_released")
|
||||
int kprobe_mem_cgroup_css_released(struct pt_regs *ctx)
|
||||
{
|
||||
u64 css = PT_REGS_PARM1(ctx);
|
||||
bpf_map_delete_elem(&mem_cgroup_map, &css);
|
||||
func_trace_destroy(entry->id);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,64 +0,0 @@
|
|||
#include "vmlinux.h"
|
||||
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
#include "bpf_common.h"
|
||||
#include "bpf_func_trace.h"
|
||||
#include "bpf_ratelimit.h"
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
volatile const unsigned long deltath = 0;
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} reclaim_perf_events SEC(".maps");
|
||||
|
||||
struct reclaim_entry {
|
||||
char comm[COMPAT_TASK_COMM_LEN];
|
||||
u64 delta_time;
|
||||
u64 css;
|
||||
u64 pid;
|
||||
};
|
||||
|
||||
SEC("kprobe/try_to_free_pages")
|
||||
int kprobe_try_to_free_pages(struct pt_regs *ctx)
|
||||
{
|
||||
func_trace_begain(bpf_get_current_pid_tgid());
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("kretprobe/try_to_free_pages")
|
||||
int kretprobe_try_to_free_pages(struct pt_regs *ctx)
|
||||
{
|
||||
struct trace_entry_ctx *entry;
|
||||
struct task_struct *task;
|
||||
|
||||
entry = func_trace_end(bpf_get_current_pid_tgid());
|
||||
if (!entry)
|
||||
return 0;
|
||||
|
||||
if (entry->delta_ns > deltath) {
|
||||
task = (struct task_struct *)bpf_get_current_task();
|
||||
|
||||
struct reclaim_entry data = {
|
||||
.pid = entry->id,
|
||||
.css = (u64)BPF_CORE_READ(task, cgroups,
|
||||
subsys[cpu_cgrp_id]),
|
||||
.delta_time = entry->delta_ns,
|
||||
};
|
||||
|
||||
bpf_get_current_comm(data.comm, sizeof(data.comm));
|
||||
|
||||
bpf_perf_event_output(ctx, &reclaim_perf_events,
|
||||
COMPAT_BPF_F_CURRENT_CPU, &data,
|
||||
sizeof(struct reclaim_entry));
|
||||
}
|
||||
|
||||
func_trace_destroy(entry->id);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
#include "vmlinux.h"
|
||||
#include "bpf_common.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
|
||||
#define NSEC_PER_MSEC 1000000UL
|
||||
#define NSEC_PER_USEC 1000UL
|
||||
#define NR_SOFTIRQS_MAX 16 // must be 2^order
|
||||
|
||||
enum lat_zone {
|
||||
LAT_ZONE0=0, // 0 ~ 10us
|
||||
LAT_ZONE1, // 10us ~ 100us
|
||||
LAT_ZONE2, // 100us ~ 1ms
|
||||
LAT_ZONE3, // 1ms ~ inf
|
||||
LAT_ZONE_MAX,
|
||||
};
|
||||
|
||||
struct tp_softirq {
|
||||
unsigned long long pad;
|
||||
unsigned int vec;
|
||||
};
|
||||
|
||||
// Because bpf access array is strictly checked,
|
||||
// the size of the array must be aligned in order
|
||||
// of 2, so we should not use NR_SOFTIRQS, but
|
||||
// use NR_SOFTIRQS_MAX as the size of the array
|
||||
struct softirq_lat {
|
||||
u64 silat[NR_SOFTIRQS_MAX][LAT_ZONE_MAX];
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
//key -> NR_SOFTIRQS
|
||||
__type(key, u32);
|
||||
// value -> ts, record softirq_raise start time
|
||||
__type(value, u64);
|
||||
__uint(max_entries, NR_SOFTIRQS);
|
||||
} silat_map SEC(".maps");//softirq latency map
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(key_size, sizeof(u32));
|
||||
__uint(value_size, sizeof(struct softirq_lat));
|
||||
__uint(max_entries, 1);
|
||||
} softirq_lats SEC(".maps");
|
||||
|
||||
SEC("tracepoint/irq/softirq_raise")
|
||||
void probe_softirq_raise(struct tp_softirq *ctx)
|
||||
{
|
||||
u32 nr;
|
||||
u64 now;
|
||||
nr = ctx->vec;
|
||||
|
||||
now = bpf_ktime_get_ns();
|
||||
bpf_map_update_elem(&silat_map, &nr, &now, BPF_ANY);
|
||||
}
|
||||
|
||||
static void
|
||||
calc_softirq_latency(struct softirq_lat *lat_mc, u32 nr, u64 now)
|
||||
{
|
||||
u64 lat, *ts;
|
||||
|
||||
ts = bpf_map_lookup_elem(&silat_map, &nr);
|
||||
if (!ts)
|
||||
return;
|
||||
|
||||
lat = now - *ts;
|
||||
|
||||
//update to metrics
|
||||
if (lat < 10 * NSEC_PER_USEC) { //10us
|
||||
__sync_fetch_and_add(&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE0], 1);
|
||||
} else if (lat < 100 * NSEC_PER_USEC) {//100us
|
||||
__sync_fetch_and_add(&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE1], 1);
|
||||
} else if (lat < 1 * NSEC_PER_MSEC) {//1ms
|
||||
__sync_fetch_and_add(&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE2], 1);
|
||||
} else {//1ms+
|
||||
__sync_fetch_and_add(&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE3], 1);
|
||||
}
|
||||
}
|
||||
|
||||
SEC("tracepoint/irq/softirq_entry")
|
||||
void probe_softirq_entry(struct tp_softirq *ctx)
|
||||
{
|
||||
u32 key = 0, nr;
|
||||
u64 now;
|
||||
struct softirq_lat *lat_mc;
|
||||
|
||||
lat_mc = bpf_map_lookup_elem(&softirq_lats, &key);
|
||||
if (!lat_mc)
|
||||
return;
|
||||
|
||||
nr = ctx->vec;
|
||||
|
||||
now = bpf_ktime_get_ns();
|
||||
|
||||
// update softirq lat to lat metric
|
||||
calc_softirq_latency(lat_mc, nr, now);
|
||||
}
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
194
bpf/netrecvlat.c
|
@ -1,175 +1,173 @@
|
|||
// go:build ignore
|
||||
//go:build ignore
|
||||
|
||||
#include "vmlinux.h"
|
||||
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_endian.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
#include <bpf/bpf_endian.h>
|
||||
#include "bpf_common.h"
|
||||
#include "bpf_ratelimit.h"
|
||||
#include "vmlinux_net.h"
|
||||
#include "bpf_ratelimit.h"
|
||||
|
||||
volatile const long long mono_wall_offset = 0;
|
||||
volatile const long long to_netif = 5 * 1000 * 1000; // 5ms
|
||||
volatile const long long to_tcpv4 = 10 * 1000 * 1000; // 10ms
|
||||
volatile const long long to_user_copy = 115 * 1000 * 1000; // 115ms
|
||||
volatile const long long to_netif = 5 * 1000 * 1000; // 5ms
|
||||
volatile const long long to_tcpv4 = 10 * 1000 * 1000; // 10ms
|
||||
volatile const long long to_user_copy = 115 * 1000 * 1000; // 115ms
|
||||
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
|
||||
BPF_RATELIMIT(rate, 1, 100);
|
||||
|
||||
struct netif_receive_skb_args {
|
||||
struct trace_entry entry;
|
||||
struct sk_buff *skb;
|
||||
};
|
||||
|
||||
struct skb_copy_datagram_iovec_args {
|
||||
struct trace_entry entry;
|
||||
struct sk_buff *skb;
|
||||
};
|
||||
|
||||
struct perf_event_t {
|
||||
char comm[COMPAT_TASK_COMM_LEN];
|
||||
u64 latency;
|
||||
u64 tgid_pid;
|
||||
u64 pkt_len;
|
||||
u16 sport;
|
||||
u16 dport;
|
||||
u32 saddr;
|
||||
u32 daddr;
|
||||
u32 seq;
|
||||
u32 ack_seq;
|
||||
u8 state;
|
||||
u8 where;
|
||||
char comm[TASK_COMM_LEN];
|
||||
u64 latency;
|
||||
u64 tgid_pid;
|
||||
u64 pkt_len;
|
||||
u16 sport;
|
||||
u16 dport;
|
||||
u32 saddr;
|
||||
u32 daddr;
|
||||
u32 seq;
|
||||
u32 ack_seq;
|
||||
u8 state;
|
||||
u8 where;
|
||||
};
|
||||
|
||||
enum skb_rcv_where {
|
||||
TO_NETIF_RCV,
|
||||
TO_TCPV4_RCV,
|
||||
TO_USER_COPY,
|
||||
TO_NETIF_RCV,
|
||||
TO_TCPV4_RCV,
|
||||
TO_USER_COPY,
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} net_recv_lat_event_map SEC(".maps");
|
||||
|
||||
struct mix {
|
||||
struct iphdr *ip_hdr;
|
||||
u64 lat;
|
||||
u8 state;
|
||||
u8 where;
|
||||
struct iphdr *ip_hdr;
|
||||
u64 lat;
|
||||
u8 state;
|
||||
u8 where;
|
||||
};
|
||||
|
||||
static inline u64 delta_now_skb_tstamp(struct sk_buff *skb)
|
||||
{
|
||||
u64 tstamp = BPF_CORE_READ(skb, tstamp);
|
||||
// although the skb->tstamp record is opened in user space by
|
||||
// SOF_TIMESTAMPING_RX_SOFTWARE, it is still 0 in the following cases:
|
||||
// unix recv, netlink recv, few virtual dev(e.g. tun dev, napi dsabled)
|
||||
if (!tstamp)
|
||||
return 0;
|
||||
// although the skb->tstamp record is opened in user space by SOF_TIMESTAMPING_RX_SOFTWARE,
|
||||
// it is still 0 in the following cases:
|
||||
// unix recv, netlink recv, few virtual dev(e.g. tun dev, napi dsabled)
|
||||
if (!tstamp)
|
||||
return 0;
|
||||
|
||||
return bpf_ktime_get_ns() + mono_wall_offset - tstamp;
|
||||
return bpf_ktime_get_ns() + mono_wall_offset - tstamp;
|
||||
}
|
||||
|
||||
static inline u8 get_state(struct sk_buff *skb)
|
||||
{
|
||||
return BPF_CORE_READ(skb, sk, __sk_common.skc_state);
|
||||
return BPF_CORE_READ(skb, sk, __sk_common.skc_state);
|
||||
}
|
||||
|
||||
static inline void
|
||||
fill_and_output_event(void *ctx, struct sk_buff *skb, struct mix *_mix)
|
||||
static inline void fill_and_output_event(void *ctx, struct sk_buff *skb, struct mix *_mix)
|
||||
{
|
||||
struct perf_event_t event = {};
|
||||
struct tcphdr tcp_hdr;
|
||||
struct tcphdr tcp_hdr;
|
||||
|
||||
// ratelimit
|
||||
if (bpf_ratelimited(&rate))
|
||||
return;
|
||||
// ratelimit
|
||||
if (bpf_ratelimited(&rate))
|
||||
return;
|
||||
|
||||
if (likely(_mix->where == TO_USER_COPY)) {
|
||||
event.tgid_pid = bpf_get_current_pid_tgid();
|
||||
bpf_get_current_comm(&event.comm, sizeof(event.comm));
|
||||
}
|
||||
if (likely(_mix->where == TO_USER_COPY)) {
|
||||
event.tgid_pid = bpf_get_current_pid_tgid();
|
||||
bpf_get_current_comm(&event.comm, sizeof(event.comm));
|
||||
}
|
||||
|
||||
bpf_probe_read(&tcp_hdr, sizeof(tcp_hdr), skb_transport_header(skb));
|
||||
event.latency = _mix->lat;
|
||||
event.saddr = _mix->ip_hdr->saddr;
|
||||
event.daddr = _mix->ip_hdr->daddr;
|
||||
event.sport = tcp_hdr.source;
|
||||
event.dport = tcp_hdr.dest;
|
||||
event.seq = tcp_hdr.seq;
|
||||
event.ack_seq = tcp_hdr.ack_seq;
|
||||
event.pkt_len = BPF_CORE_READ(skb, len);
|
||||
event.state = _mix->state;
|
||||
event.where = _mix->where;
|
||||
|
||||
bpf_probe_read(&tcp_hdr, sizeof(tcp_hdr), skb_transport_header(skb));
|
||||
event.latency = _mix->lat;
|
||||
event.saddr = _mix->ip_hdr->saddr;
|
||||
event.daddr = _mix->ip_hdr->daddr;
|
||||
event.sport = tcp_hdr.source;
|
||||
event.dport = tcp_hdr.dest;
|
||||
event.seq = tcp_hdr.seq;
|
||||
event.ack_seq = tcp_hdr.ack_seq;
|
||||
event.pkt_len = BPF_CORE_READ(skb, len);
|
||||
event.state = _mix->state;
|
||||
event.where = _mix->where;
|
||||
|
||||
bpf_perf_event_output(ctx, &net_recv_lat_event_map,
|
||||
COMPAT_BPF_F_CURRENT_CPU, &event,
|
||||
sizeof(struct perf_event_t));
|
||||
bpf_perf_event_output(ctx, &net_recv_lat_event_map, BPF_F_CURRENT_CPU, &event, sizeof(struct perf_event_t));
|
||||
}
|
||||
|
||||
SEC("tracepoint/net/netif_receive_skb")
|
||||
int netif_receive_skb_prog(struct trace_event_raw_net_dev_template *args)
|
||||
int netif_receive_skb_prog(struct netif_receive_skb_args *args)
|
||||
{
|
||||
struct sk_buff *skb = (struct sk_buff *)args->skbaddr;
|
||||
struct sk_buff *skb = args->skb;
|
||||
struct iphdr ip_hdr;
|
||||
u64 delta;
|
||||
u64 delta;
|
||||
|
||||
if (unlikely(BPF_CORE_READ(skb, protocol) !=
|
||||
bpf_ntohs(ETH_P_IP))) // IPv4
|
||||
if (unlikely(BPF_CORE_READ(skb, protocol) != bpf_ntohs(ETH_P_IP))) // IPv4
|
||||
return 0;
|
||||
|
||||
bpf_probe_read(&ip_hdr, sizeof(ip_hdr), skb_network_header(skb));
|
||||
if (ip_hdr.protocol != IPPROTO_TCP)
|
||||
return 0;
|
||||
if (ip_hdr.protocol != IPPROTO_TCP)
|
||||
return 0;
|
||||
|
||||
delta = delta_now_skb_tstamp(skb);
|
||||
if (delta < to_netif)
|
||||
return 0;
|
||||
delta = delta_now_skb_tstamp(skb);
|
||||
if (delta < to_netif)
|
||||
return 0;
|
||||
|
||||
fill_and_output_event(args, skb,
|
||||
&(struct mix){&ip_hdr, delta, 0, TO_NETIF_RCV});
|
||||
fill_and_output_event(args, skb, &(struct mix){&ip_hdr, delta, 0, TO_NETIF_RCV});
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("kprobe/tcp_v4_rcv")
|
||||
int tcp_v4_rcv_prog(struct pt_regs *ctx)
|
||||
{
|
||||
struct sk_buff *skb = (struct sk_buff *)PT_REGS_PARM1_CORE(ctx);
|
||||
struct sk_buff *skb = (struct sk_buff*)PT_REGS_PARM1_CORE(ctx);
|
||||
struct iphdr ip_hdr;
|
||||
u64 delta;
|
||||
u64 delta;
|
||||
|
||||
delta = delta_now_skb_tstamp(skb);
|
||||
delta = delta_now_skb_tstamp(skb);
|
||||
if (delta < to_tcpv4)
|
||||
return 0;
|
||||
return 0;
|
||||
|
||||
bpf_probe_read(&ip_hdr, sizeof(ip_hdr), skb_network_header(skb));
|
||||
fill_and_output_event(
|
||||
ctx, skb,
|
||||
&(struct mix){&ip_hdr, delta, get_state(skb), TO_TCPV4_RCV});
|
||||
bpf_probe_read(&ip_hdr, sizeof(ip_hdr), skb_network_header(skb));
|
||||
fill_and_output_event(ctx, skb, &(struct mix){&ip_hdr, delta, get_state(skb), TO_TCPV4_RCV});
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tracepoint/skb/skb_copy_datagram_iovec")
|
||||
int skb_copy_datagram_iovec_prog(
|
||||
struct trace_event_raw_skb_copy_datagram_iovec *args)
|
||||
int skb_copy_datagram_iovec_prog(struct skb_copy_datagram_iovec_args *args)
|
||||
{
|
||||
struct sk_buff *skb = (struct sk_buff *)args->skbaddr;
|
||||
struct sk_buff *skb = args->skb;
|
||||
struct iphdr ip_hdr;
|
||||
u64 delta;
|
||||
u64 delta;
|
||||
|
||||
if (unlikely(BPF_CORE_READ(skb, protocol) != bpf_ntohs(ETH_P_IP)))
|
||||
if (unlikely(BPF_CORE_READ(skb, protocol) != bpf_ntohs(ETH_P_IP))) // IPv4
|
||||
return 0;
|
||||
|
||||
bpf_probe_read(&ip_hdr, sizeof(ip_hdr), skb_network_header(skb));
|
||||
if (ip_hdr.protocol != IPPROTO_TCP)
|
||||
return 0;
|
||||
if (ip_hdr.protocol != IPPROTO_TCP)
|
||||
return 0;
|
||||
|
||||
delta = delta_now_skb_tstamp(skb);
|
||||
delta = delta_now_skb_tstamp(skb);
|
||||
if (delta < to_user_copy)
|
||||
return 0;
|
||||
|
||||
fill_and_output_event(
|
||||
args, skb,
|
||||
&(struct mix){&ip_hdr, delta, get_state(skb), TO_USER_COPY});
|
||||
|
||||
fill_and_output_event(args, skb, &(struct mix){&ip_hdr, delta, get_state(skb), TO_USER_COPY});
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
73
bpf/oom.c
|
@ -1,59 +1,54 @@
|
|||
#include "vmlinux.h"
|
||||
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
#include "bpf_common.h"
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include "bpf_ratelimit.h"
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
BPF_RATELIMIT_IN_MAP(rate, 1, COMPAT_CPU_NUM * 10000, 0);
|
||||
#define CPU_NUM 128
|
||||
BPF_RATELIMIT_IN_MAP(rate, 1, CPU_NUM * 10000, 0);
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} oom_perf_events SEC(".maps");
|
||||
|
||||
struct oom_info {
|
||||
char trigger_comm[COMPAT_TASK_COMM_LEN];
|
||||
char victim_comm[COMPAT_TASK_COMM_LEN];
|
||||
u32 trigger_pid;
|
||||
u32 victim_pid;
|
||||
u64 trigger_memcg_css;
|
||||
u64 victim_memcg_css;
|
||||
char trigger_comm[TASK_COMM_LEN];
|
||||
char victim_comm[TASK_COMM_LEN];
|
||||
u32 trigger_pid;
|
||||
u32 victim_pid;
|
||||
u64 trigger_memcg_css;
|
||||
u64 victim_memcg_css;
|
||||
};
|
||||
|
||||
SEC("kprobe/oom_kill_process")
|
||||
int kprobe_oom_kill_process(struct pt_regs *ctx)
|
||||
{
|
||||
struct oom_control *oc;
|
||||
struct oom_info info = {};
|
||||
struct task_struct *trigger_task, *victim_task;
|
||||
struct oom_control *oc;
|
||||
struct oom_info info = {};
|
||||
struct task_struct *trigger_task, *victim_task;
|
||||
|
||||
if (bpf_ratelimited_in_map(ctx, rate))
|
||||
return 0;
|
||||
if (bpf_ratelimited_in_map(ctx, rate))
|
||||
return 0;
|
||||
|
||||
oc = (void *)ctx->di;
|
||||
oc = (void *)ctx->di;
|
||||
|
||||
if (!oc)
|
||||
return 0;
|
||||
|
||||
trigger_task = (struct task_struct *)bpf_get_current_task();
|
||||
victim_task = BPF_CORE_READ(oc, chosen);
|
||||
info.trigger_pid = BPF_CORE_READ(trigger_task, pid);
|
||||
info.victim_pid = BPF_CORE_READ(victim_task, pid);
|
||||
BPF_CORE_READ_STR_INTO(&info.trigger_comm, trigger_task, comm);
|
||||
BPF_CORE_READ_STR_INTO(&info.victim_comm, victim_task, comm);
|
||||
|
||||
info.victim_memcg_css =
|
||||
(u64)BPF_CORE_READ(victim_task, cgroups, subsys[4]);
|
||||
info.trigger_memcg_css =
|
||||
(u64)BPF_CORE_READ(trigger_task, cgroups, subsys[4]);
|
||||
|
||||
bpf_perf_event_output(ctx, &oom_perf_events, COMPAT_BPF_F_CURRENT_CPU,
|
||||
&info, sizeof(info));
|
||||
return 0;
|
||||
if (!oc)
|
||||
return 0;
|
||||
trigger_task = (struct task_struct *)bpf_get_current_task();
|
||||
victim_task = BPF_CORE_READ(oc, chosen);
|
||||
info.trigger_pid = BPF_CORE_READ(trigger_task, pid);
|
||||
info.victim_pid = BPF_CORE_READ(victim_task, pid);
|
||||
BPF_CORE_READ_STR_INTO(&info.trigger_comm, trigger_task, comm);
|
||||
BPF_CORE_READ_STR_INTO(&info.victim_comm, victim_task, comm);
|
||||
|
||||
info.victim_memcg_css = (u64)BPF_CORE_READ(victim_task, cgroups, subsys[4]);
|
||||
info.trigger_memcg_css = (u64)BPF_CORE_READ(trigger_task, cgroups, subsys[4]);
|
||||
|
||||
bpf_perf_event_output(ctx, &oom_perf_events, BPF_F_CURRENT_CPU, &info, sizeof(info));
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,100 +1,99 @@
|
|||
#include "vmlinux.h"
|
||||
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include "bpf_common.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
#include "bpf_common.h"
|
||||
#include <bpf/bpf_core_read.h>
|
||||
|
||||
// defaultly, we use task_group address as key to operate map.
|
||||
#define TG_ADDR_KEY
|
||||
|
||||
#define TASK_RUNNING 0
|
||||
#define TASK_ON_RQ_QUEUED 1
|
||||
#define TASK_RUNNING 0
|
||||
#define TASK_ON_RQ_QUEUED 1
|
||||
|
||||
#define _(P) \
|
||||
({ \
|
||||
typeof(P) val = 0; \
|
||||
bpf_probe_read(&val, sizeof(val), &(P)); \
|
||||
val; \
|
||||
})
|
||||
#define _(P) \
|
||||
({ \
|
||||
typeof(P) val = 0; \
|
||||
bpf_probe_read(&val, sizeof(val), &(P)); \
|
||||
val; \
|
||||
})
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
struct stat_t {
|
||||
unsigned long nvcsw; // task_group counts of voluntary context switch
|
||||
unsigned long nivcsw; // task_group counts of involuntary context switch
|
||||
unsigned long
|
||||
nlat_01; // task_group counts of sched latency range [0, 10)ms
|
||||
unsigned long
|
||||
nlat_02; // task_group counts of sched latency range [10, 20)ms
|
||||
unsigned long
|
||||
nlat_03; // task_group counts of sched latency range [20, 50)ms
|
||||
unsigned long
|
||||
nlat_04; // task_group counts of sched latency range [50, inf)ms
|
||||
unsigned long nvcsw; // task_group counts of voluntary context switch
|
||||
unsigned long nivcsw; // task_group counts of involuntary context switch
|
||||
unsigned long nlat_01; // task_group counts of sched latency range [0, 10)ms
|
||||
unsigned long nlat_02; // task_group counts of sched latency range [10, 20)ms
|
||||
unsigned long nlat_03; // task_group counts of sched latency range [20, 50)ms
|
||||
unsigned long nlat_04; // task_group counts of sched latency range [50, inf)ms
|
||||
};
|
||||
|
||||
struct g_stat_t {
|
||||
unsigned long g_nvcsw; // global counts of voluntary context switch
|
||||
unsigned long g_nvcsw; // global counts of voluntary context switch
|
||||
unsigned long g_nivcsw; // global counts of involuntary context switch
|
||||
unsigned long
|
||||
g_nlat_01; // global counts of sched latency range [0, 10)ms
|
||||
unsigned long
|
||||
g_nlat_02; // global counts of sched latency range [10, 20)ms
|
||||
unsigned long
|
||||
g_nlat_03; // global counts of sched latency range [20, 50)ms
|
||||
unsigned long
|
||||
g_nlat_04; // global counts of sched latency range [50, inf)ms
|
||||
unsigned long g_nlat_01; // global counts of sched latency range [0, 10)ms
|
||||
unsigned long g_nlat_02; // global counts of sched latency range [10, 20)ms
|
||||
unsigned long g_nlat_03; // global counts of sched latency range [20, 50)ms
|
||||
unsigned long g_nlat_04; // global counts of sched latency range [50, inf)ms
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__type(key, u32);
|
||||
__type(value, u64);
|
||||
// FIXME: is 10000 enough or too large?
|
||||
__uint(max_entries, 10000);
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__type(key, u32);
|
||||
__type(value, u64);
|
||||
// FIXME: is 10000 enough or too large?
|
||||
__uint(max_entries, 10000);
|
||||
} latency SEC(".maps");
|
||||
|
||||
struct stat_t;
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
#ifdef TG_ADDR_KEY
|
||||
__type(key, u64);
|
||||
__type(key, u64);
|
||||
#else
|
||||
__type(key, u32);
|
||||
__type(key, u32);
|
||||
#endif
|
||||
__type(value, struct stat_t);
|
||||
__uint(max_entries, 10000);
|
||||
__type(value, struct stat_t);
|
||||
__uint(max_entries, 10000);
|
||||
} cpu_tg_metric SEC(".maps");
|
||||
|
||||
struct g_stat_t;
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__type(key, u32);
|
||||
__type(value, struct g_stat_t);
|
||||
// all global counts are integrated in one g_stat_t struct
|
||||
__uint(max_entries, 1);
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__type(key, u32);
|
||||
__type(value, struct g_stat_t);
|
||||
// all global counts are integrated in one g_stat_t struct
|
||||
__uint(max_entries, 1);
|
||||
} cpu_host_metric SEC(".maps");
|
||||
|
||||
// record enqueue timestamp
|
||||
static int trace_enqueue(u32 pid)
|
||||
{
|
||||
// u64 *valp;
|
||||
u64 ts;
|
||||
//u64 *valp;
|
||||
u64 ts;
|
||||
|
||||
if (pid == 0)
|
||||
return 0;
|
||||
if (pid == 0)
|
||||
return 0;
|
||||
|
||||
ts = bpf_ktime_get_ns();
|
||||
bpf_map_update_elem(&latency, &pid, &ts, COMPAT_BPF_ANY);
|
||||
ts = bpf_ktime_get_ns();
|
||||
bpf_map_update_elem(&latency, &pid, &ts, BPF_ANY);
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct sched_wakeup_new_args {
|
||||
unsigned long long pad;
|
||||
char comm[16];
|
||||
int pid;
|
||||
int prio;
|
||||
int success;
|
||||
int target_cpu;
|
||||
};
|
||||
|
||||
SEC("tracepoint/sched/sched_wakeup_new")
|
||||
int sched_wakeup_new_entry(struct trace_event_raw_sched_wakeup_template *ctx)
|
||||
int sched_wakeup_new_entry(struct sched_wakeup_new_args *ctx)
|
||||
{
|
||||
return trace_enqueue(ctx->pid);
|
||||
return trace_enqueue(ctx->pid);
|
||||
}
|
||||
|
||||
struct sched_wakeup_args {
|
||||
|
@ -107,231 +106,206 @@ struct sched_wakeup_args {
|
|||
};
|
||||
|
||||
SEC("tracepoint/sched/sched_wakeup")
|
||||
int sched_wakeup_entry(struct trace_event_raw_sched_wakeup_template *ctx)
|
||||
int sched_wakeup_entry(struct sched_wakeup_new_args *ctx)
|
||||
{
|
||||
return trace_enqueue(ctx->pid);
|
||||
}
|
||||
|
||||
struct task_struct___5_14 {
|
||||
unsigned int __state;
|
||||
} __attribute__((preserve_access_index));
|
||||
|
||||
long get_task_state(struct task_struct *task)
|
||||
{
|
||||
long state;
|
||||
|
||||
if (task == NULL)
|
||||
return -1;
|
||||
|
||||
if (bpf_core_field_exists(task->state))
|
||||
state = BPF_CORE_READ(task, state);
|
||||
else {
|
||||
struct task_struct___5_14 *task_new = (struct task_struct___5_14 *)task;
|
||||
state = (long)BPF_CORE_READ(task_new, __state);
|
||||
}
|
||||
|
||||
return state;
|
||||
return trace_enqueue(ctx->pid);
|
||||
}
|
||||
|
||||
#define NSEC_PER_MSEC 1000000L
|
||||
SEC("raw_tracepoint/sched_switch")
|
||||
int sched_switch_entry(struct bpf_raw_tracepoint_args *ctx)
|
||||
{
|
||||
u32 prev_pid, next_pid, g_key = 0;
|
||||
u64 now, *tsp, delta;
|
||||
bool is_voluntary;
|
||||
long state;
|
||||
struct stat_t *entry;
|
||||
struct g_stat_t *g_entry;
|
||||
u32 prev_pid, next_pid, g_key = 0;
|
||||
u64 now, *tsp, delta;
|
||||
bool is_voluntary;
|
||||
struct stat_t *entry;
|
||||
struct g_stat_t *g_entry;
|
||||
|
||||
// TP_PROTO(bool preempt, struct task_struct *prev, struct task_struct
|
||||
// *next)
|
||||
struct task_struct *prev = (struct task_struct *)ctx->args[1];
|
||||
struct task_struct *next = (struct task_struct *)ctx->args[2];
|
||||
// TP_PROTO(bool preempt, struct task_struct *prev, struct task_struct *next)
|
||||
struct task_struct *prev = (struct task_struct *)ctx->args[1];
|
||||
struct task_struct *next = (struct task_struct *)ctx->args[2];
|
||||
|
||||
#ifdef TG_ADDR_KEY
|
||||
// get task_group addr: task_struct->sched_task_group
|
||||
u64 key = (u64)_(prev->sched_task_group);
|
||||
// get task_group addr: task_struct->sched_task_group
|
||||
u64 key = (u64)_(prev->sched_task_group);
|
||||
#else
|
||||
// get pid ns id: task_struct->nsproxy->pid_ns_for_children->ns.inum
|
||||
u32 key = BPF_CORE_READ(prev, nsproxy, pid_ns_for_children, ns.inum);
|
||||
// get pid ns id: task_struct->nsproxy->pid_ns_for_children->ns.inum
|
||||
u32 key = BPF_CORE_READ(prev, nsproxy, pid_ns_for_children, ns.inum);
|
||||
#endif
|
||||
|
||||
state = get_task_state(prev);
|
||||
long state;
|
||||
// to avoid compilation warning, use raw interface instead of macro _()
|
||||
bpf_probe_read(&state, sizeof(long), (void *)&(prev->state));
|
||||
|
||||
// ivcsw: treat like an enqueue event and store timestamp
|
||||
prev_pid = _(prev->pid);
|
||||
if (state == TASK_RUNNING) {
|
||||
if (prev_pid != 0) {
|
||||
now = bpf_ktime_get_ns();
|
||||
bpf_map_update_elem(&latency, &prev_pid, &now,
|
||||
COMPAT_BPF_ANY);
|
||||
}
|
||||
is_voluntary = 0;
|
||||
// ivcsw: treat like an enqueue event and store timestamp
|
||||
prev_pid = _(prev->pid);
|
||||
if (state == TASK_RUNNING) {
|
||||
if (prev_pid != 0) {
|
||||
now = bpf_ktime_get_ns();
|
||||
bpf_map_update_elem(&latency, &prev_pid, &now, BPF_ANY);
|
||||
}
|
||||
is_voluntary = 0;
|
||||
} else {
|
||||
is_voluntary = 1;
|
||||
}
|
||||
|
||||
g_entry = bpf_map_lookup_elem(&cpu_host_metric, &g_key);
|
||||
if (!g_entry) {
|
||||
// init global counts map
|
||||
struct g_stat_t g_new_stat = {
|
||||
.g_nvcsw = 0,
|
||||
.g_nivcsw = 0,
|
||||
.g_nlat_01 = 0,
|
||||
.g_nlat_02 = 0,
|
||||
.g_nlat_03 = 0,
|
||||
.g_nlat_04 = 0,
|
||||
};
|
||||
bpf_map_update_elem(&cpu_host_metric, &g_key, &g_new_stat, BPF_NOEXIST);
|
||||
g_entry = bpf_map_lookup_elem(&cpu_host_metric, &g_key);
|
||||
if (!g_entry)
|
||||
return 0;
|
||||
}
|
||||
|
||||
// When use pid namespace id as key, sometimes we would encounter
|
||||
// null id because task->nsproxy is freed, usually means that this
|
||||
// task is almost dead (zombie), so ignore it.
|
||||
if (key && prev_pid) {
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
|
||||
if (!entry) {
|
||||
struct stat_t new_stat = {
|
||||
.nvcsw = 0,
|
||||
.nivcsw = 0,
|
||||
.nlat_01 = 0,
|
||||
.nlat_02 = 0,
|
||||
.nlat_03 = 0,
|
||||
.nlat_04 = 0,
|
||||
};
|
||||
bpf_map_update_elem(&cpu_tg_metric, &key, &new_stat, BPF_NOEXIST);
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
|
||||
if (!entry)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (is_voluntary) {
|
||||
__sync_fetch_and_add(&entry->nvcsw, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nvcsw, 1);
|
||||
} else {
|
||||
is_voluntary = 1;
|
||||
__sync_fetch_and_add(&entry->nivcsw, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nivcsw, 1);
|
||||
}
|
||||
}
|
||||
|
||||
g_entry = bpf_map_lookup_elem(&cpu_host_metric, &g_key);
|
||||
if (!g_entry) {
|
||||
// init global counts map
|
||||
struct g_stat_t g_new_stat = {
|
||||
.g_nvcsw = 0,
|
||||
.g_nivcsw = 0,
|
||||
.g_nlat_01 = 0,
|
||||
.g_nlat_02 = 0,
|
||||
.g_nlat_03 = 0,
|
||||
.g_nlat_04 = 0,
|
||||
};
|
||||
bpf_map_update_elem(&cpu_host_metric, &g_key, &g_new_stat,
|
||||
COMPAT_BPF_NOEXIST);
|
||||
g_entry = bpf_map_lookup_elem(&cpu_host_metric, &g_key);
|
||||
if (!g_entry)
|
||||
return 0;
|
||||
}
|
||||
//trace_sched_switch is called under prev != next, no need to check again.
|
||||
|
||||
// When use pid namespace id as key, sometimes we would encounter
|
||||
// null id because task->nsproxy is freed, usually means that this
|
||||
// task is almost dead (zombie), so ignore it.
|
||||
if (key && prev_pid) {
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
|
||||
if (!entry) {
|
||||
struct stat_t new_stat = {
|
||||
.nvcsw = 0,
|
||||
.nivcsw = 0,
|
||||
.nlat_01 = 0,
|
||||
.nlat_02 = 0,
|
||||
.nlat_03 = 0,
|
||||
.nlat_04 = 0,
|
||||
};
|
||||
bpf_map_update_elem(&cpu_tg_metric, &key, &new_stat,
|
||||
COMPAT_BPF_NOEXIST);
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
|
||||
if (!entry)
|
||||
return 0;
|
||||
}
|
||||
next_pid = _(next->pid);
|
||||
// ignore idle
|
||||
if (next_pid == 0)
|
||||
return 0;
|
||||
|
||||
if (is_voluntary) {
|
||||
__sync_fetch_and_add(&entry->nvcsw, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nvcsw, 1);
|
||||
} else {
|
||||
__sync_fetch_and_add(&entry->nivcsw, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nivcsw, 1);
|
||||
}
|
||||
}
|
||||
// fetch timestamp and calculate delta
|
||||
tsp = bpf_map_lookup_elem(&latency, &next_pid);
|
||||
if (tsp == 0 || *tsp == 0) {
|
||||
return 0; // missed enqueue
|
||||
}
|
||||
|
||||
// trace_sched_switch is called under prev != next, no need to check
|
||||
// again.
|
||||
|
||||
next_pid = _(next->pid);
|
||||
// ignore idle
|
||||
if (next_pid == 0)
|
||||
return 0;
|
||||
|
||||
// fetch timestamp and calculate delta
|
||||
tsp = bpf_map_lookup_elem(&latency, &next_pid);
|
||||
if (tsp == 0 || *tsp == 0) {
|
||||
return 0; // missed enqueue
|
||||
}
|
||||
|
||||
now = bpf_ktime_get_ns();
|
||||
delta = now - *tsp;
|
||||
bpf_map_delete_elem(&latency, &next_pid);
|
||||
now = bpf_ktime_get_ns();
|
||||
delta = now - *tsp;
|
||||
bpf_map_delete_elem(&latency, &next_pid);
|
||||
|
||||
#ifdef TG_ADDR_KEY
|
||||
key = (u64)_(next->sched_task_group);
|
||||
key = (u64)_(next->sched_task_group);
|
||||
#else
|
||||
key = BPF_CORE_READ(next, nsproxy, pid_ns_for_children, ns.inum);
|
||||
key = BPF_CORE_READ(next, nsproxy, pid_ns_for_children, ns.inum);
|
||||
#endif
|
||||
|
||||
if (key) {
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
|
||||
if (!entry) {
|
||||
struct stat_t new_stat = {
|
||||
.nvcsw = 0,
|
||||
.nivcsw = 0,
|
||||
.nlat_01 = 0,
|
||||
.nlat_02 = 0,
|
||||
.nlat_03 = 0,
|
||||
.nlat_04 = 0,
|
||||
};
|
||||
bpf_map_update_elem(&cpu_tg_metric, &key, &new_stat,
|
||||
COMPAT_BPF_NOEXIST);
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
|
||||
if (!entry)
|
||||
return 0;
|
||||
}
|
||||
if (key) {
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
|
||||
if (!entry) {
|
||||
struct stat_t new_stat = {
|
||||
.nvcsw = 0,
|
||||
.nivcsw = 0,
|
||||
.nlat_01 = 0,
|
||||
.nlat_02 = 0,
|
||||
.nlat_03 = 0,
|
||||
.nlat_04 = 0,
|
||||
};
|
||||
bpf_map_update_elem(&cpu_tg_metric, &key, &new_stat, BPF_NOEXIST);
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &key);
|
||||
if (!entry)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (delta < 10 * NSEC_PER_MSEC) {
|
||||
__sync_fetch_and_add(&entry->nlat_01, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nlat_01, 1);
|
||||
} else if (delta < 20 * NSEC_PER_MSEC) {
|
||||
__sync_fetch_and_add(&entry->nlat_02, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nlat_02, 1);
|
||||
} else if (delta < 50 * NSEC_PER_MSEC) {
|
||||
__sync_fetch_and_add(&entry->nlat_03, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nlat_03, 1);
|
||||
} else {
|
||||
__sync_fetch_and_add(&entry->nlat_04, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nlat_04, 1);
|
||||
}
|
||||
}
|
||||
if (delta < 10 * NSEC_PER_MSEC) {
|
||||
__sync_fetch_and_add(&entry->nlat_01, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nlat_01, 1);
|
||||
} else if (delta < 20 * NSEC_PER_MSEC) {
|
||||
__sync_fetch_and_add(&entry->nlat_02, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nlat_02, 1);
|
||||
} else if (delta < 50 * NSEC_PER_MSEC) {
|
||||
__sync_fetch_and_add(&entry->nlat_03, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nlat_03, 1);
|
||||
} else {
|
||||
__sync_fetch_and_add(&entry->nlat_04, 1);
|
||||
__sync_fetch_and_add(&g_entry->g_nlat_04, 1);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("raw_tracepoint/sched_process_exit")
|
||||
int sched_process_exit_entry(struct bpf_raw_tracepoint_args *ctx)
|
||||
{
|
||||
u32 pid;
|
||||
u32 pid;
|
||||
|
||||
// TP_PROTO(struct task_struct *tsk)
|
||||
struct task_struct *p = (struct task_struct *)ctx->args[0];
|
||||
// TP_PROTO(struct task_struct *tsk)
|
||||
struct task_struct *p = (struct task_struct *)ctx->args[0];
|
||||
|
||||
pid = _(p->pid);
|
||||
/*
|
||||
* check latency table to fix latency table overflow in below scenario:
|
||||
* when wake up the target task, but the target task always running in
|
||||
* the other cpu, the target cpu will never be the next pid, because the
|
||||
* target task will be exiting, the latency item never delete.
|
||||
* To avoid latency table overflow, we should delete the latency item in
|
||||
* exit process.
|
||||
*/
|
||||
pid = _(p->pid);
|
||||
/*
|
||||
* check latency table to fix latency table overflow in below scenario:
|
||||
* when wake up the target task, but the target task always running in
|
||||
* the other cpu, the target cpu will never be the next pid, because the
|
||||
* target task will be exiting, the latency item never delete.
|
||||
* To avoid latency table overflow, we should delete the latency item in
|
||||
* exit process.
|
||||
*/
|
||||
|
||||
if (bpf_map_lookup_elem(&latency, &pid)) {
|
||||
bpf_map_delete_elem(&latency, &pid);
|
||||
}
|
||||
if (bpf_map_lookup_elem(&latency, &pid)) {
|
||||
bpf_map_delete_elem(&latency, &pid);
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef TG_ADDR_KEY
|
||||
// When cgroup is removed, the record should be deleted.
|
||||
SEC("kprobe/free_fair_sched_group")
|
||||
int free_fair_sched_group_entry(struct pt_regs *ctx)
|
||||
SEC("kprobe/sched_free_group")
|
||||
int sched_free_group_entry(struct pt_regs *ctx)
|
||||
{
|
||||
struct task_group *tg = (void *)PT_REGS_PARM1(ctx);
|
||||
struct stat_t *entry;
|
||||
struct task_group *tg = (void *) PT_REGS_PARM1(ctx);
|
||||
struct stat_t *entry;
|
||||
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &tg);
|
||||
if (entry)
|
||||
bpf_map_delete_elem(&cpu_tg_metric, &tg);
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &tg);
|
||||
if (entry)
|
||||
bpf_map_delete_elem(&cpu_tg_metric, &tg);
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
// When pid namespace is destroyed, the record should be deleted.
|
||||
SEC("kprobe/destroy_pid_namespace")
|
||||
int destroy_pid_namespace_entry(struct pt_regs *ctx)
|
||||
{
|
||||
struct pid_namespace *ns = (void *)PT_REGS_PARM1(ctx);
|
||||
struct stat_t *entry;
|
||||
struct pid_namespace *ns = (void *) PT_REGS_PARM1(ctx);
|
||||
struct stat_t *entry;
|
||||
|
||||
// ns->ns.inum
|
||||
u32 pidns = BPF_CORE_READ(ns, ns.inum);
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &pidns);
|
||||
if (entry)
|
||||
bpf_map_delete_elem(&cpu_tg_metric, &pidns);
|
||||
// ns->ns.inum
|
||||
u32 pidns = BPF_CORE_READ(ns, ns.inum);
|
||||
entry = bpf_map_lookup_elem(&cpu_tg_metric, &pidns);
|
||||
if (entry)
|
||||
bpf_map_delete_elem(&cpu_tg_metric, &pidns);
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
|
187
bpf/softirq.c
|
@ -1,71 +1,158 @@
|
|||
#include "vmlinux.h"
|
||||
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include "bpf_common.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include "bpf_ratelimit.h"
|
||||
|
||||
#include "bpf_common.h"
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
enum lat_zone {
|
||||
LAT_ZONE0 = 0, // 0 ~ 10us
|
||||
LAT_ZONE1, // 10us ~ 100us
|
||||
LAT_ZONE2, // 100us ~ 1ms
|
||||
LAT_ZONE3, // 1ms ~ inf
|
||||
LAT_ZONE_MAX,
|
||||
#define NR_STACK_TRACE_MAX 0x4000
|
||||
#define MSEC_PER_NSEC 1000000UL
|
||||
#define TICK_DEP_MASK_NONE 0
|
||||
#define SOFTIRQ_THRESH 5000000UL
|
||||
|
||||
volatile const u64 softirq_thresh = SOFTIRQ_THRESH;
|
||||
|
||||
#define CPU_NUM 128
|
||||
#define TICK 1000
|
||||
BPF_RATELIMIT(rate, 1, CPU_NUM * TICK * 1000);
|
||||
|
||||
struct timer_softirq_run_ts {
|
||||
u32 start_trace;
|
||||
u32 restarting_tick;
|
||||
u64 soft_ts;
|
||||
};
|
||||
|
||||
struct softirq_lat {
|
||||
u64 timestamp;
|
||||
u64 total_latency[LAT_ZONE_MAX];
|
||||
struct report_event {
|
||||
u64 stack[PERF_MAX_STACK_DEPTH];
|
||||
s64 stack_size;
|
||||
u64 now;
|
||||
u64 stall_time;
|
||||
char comm[TASK_COMM_LEN];
|
||||
u32 pid;
|
||||
u32 cpu;
|
||||
};
|
||||
|
||||
// the map for recording irq/softirq timer ts
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__uint(key_size, sizeof(u32));
|
||||
__uint(value_size, sizeof(struct softirq_lat));
|
||||
__uint(max_entries, NR_SOFTIRQS_MAX);
|
||||
} softirq_percpu_lats SEC(".maps");
|
||||
__uint(value_size, sizeof(struct timer_softirq_run_ts));
|
||||
__uint(max_entries, 1);
|
||||
} timerts_map SEC(".maps");
|
||||
|
||||
SEC("tracepoint/irq/softirq_raise")
|
||||
int probe_softirq_raise(struct trace_event_raw_softirq *ctx)
|
||||
// the map use for storing struct report_event memory
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__uint(key_size, sizeof(u32)); // key = 0
|
||||
__uint(value_size, sizeof(struct report_event));
|
||||
__uint(max_entries, 1);
|
||||
} report_map SEC(".maps");
|
||||
|
||||
// the event map use for report userspace
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} irqoff_event_map SEC(".maps");
|
||||
|
||||
SEC("kprobe/scheduler_tick")
|
||||
void probe_scheduler_tick(struct pt_regs *ctx)
|
||||
{
|
||||
struct softirq_lat lat = {
|
||||
.timestamp = bpf_ktime_get_ns(),
|
||||
};
|
||||
u32 vec = ctx->vec;
|
||||
// verify bpf-ratelimit
|
||||
if (bpf_ratelimited(&rate))
|
||||
return;
|
||||
|
||||
if (vec >= NR_SOFTIRQS)
|
||||
return 0;
|
||||
//update soft timer timestamps
|
||||
int key = 0;
|
||||
struct timer_softirq_run_ts *ts;
|
||||
//struct thresh_data *tdata;
|
||||
struct report_event *event;
|
||||
u64 now;
|
||||
u64 delta;
|
||||
|
||||
bpf_map_update_elem(&softirq_percpu_lats, &vec, &lat, COMPAT_BPF_ANY);
|
||||
return 0;
|
||||
}
|
||||
ts = bpf_map_lookup_elem(&timerts_map, &key);
|
||||
if (!ts)
|
||||
return;
|
||||
|
||||
SEC("tracepoint/irq/softirq_entry")
|
||||
int probe_softirq_entry(struct trace_event_raw_softirq *ctx)
|
||||
{
|
||||
struct softirq_lat *lat;
|
||||
u32 vec = ctx->vec;
|
||||
if (!ts->start_trace)
|
||||
return;
|
||||
|
||||
if (vec >= NR_SOFTIRQS)
|
||||
return 0;
|
||||
|
||||
lat = bpf_map_lookup_elem(&softirq_percpu_lats, &vec);
|
||||
if (!lat)
|
||||
return 0;
|
||||
|
||||
u64 latency = bpf_ktime_get_ns() - lat->timestamp;
|
||||
|
||||
if (latency < 10 * NSEC_PER_USEC) {
|
||||
__sync_fetch_and_add(&lat->total_latency[LAT_ZONE0], 1);
|
||||
} else if (latency < 100 * NSEC_PER_USEC) {
|
||||
__sync_fetch_and_add(&lat->total_latency[LAT_ZONE1], 1);
|
||||
} else if (latency < 1 * NSEC_PER_MSEC) {
|
||||
__sync_fetch_and_add(&lat->total_latency[LAT_ZONE2], 1);
|
||||
} else {
|
||||
__sync_fetch_and_add(&lat->total_latency[LAT_ZONE3], 1);
|
||||
//update soft timer timestamps
|
||||
if (!ts->soft_ts) {
|
||||
ts->soft_ts = bpf_ktime_get_ns();
|
||||
return;
|
||||
}
|
||||
return 0;
|
||||
|
||||
event = bpf_map_lookup_elem(&report_map, &key);
|
||||
if (!event)
|
||||
return;
|
||||
|
||||
if (ts->restarting_tick) {
|
||||
ts->restarting_tick = 0;
|
||||
ts->soft_ts = bpf_ktime_get_ns();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
now = bpf_ktime_get_ns();
|
||||
delta = now - ts->soft_ts;
|
||||
|
||||
// if delta over threshold, dump important info to user
|
||||
if (delta >= softirq_thresh) {
|
||||
event->now = now;
|
||||
event->stall_time = delta;
|
||||
__builtin_memset(event->comm, 0, sizeof(event->comm));
|
||||
bpf_get_current_comm(&event->comm, sizeof(event->comm));
|
||||
event->pid = (u32)bpf_get_current_pid_tgid();
|
||||
event->cpu = bpf_get_smp_processor_id();
|
||||
event->stack_size = bpf_get_stack(ctx, event->stack, sizeof(event->stack), 0);
|
||||
|
||||
bpf_perf_event_output(ctx, &irqoff_event_map, BPF_F_CURRENT_CPU,
|
||||
event, sizeof(struct report_event));
|
||||
}
|
||||
|
||||
// update soft_ts, use for next trace
|
||||
ts->soft_ts = now;
|
||||
}
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
struct tp_tick_stop {
|
||||
unsigned long pad;
|
||||
int success;
|
||||
int dependency;
|
||||
};
|
||||
|
||||
SEC("tracepoint/timer/tick_stop")
|
||||
void probe_tick_stop(struct tp_tick_stop *ctx)
|
||||
{
|
||||
struct timer_softirq_run_ts *ts;
|
||||
int key = 0;
|
||||
|
||||
ts = bpf_map_lookup_elem(&timerts_map, &key);
|
||||
if (!ts)
|
||||
return;
|
||||
|
||||
if (ctx->success == 1 && ctx->dependency == TICK_DEP_MASK_NONE) {
|
||||
ts->start_trace = 0;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
SEC("kprobe/tick_nohz_restart_sched_tick")
|
||||
void probe_tick_nohz_restart_sched_tick(struct pt_regs *ctx)
|
||||
{
|
||||
struct timer_softirq_run_ts *ts;
|
||||
int key = 0;
|
||||
u64 now;
|
||||
|
||||
ts = bpf_map_lookup_elem(&timerts_map, &key);
|
||||
if (!ts)
|
||||
return;
|
||||
|
||||
now = bpf_ktime_get_ns();
|
||||
|
||||
ts->soft_ts = now;
|
||||
ts->start_trace = 1;
|
||||
ts->restarting_tick = 1;
|
||||
}
|
||||
|
|
|
@ -1,155 +0,0 @@
|
|||
#include "vmlinux.h"
|
||||
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
#include "bpf_common.h"
|
||||
#include "bpf_ratelimit.h"
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
#define NR_STACK_TRACE_MAX 0x4000
|
||||
#define MSEC_PER_NSEC 1000000UL
|
||||
#define TICK_DEP_MASK_NONE 0
|
||||
#define SOFTIRQ_THRESH 5000000UL
|
||||
|
||||
volatile const u64 softirq_thresh = SOFTIRQ_THRESH;
|
||||
|
||||
#define TICK 1000
|
||||
BPF_RATELIMIT(rate, 1, COMPAT_CPU_NUM *TICK * 1000);
|
||||
|
||||
struct timer_softirq_run_ts {
|
||||
u32 start_trace;
|
||||
u32 restarting_tick;
|
||||
u64 soft_ts;
|
||||
};
|
||||
|
||||
struct report_event {
|
||||
u64 stack[PERF_MAX_STACK_DEPTH];
|
||||
s64 stack_size;
|
||||
u64 now;
|
||||
u64 stall_time;
|
||||
char comm[COMPAT_TASK_COMM_LEN];
|
||||
u32 pid;
|
||||
u32 cpu;
|
||||
};
|
||||
|
||||
// the map for recording irq/softirq timer ts
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__uint(key_size, sizeof(u32));
|
||||
__uint(value_size, sizeof(struct timer_softirq_run_ts));
|
||||
__uint(max_entries, 1);
|
||||
} timerts_map SEC(".maps");
|
||||
|
||||
// the map use for storing struct report_event memory
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__uint(key_size, sizeof(u32)); // key = 0
|
||||
__uint(value_size, sizeof(struct report_event));
|
||||
__uint(max_entries, 1);
|
||||
} report_map SEC(".maps");
|
||||
|
||||
// the event map use for report userspace
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} irqoff_event_map SEC(".maps");
|
||||
|
||||
SEC("kprobe/account_process_tick")
|
||||
void probe_account_process_tick(struct pt_regs *ctx)
|
||||
{
|
||||
// verify bpf-ratelimit
|
||||
if (bpf_ratelimited(&rate))
|
||||
return;
|
||||
|
||||
// update soft timer timestamps
|
||||
int key = 0;
|
||||
struct timer_softirq_run_ts *ts;
|
||||
// struct thresh_data *tdata;
|
||||
struct report_event *event;
|
||||
u64 now;
|
||||
u64 delta;
|
||||
|
||||
ts = bpf_map_lookup_elem(&timerts_map, &key);
|
||||
if (!ts)
|
||||
return;
|
||||
|
||||
if (!ts->start_trace)
|
||||
return;
|
||||
|
||||
// update soft timer timestamps
|
||||
if (!ts->soft_ts) {
|
||||
ts->soft_ts = bpf_ktime_get_ns();
|
||||
return;
|
||||
}
|
||||
|
||||
event = bpf_map_lookup_elem(&report_map, &key);
|
||||
if (!event)
|
||||
return;
|
||||
|
||||
if (ts->restarting_tick) {
|
||||
ts->restarting_tick = 0;
|
||||
ts->soft_ts = bpf_ktime_get_ns();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
now = bpf_ktime_get_ns();
|
||||
delta = now - ts->soft_ts;
|
||||
|
||||
// if delta over threshold, dump important info to user
|
||||
if (delta >= softirq_thresh) {
|
||||
event->now = now;
|
||||
event->stall_time = delta;
|
||||
__builtin_memset(event->comm, 0, sizeof(event->comm));
|
||||
bpf_get_current_comm(&event->comm, sizeof(event->comm));
|
||||
event->pid = (u32)bpf_get_current_pid_tgid();
|
||||
event->cpu = bpf_get_smp_processor_id();
|
||||
event->stack_size =
|
||||
bpf_get_stack(ctx, event->stack, sizeof(event->stack), 0);
|
||||
|
||||
bpf_perf_event_output(ctx, &irqoff_event_map,
|
||||
COMPAT_BPF_F_CURRENT_CPU, event,
|
||||
sizeof(struct report_event));
|
||||
}
|
||||
|
||||
// update soft_ts, use for next trace
|
||||
ts->soft_ts = now;
|
||||
}
|
||||
|
||||
SEC("tracepoint/timer/tick_stop")
|
||||
void probe_tick_stop(struct trace_event_raw_tick_stop *ctx)
|
||||
{
|
||||
struct timer_softirq_run_ts *ts;
|
||||
int key = 0;
|
||||
|
||||
ts = bpf_map_lookup_elem(&timerts_map, &key);
|
||||
if (!ts)
|
||||
return;
|
||||
|
||||
if (ctx->success == 1 && ctx->dependency == TICK_DEP_MASK_NONE) {
|
||||
ts->start_trace = 0;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
SEC("kprobe/tick_nohz_restart_sched_tick")
|
||||
void probe_tick_nohz_restart_sched_tick(struct pt_regs *ctx)
|
||||
{
|
||||
struct timer_softirq_run_ts *ts;
|
||||
int key = 0;
|
||||
u64 now;
|
||||
|
||||
ts = bpf_map_lookup_elem(&timerts_map, &key);
|
||||
if (!ts)
|
||||
return;
|
||||
|
||||
now = bpf_ktime_get_ns();
|
||||
|
||||
ts->soft_ts = now;
|
||||
ts->start_trace = 1;
|
||||
ts->restarting_tick = 1;
|
||||
}
|
|
@ -1,42 +1,40 @@
|
|||
#include "vmlinux.h"
|
||||
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
#include "bpf_common.h"
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include "bpf_ratelimit.h"
|
||||
|
||||
char __license[] SEC("license") = "Dual MIT/GPL";
|
||||
|
||||
BPF_RATELIMIT_IN_MAP(rate, 1, COMPAT_CPU_NUM * 10000, 0);
|
||||
#define CPU_NUM 128
|
||||
BPF_RATELIMIT_IN_MAP(rate, 1, CPU_NUM * 10000, 0);
|
||||
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} softlockup_perf_events SEC(".maps");
|
||||
|
||||
struct softlockup_info {
|
||||
u32 cpu;
|
||||
u32 pid;
|
||||
char comm[COMPAT_TASK_COMM_LEN];
|
||||
u32 pid;
|
||||
char comm[TASK_COMM_LEN];
|
||||
};
|
||||
|
||||
SEC("kprobe/watchdog_timer_fn+442")
|
||||
int kprobe_watchdog_timer_fn(struct pt_regs *ctx)
|
||||
{
|
||||
struct softlockup_info info = {};
|
||||
struct task_struct *task;
|
||||
struct softlockup_info info = {};
|
||||
struct task_struct *task;
|
||||
|
||||
if (bpf_ratelimited_in_map(ctx, rate))
|
||||
return 0;
|
||||
|
||||
info.cpu = bpf_get_smp_processor_id();
|
||||
task = (struct task_struct *)bpf_get_current_task();
|
||||
info.pid = bpf_get_current_pid_tgid() & 0xffffffffUL;
|
||||
BPF_CORE_READ_STR_INTO(&info.comm, task, comm);
|
||||
bpf_perf_event_output(ctx, &softlockup_perf_events,
|
||||
COMPAT_BPF_F_CURRENT_CPU, &info, sizeof(info));
|
||||
return 0;
|
||||
if (bpf_ratelimited_in_map(ctx, rate))
|
||||
return 0;
|
||||
info.cpu = bpf_get_smp_processor_id();
|
||||
task = (struct task_struct *)bpf_get_current_task();
|
||||
info.pid = bpf_get_current_pid_tgid() & 0xffffffffUL;
|
||||
BPF_CORE_READ_STR_INTO(&info.comm, task, comm);
|
||||
bpf_perf_event_output(ctx, &softlockup_perf_events, BPF_F_CURRENT_CPU, &info, sizeof(info));
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@ usage() {
|
|||
USAGE: clang.sh -s <source.c> -o <output.o> -I [includes] -C '[compile_options]'
|
||||
EXAMPLE:
|
||||
clang.sh -s example.bpf.c -o example.o # run preprocess, compile, and assemble steps (-C '-c')
|
||||
clang.sh -s example.bpf.c -o example.o -I include -I include/4.18.0-193.6.3.el8_2.x86_64 # specify the headers, (-C '-c')
|
||||
clang.sh -s example.bpf.c -o example.o -I include -I include/4.18.0-193.6.3.el8_2.v1.3.x86_64 # specify the headers, (-C '-c')
|
||||
clang.sh -s example.bpf.c -o example.o -C '-E' # only run the preprocessor
|
||||
clang.sh -s example.bpf.c -o example.o -C '-S' # only run preprocess and compilation steps"
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ EXAMPLE:
|
|||
SRC=
|
||||
OBJ=
|
||||
INCLUDES=
|
||||
DEFAULT_INCLUDES="-I include -I include/4.18.0-193.6.3.el8_2.x86_64"
|
||||
DEFAULT_INCLUDES="-I include -I include/4.18.0-193.6.3.el8_2.v1.2.x86_64"
|
||||
COMPILE_OPTIONS=
|
||||
DEFAULT_COMPILE_OPTIONS="-Wall -O2 -g -target bpf -D__TARGET_ARCH_x86 -mcpu=v1 -c"
|
||||
|
||||
|
|
|
@ -12,7 +12,4 @@ ELASTICSEARCH_HOST='localhost'
|
|||
PROMETHEUS_VERSION=v2.53.3 # LTS v2.53
|
||||
|
||||
# Grafana
|
||||
GRAFANA_VERSION=11.0.0
|
||||
|
||||
# Run huatuo-bamai
|
||||
RUN_PATH=/home/huatuo-bamai
|
||||
GRAFANA_VERSION=11.0.0
|
|
@ -0,0 +1,34 @@
|
|||
ARG BUILD_PATH=/go/huatuo-bamai
|
||||
ARG RUN_PATH=/home/huatuo-bamai
|
||||
|
||||
# https://hub.docker.com/_/golang/tags?name=1.22.4
|
||||
FROM golang:1.22.4-alpine AS base
|
||||
|
||||
# Install dependencies for build
|
||||
RUN apk add --no-cache \
|
||||
make \
|
||||
clang15 \
|
||||
libbpf-dev \
|
||||
bpftool \
|
||||
curl && \
|
||||
bpftool btf dump file /sys/kernel/btf/vmlinux format c > bpf/include/vmlinux.h
|
||||
ENV PATH=$PATH:/usr/lib/llvm15/bin
|
||||
|
||||
# Build huatuo
|
||||
FROM base AS build
|
||||
ARG BUILD_PATH
|
||||
WORKDIR ${BUILD_PATH}
|
||||
COPY . .
|
||||
RUN make
|
||||
|
||||
# Release huatuo image
|
||||
FROM base AS run
|
||||
ARG BUILD_PATH
|
||||
ARG RUN_PATH
|
||||
WORKDIR ${RUN_PATH}
|
||||
COPY --from=build \
|
||||
${BUILD_PATH}/_output ./_output
|
||||
COPY --from=build \
|
||||
${BUILD_PATH}/huatuo-bamai.conf .
|
||||
|
||||
CMD ["/run.sh"]
|
|
@ -30,22 +30,19 @@ services:
|
|||
- elasticsearch
|
||||
|
||||
huatuo-bamai:
|
||||
image: huatuo/huatuo-bamai:latest
|
||||
build:
|
||||
context: ./../../ # compile required in Dockerfile
|
||||
dockerfile: ./build/docker/Dockerfile
|
||||
container_name: huatuo-bamai
|
||||
network_mode: host
|
||||
cgroup: host
|
||||
privileged: true
|
||||
environment:
|
||||
ELASTICSEARCH_HOST: ${ELASTICSEARCH_HOST:-}
|
||||
ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-}
|
||||
RUN_PATH: ${RUN_PATH:-}
|
||||
volumes:
|
||||
- /sys:/sys:rw
|
||||
- /run:/run:rw
|
||||
- ../../huatuo-bamai.conf:${RUN_PATH}/huatuo-bamai.conf:rw
|
||||
- ./run.sh:${RUN_PATH}/run.sh:ro
|
||||
command: ["./run.sh"]
|
||||
- ./run.sh:/run.sh:ro
|
||||
- /sys/kernel:/sys/kernel
|
||||
depends_on:
|
||||
- elasticsearch
|
||||
- prometheus
|
||||
- grafana
|
||||
- grafana
|
|
@ -3,9 +3,8 @@
|
|||
ELASTICSEARCH_HOST=${ELASTICSEARCH_HOST:-localhost}
|
||||
ELASTIC_PASSWORD=${ELASTIC_PASSWORD:-huatuo-bamai}
|
||||
|
||||
RUN_PATH=${RUN_PATH:-/home/huatuo-bamai}
|
||||
|
||||
# Wait for Elasticsearch to be ready
|
||||
# ref: https://github.com/deviantony/docker-elk/blob/main/setup/entrypoint.sh
|
||||
wait_for_elasticsearch() {
|
||||
args="-s -D- -m15 -w '%{http_code}' http://${ELASTICSEARCH_HOST}:9200/"
|
||||
if [ -n "${ELASTIC_PASSWORD}" ]; then
|
||||
|
@ -39,31 +38,34 @@ wait_for_elasticsearch() {
|
|||
echo "$output" | head -c -3
|
||||
fi
|
||||
|
||||
if [ $result -ne 0 ]; then
|
||||
case $result in
|
||||
6)
|
||||
echo 'Could not resolve host. Is Elasticsearch running?'
|
||||
;;
|
||||
7)
|
||||
echo 'Failed to connect to host. Is Elasticsearch healthy?'
|
||||
;;
|
||||
28)
|
||||
echo 'Timeout connecting to host. Is Elasticsearch healthy?'
|
||||
;;
|
||||
*)
|
||||
echo "Connection to Elasticsearch failed. Exit code: ${result}"
|
||||
;;
|
||||
esac
|
||||
|
||||
exit $result
|
||||
fi
|
||||
return $result
|
||||
}
|
||||
|
||||
exit_code=0
|
||||
wait_for_elasticsearch || exit_code=$?
|
||||
if [ $exit_code -ne 0 ]; then
|
||||
case $exit_code in
|
||||
6)
|
||||
echo 'Could not resolve host. Is Elasticsearch running?'
|
||||
;;
|
||||
7)
|
||||
echo 'Failed to connect to host. Is Elasticsearch healthy?'
|
||||
;;
|
||||
28)
|
||||
echo 'Timeout connecting to host. Is Elasticsearch healthy?'
|
||||
;;
|
||||
*)
|
||||
echo "Connection to Elasticsearch failed. Exit code: ${exit_code}"
|
||||
;;
|
||||
esac
|
||||
|
||||
exit $exit_code
|
||||
fi
|
||||
|
||||
# Waiting for initialization of Elasticsearch built-in users
|
||||
sleep 5
|
||||
|
||||
wait_for_elasticsearch
|
||||
sleep 5 # Waiting for initialization of Elasticsearch built-in users
|
||||
echo "Elasticsearch is ready."
|
||||
|
||||
# Run huatuo-bamai
|
||||
cd $RUN_PATH
|
||||
exec ./huatuo-bamai --region example --config huatuo-bamai.conf
|
||||
exec _output/bin/huatuo-bamai --region example --config huatuo-bamai.conf
|
|
@ -26,12 +26,12 @@ import (
|
|||
_ "huatuo-bamai/core/events"
|
||||
_ "huatuo-bamai/core/metrics"
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/cgroups"
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/services"
|
||||
"huatuo-bamai/internal/storage"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/internal/utils/pidutil"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
|
||||
|
@ -49,26 +49,13 @@ func mainAction(ctx *cli.Context) error {
|
|||
defer pidutil.RemovePidFile(ctx.App.Name)
|
||||
|
||||
// init cpu quota
|
||||
cgr, err := cgroups.NewCgroupManager()
|
||||
host, err := cgrouputil.NewRuntimeCgroup(ctx.App.Name,
|
||||
conf.Get().RuntimeCgroup.LimitInitCPU,
|
||||
conf.Get().RuntimeCgroup.LimitMem)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := cgr.NewRuntime(ctx.App.Name,
|
||||
cgroups.ToSpec(
|
||||
conf.Get().RuntimeCgroup.LimitInitCPU,
|
||||
conf.Get().RuntimeCgroup.LimitMem,
|
||||
),
|
||||
); err != nil {
|
||||
return fmt.Errorf("new runtime cgroup: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
_ = cgr.DeleteRuntime()
|
||||
}()
|
||||
|
||||
if err := cgr.AddProc(uint64(os.Getpid())); err != nil {
|
||||
return fmt.Errorf("cgroup add pid to cgroups.proc")
|
||||
return fmt.Errorf("new cgroup: %w", err)
|
||||
}
|
||||
defer host.Delete()
|
||||
|
||||
// initialize the storage clients.
|
||||
storageInitCtx := storage.InitContext{
|
||||
|
@ -95,24 +82,8 @@ func mainAction(ctx *cli.Context) error {
|
|||
return fmt.Errorf("init pod cgroup metadata: %w", err)
|
||||
}
|
||||
|
||||
podListInitCtx := pod.PodContainerInitCtx{
|
||||
PodListReadOnlyPort: conf.Get().Pod.KubeletPodListURL,
|
||||
PodListAuthorizedPort: conf.Get().Pod.KubeletPodListHTTPSURL,
|
||||
PodClientCertPath: conf.Get().Pod.KubeletPodClientCertPath,
|
||||
PodCACertPath: conf.Get().Pod.KubeletPodCACertPath,
|
||||
}
|
||||
|
||||
if err := pod.ContainerPodMgrInit(&podListInitCtx); err != nil {
|
||||
return fmt.Errorf("init podlist and sync module: %w", err)
|
||||
}
|
||||
|
||||
blacklisted := conf.Get().Blacklist
|
||||
prom, err := InitMetricsCollector(blacklisted, conf.Region)
|
||||
if err != nil {
|
||||
return fmt.Errorf("InitMetricsCollector: %w", err)
|
||||
}
|
||||
|
||||
mgr, err := tracing.NewMgrTracingEvent(blacklisted)
|
||||
blackListed := conf.Get().Tracing.BlackList
|
||||
mgr, err := tracing.NewMgrTracingEvent(blackListed)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -121,12 +92,18 @@ func mainAction(ctx *cli.Context) error {
|
|||
return err
|
||||
}
|
||||
|
||||
prom, err := InitMetricsCollector(blackListed)
|
||||
if err != nil {
|
||||
return fmt.Errorf("InitMetricsCollector: %w", err)
|
||||
}
|
||||
|
||||
log.Infof("Initialize the Metrics collector: %v", prom)
|
||||
|
||||
services.Start(conf.Get().APIServer.TCPAddr, mgr, prom)
|
||||
|
||||
// update cpu quota
|
||||
if err := cgr.UpdateRuntime(cgroups.ToSpec(conf.Get().RuntimeCgroup.LimitCPU, 0)); err != nil {
|
||||
return fmt.Errorf("update runtime: %w", err)
|
||||
if err := host.UpdateCPU(conf.Get().RuntimeCgroup.LimitCPU); err != nil {
|
||||
return fmt.Errorf("cg update cpu: %w", err)
|
||||
}
|
||||
|
||||
waitExit := make(chan os.Signal, 1)
|
||||
|
@ -137,7 +114,6 @@ func mainAction(ctx *cli.Context) error {
|
|||
case syscall.SIGQUIT, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM:
|
||||
log.Infof("huatuo-bamai exit by signal %d", s)
|
||||
bpf.CloseBpfManager()
|
||||
pod.ContainerPodMgrClose()
|
||||
return nil
|
||||
case syscall.SIGUSR1:
|
||||
return nil
|
||||
|
@ -189,7 +165,7 @@ func main() {
|
|||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "disable-tracing",
|
||||
Usage: "disable tracing. This is related to Blacklist in config, and complement each other",
|
||||
Usage: "disable tracing. This is related to TracerConfig.BlackList in config, and complement each other",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "log-debug",
|
||||
|
@ -225,11 +201,11 @@ func main() {
|
|||
// tracer
|
||||
disabledTracing := ctx.StringSlice("disable-tracing")
|
||||
if len(disabledTracing) > 0 {
|
||||
definedTracers := conf.Get().Blacklist
|
||||
definedTracers := conf.Get().Tracing.BlackList
|
||||
definedTracers = append(definedTracers, disabledTracing...)
|
||||
|
||||
conf.Set("Blacklist", definedTracers)
|
||||
log.Infof("The tracer black list by cli: %v", conf.Get().Blacklist)
|
||||
conf.Set("TracerConfig.BlackList", definedTracers)
|
||||
log.Infof("The tracer black list by cli: %v", conf.Get().Tracing.BlackList)
|
||||
}
|
||||
|
||||
if ctx.Bool("log-debug") {
|
||||
|
|
|
@ -26,8 +26,8 @@ import (
|
|||
var promNamespace = "huatuo_bamai"
|
||||
|
||||
// InitMetricsCollector creates a new MetricsCollector instance.
|
||||
func InitMetricsCollector(blackListed []string, region string) (*prometheus.Registry, error) {
|
||||
nc, err := metric.NewCollectorManager(blackListed, region)
|
||||
func InitMetricsCollector(blackListed []string) (*prometheus.Registry, error) {
|
||||
nc, err := metric.NewCollectorManager(blackListed)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create collector: %w", err)
|
||||
}
|
||||
|
|
|
@ -20,17 +20,18 @@ import (
|
|||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/cgroups"
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/flamegraph"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/storage"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
"huatuo-bamai/pkg/types"
|
||||
)
|
||||
|
@ -89,22 +90,36 @@ func readIntFromFile(filePath string) (int, error) {
|
|||
}
|
||||
|
||||
func readCPUUsage(path string) (map[string]uint64, error) {
|
||||
// FIXME!!!
|
||||
cgr, err := cgroups.NewCgroupManager()
|
||||
cpuacctPath := path + "/cpuacct.stat"
|
||||
output, err := os.ReadFile(cpuacctPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
usage, err := cgr.CpuUsage(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cpuUsage := make(map[string]uint64)
|
||||
lines := strings.Split(string(output), "\n")
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
return map[string]uint64{
|
||||
"user": usage.User,
|
||||
"system": usage.System,
|
||||
"total": uint64(time.Now().UnixNano()),
|
||||
}, nil
|
||||
parts := strings.Fields(line)
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
key := parts[0]
|
||||
valueStr := parts[1]
|
||||
value, err := strconv.ParseUint(valueStr, 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cpuUsage[key] = value
|
||||
}
|
||||
cpuUsage["total"] = uint64(time.Now().UnixNano())
|
||||
return cpuUsage, nil
|
||||
}
|
||||
|
||||
// UserHZtons because kernel USER_HZ = 100, the default value set to 10,000,000
|
||||
|
@ -156,7 +171,7 @@ func updateCPUIdleIDMap(m cpuIdleIDMap) error {
|
|||
for _, container := range containers {
|
||||
_, ok := m[container.ID]
|
||||
if ok {
|
||||
m[container.ID].path = container.CgroupSuffix
|
||||
m[container.ID].path = filepath.Join(cgrouputil.V1CpuPath(), container.CgroupSuffix)
|
||||
m[container.ID].alive = true
|
||||
} else {
|
||||
temp := &containerCPUInfo{
|
||||
|
@ -176,7 +191,7 @@ func updateCPUIdleIDMap(m cpuIdleIDMap) error {
|
|||
deltaUser: 0,
|
||||
deltaSys: 0,
|
||||
timestamp: 0,
|
||||
path: container.CgroupSuffix,
|
||||
path: filepath.Join(cgrouputil.V1CpuPath(), container.CgroupSuffix),
|
||||
alive: true,
|
||||
}
|
||||
m[container.ID] = temp
|
||||
|
|
|
@ -15,22 +15,24 @@
|
|||
package autotracing
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/cgroups"
|
||||
"huatuo-bamai/internal/cgroups/paths"
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/storage"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
"huatuo-bamai/pkg/types"
|
||||
|
||||
cadvisorV1 "github.com/google/cadvisor/info/v1"
|
||||
"github.com/google/cadvisor/utils/cpuload/netlink"
|
||||
"github.com/prometheus/procfs"
|
||||
"github.com/shirou/gopsutil/process"
|
||||
|
@ -60,6 +62,7 @@ type containerDloadInfo struct {
|
|||
}
|
||||
|
||||
type DloadTracingData struct {
|
||||
Avg float64 `json:"avg"`
|
||||
Threshold float64 `json:"threshold"`
|
||||
NrSleeping uint64 `json:"nr_sleeping"`
|
||||
NrRunning uint64 `json:"nr_running"`
|
||||
|
@ -73,34 +76,142 @@ type DloadTracingData struct {
|
|||
Stack string `json:"stack"`
|
||||
}
|
||||
|
||||
const (
|
||||
taskHostType = 1
|
||||
taskCgroupType = 2
|
||||
)
|
||||
|
||||
const debugDload = false
|
||||
|
||||
type containersDloadMap map[string]*containerDloadInfo
|
||||
|
||||
var containersDloads = make(containersDloadMap)
|
||||
|
||||
func updateContainersDload() error {
|
||||
containers, err := pod.GetAllContainers()
|
||||
func getStack(targetPid int32) (string, error) {
|
||||
procStack := "/proc/" + strconv.Itoa(int(targetPid)) + "/stack"
|
||||
content, err := os.ReadFile(procStack)
|
||||
if err != nil {
|
||||
return err
|
||||
log.Infof("%v", err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
for _, container := range containers {
|
||||
if _, ok := containersDloads[container.ID]; ok {
|
||||
containersDloads[container.ID].name = container.CgroupSuffix
|
||||
containersDloads[container.ID].path = paths.Path("cpu", container.CgroupSuffix)
|
||||
containersDloads[container.ID].container = container
|
||||
containersDloads[container.ID].alive = true
|
||||
return string(content), nil
|
||||
}
|
||||
|
||||
const (
|
||||
isHost = 1
|
||||
isCgrp = 2
|
||||
)
|
||||
|
||||
func getUnTaskList(cgrpPath string, infoType int) ([]int32, error) {
|
||||
var pidList []int32
|
||||
var err error
|
||||
|
||||
if infoType == isCgrp {
|
||||
taskPath := cgrpPath + "/tasks"
|
||||
|
||||
tskfi, err := os.Open(taskPath)
|
||||
if err != nil {
|
||||
log.Infof("%v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
r := bufio.NewReader(tskfi)
|
||||
|
||||
for {
|
||||
lineBytes, err := r.ReadBytes('\n')
|
||||
line := strings.TrimSpace(string(lineBytes))
|
||||
if err != nil && err != io.EOF {
|
||||
log.Infof("fail to read tasklist: %v", err)
|
||||
break
|
||||
}
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
|
||||
pid, _ := strconv.ParseInt(line, 10, 32)
|
||||
pidList = append(pidList, int32(pid))
|
||||
}
|
||||
} else {
|
||||
procs, err := procfs.AllProcs()
|
||||
if err != nil {
|
||||
log.Infof("%v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, p := range procs {
|
||||
pidList = append(pidList, int32(p.PID))
|
||||
}
|
||||
}
|
||||
|
||||
return pidList, err
|
||||
}
|
||||
|
||||
func dumpUnTaskStack(tskList []int32, dumpType int) (string, error) {
|
||||
var infoTitle string
|
||||
var getValidStackinfo bool = false
|
||||
var strResult string = ""
|
||||
|
||||
stackInfo := new(bytes.Buffer)
|
||||
|
||||
switch dumpType {
|
||||
case isHost:
|
||||
infoTitle = "\nbacktrace of D process in Host:\n"
|
||||
case isCgrp:
|
||||
infoTitle = "\nbacktrace of D process in Cgroup:\n"
|
||||
}
|
||||
|
||||
for _, pid := range tskList {
|
||||
proc, err := process.NewProcess(pid)
|
||||
if err != nil {
|
||||
log.Debugf("fail to get process %d: %v", pid, err)
|
||||
continue
|
||||
}
|
||||
|
||||
containersDloads[container.ID] = &containerDloadInfo{
|
||||
path: paths.Path("cpu", container.CgroupSuffix),
|
||||
status, err := proc.Status()
|
||||
if err != nil {
|
||||
log.Debugf("fail to get status %d: %v", pid, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if status == "D" || status == "U" {
|
||||
comm, err := proc.Name()
|
||||
if err != nil {
|
||||
log.Infof("%v", err)
|
||||
continue
|
||||
}
|
||||
stack, err := getStack(pid)
|
||||
if err != nil {
|
||||
log.Infof("%v", err)
|
||||
continue
|
||||
}
|
||||
if stack == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Fprintf(stackInfo, "Comm: %s\tPid: %d\n%s\n", comm, pid, stack)
|
||||
getValidStackinfo = true
|
||||
}
|
||||
}
|
||||
|
||||
if getValidStackinfo {
|
||||
strResult = fmt.Sprintf("%s%s", infoTitle, stackInfo)
|
||||
}
|
||||
|
||||
return strResult, nil
|
||||
}
|
||||
|
||||
// dloadIDMap is the container information
|
||||
type dloadIDMap map[string]*containerDloadInfo
|
||||
|
||||
var dloadIdMap = make(dloadIDMap)
|
||||
|
||||
func updateIDMap(m dloadIDMap) error {
|
||||
containers, err := pod.GetAllContainers()
|
||||
if err != nil {
|
||||
return fmt.Errorf("GetAllContainers: %w", err)
|
||||
}
|
||||
|
||||
for _, container := range containers {
|
||||
if _, ok := m[container.ID]; ok {
|
||||
m[container.ID].name = container.CgroupSuffix
|
||||
m[container.ID].path = cgrouputil.NewCPU().Path(container.CgroupSuffix)
|
||||
m[container.ID].container = container
|
||||
m[container.ID].alive = true
|
||||
continue
|
||||
}
|
||||
|
||||
m[container.ID] = &containerDloadInfo{
|
||||
path: cgrouputil.NewCPU().Path(container.CgroupSuffix),
|
||||
name: container.CgroupSuffix,
|
||||
container: container,
|
||||
alive: true,
|
||||
|
@ -110,92 +221,6 @@ func updateContainersDload() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func detectDloadContainer(thresh float64, interval int) (*containerDloadInfo, cadvisorV1.LoadStats, error) {
|
||||
empty := cadvisorV1.LoadStats{}
|
||||
|
||||
n, err := netlink.New()
|
||||
if err != nil {
|
||||
return nil, empty, err
|
||||
}
|
||||
defer n.Stop()
|
||||
|
||||
for containerId, dload := range containersDloads {
|
||||
if !dload.alive {
|
||||
delete(containersDloads, containerId)
|
||||
} else {
|
||||
dload.alive = false
|
||||
|
||||
timeStart := dload.container.StartedAt.Add(time.Second * time.Duration(interval))
|
||||
if time.Now().Before(timeStart) {
|
||||
log.Debugf("%s were just started, we'll start monitoring it later.", dload.container.Hostname)
|
||||
continue
|
||||
}
|
||||
|
||||
stats, err := n.GetCpuLoad(dload.name, dload.path)
|
||||
if err != nil {
|
||||
log.Debugf("failed to get %s load, probably the container has been deleted: %s", dload.container.Hostname, err)
|
||||
continue
|
||||
}
|
||||
|
||||
updateLoad(dload, stats.NrRunning, stats.NrUninterruptible)
|
||||
|
||||
if dload.loaduni[0] > thresh || debugDload {
|
||||
log.Infof("dload event: Threshold=%0.2f %+v, LoadAvg=%0.2f, DLoadAvg=%0.2f",
|
||||
thresh, stats, dload.load[0], dload.loaduni[0])
|
||||
|
||||
return dload, stats, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, empty, fmt.Errorf("no dload containers")
|
||||
}
|
||||
|
||||
func buildAndSaveDloadContainer(thresh float64, container *containerDloadInfo, loadstat cadvisorV1.LoadStats) error {
|
||||
cgrpPath := container.name
|
||||
containerID := container.container.ID
|
||||
containerHostNamespace := container.container.LabelHostNamespace()
|
||||
|
||||
stackCgrp, err := dumpUninterruptibleTaskStack(taskCgroupType, cgrpPath, debugDload)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if stackCgrp == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
stackHost, err := dumpUninterruptibleTaskStack(taskHostType, "", debugDload)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
data := &DloadTracingData{
|
||||
NrSleeping: loadstat.NrSleeping,
|
||||
NrRunning: loadstat.NrRunning,
|
||||
NrStopped: loadstat.NrStopped,
|
||||
NrUninterruptible: loadstat.NrUninterruptible,
|
||||
NrIoWait: loadstat.NrIoWait,
|
||||
LoadAvg: container.load[0],
|
||||
DLoadAvg: container.loaduni[0],
|
||||
Threshold: thresh,
|
||||
Stack: fmt.Sprintf("%s%s", stackCgrp, stackHost),
|
||||
}
|
||||
|
||||
// Check if this is caused by known issues.
|
||||
knownIssue, inKnownList := conf.KnownIssueSearch(stackCgrp, containerHostNamespace, "")
|
||||
if knownIssue != "" {
|
||||
data.KnowIssue = knownIssue
|
||||
data.InKnownList = inKnownList
|
||||
} else {
|
||||
data.KnowIssue = "none"
|
||||
data.InKnownList = inKnownList
|
||||
}
|
||||
|
||||
storage.Save("dload", containerID, time.Now(), data)
|
||||
return nil
|
||||
}
|
||||
|
||||
const (
|
||||
fShift = 11
|
||||
fixed1 = 1 << fShift
|
||||
|
@ -258,109 +283,145 @@ func updateLoad(info *containerDloadInfo, nrRunning, nrUninterruptible uint64) {
|
|||
info.loaduni = getAvenrun(info.avgnuni, fixed1/200, 0)
|
||||
}
|
||||
|
||||
func pidStack(pid int32) string {
|
||||
data, _ := os.ReadFile(fmt.Sprintf("/proc/%d/stack", pid))
|
||||
return string(data)
|
||||
}
|
||||
func detect(ctx context.Context) (*containerDloadInfo, string, *DloadTracingData, error) {
|
||||
var caseData DloadTracingData
|
||||
|
||||
func cgroupHostTasks(where int, path string) ([]int32, error) {
|
||||
switch where {
|
||||
case taskCgroupType:
|
||||
cgroup, err := cgroups.NewCgroupManager()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return cgroup.Pids(path)
|
||||
case taskHostType:
|
||||
var pidList []int32
|
||||
|
||||
procs, err := procfs.AllProcs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, p := range procs {
|
||||
pidList = append(pidList, int32(p.PID))
|
||||
}
|
||||
return pidList, err
|
||||
default:
|
||||
return nil, fmt.Errorf("type not supported")
|
||||
}
|
||||
}
|
||||
|
||||
func dumpUninterruptibleTaskStack(where int, path string, all bool) (string, error) {
|
||||
var appended bool = false
|
||||
|
||||
stacks := new(bytes.Buffer)
|
||||
|
||||
tasks, err := cgroupHostTasks(where, path)
|
||||
n, err := netlink.New()
|
||||
if err != nil {
|
||||
return "", err
|
||||
log.Infof("Failed to create cpu load util: %s", err)
|
||||
return nil, "", nil, err
|
||||
}
|
||||
defer n.Stop()
|
||||
|
||||
for _, pid := range tasks {
|
||||
proc, err := process.NewProcess(pid)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
dloadThresh := conf.Get().Tracing.Dload.ThresholdLoad
|
||||
monitorGap := conf.Get().Tracing.Dload.MonitorGap
|
||||
|
||||
status, err := proc.Status()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if status == "D" || status == "U" || all {
|
||||
comm, err := proc.Name()
|
||||
if err != nil {
|
||||
continue
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, "", nil, types.ErrExitByCancelCtx
|
||||
default:
|
||||
if err := updateIDMap(dloadIdMap); err != nil {
|
||||
return nil, "", nil, err
|
||||
}
|
||||
stack := pidStack(pid)
|
||||
if stack == "" {
|
||||
continue
|
||||
for k, v := range dloadIdMap {
|
||||
if !v.alive {
|
||||
delete(dloadIdMap, k)
|
||||
} else {
|
||||
v.alive = false
|
||||
|
||||
timeStartMonitor := v.container.StartedAt.Add(time.Second * time.Duration(monitorGap))
|
||||
|
||||
if time.Now().Before(timeStartMonitor) {
|
||||
log.Debugf("%s were just started, we'll start monitoring it later.", v.container.Hostname)
|
||||
continue
|
||||
}
|
||||
|
||||
stats, err := n.GetCpuLoad(v.name, v.path)
|
||||
if err != nil {
|
||||
log.Debugf("failed to get %s load, probably the container has been deleted: %s", v.container.Hostname, err)
|
||||
continue
|
||||
}
|
||||
|
||||
updateLoad(v, stats.NrRunning, stats.NrUninterruptible)
|
||||
|
||||
if v.loaduni[0] > dloadThresh {
|
||||
logTitle := fmt.Sprintf("Avg=%0.2f Threshold=%0.2f %+v ", v.loaduni[0], dloadThresh, stats)
|
||||
logBody := fmt.Sprintf("LoadAvg=%0.2f, DLoadAvg=%0.2f", v.load[0], v.loaduni[0])
|
||||
logLoad := fmt.Sprintf("%s%s", logTitle, logBody)
|
||||
|
||||
log.Infof("dload event %s", logLoad)
|
||||
|
||||
caseData.Avg = v.loaduni[0]
|
||||
caseData.Threshold = dloadThresh
|
||||
caseData.NrSleeping = stats.NrSleeping
|
||||
caseData.NrRunning = stats.NrRunning
|
||||
caseData.NrStopped = stats.NrStopped
|
||||
caseData.NrUninterruptible = stats.NrUninterruptible
|
||||
caseData.NrIoWait = stats.NrIoWait
|
||||
caseData.LoadAvg = v.load[0]
|
||||
caseData.DLoadAvg = v.loaduni[0]
|
||||
|
||||
return v, logLoad, &caseData, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Fprintf(stacks, "Comm: %s\tPid: %d\n%s\n", comm, pid, stack)
|
||||
appended = true
|
||||
time.Sleep(5 * time.Second)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if appended {
|
||||
title := "\nstacktrace of D task in cgroup:\n"
|
||||
if where == taskHostType {
|
||||
title = "\nstacktrace of D task in host:\n"
|
||||
}
|
||||
func dumpInfo(info *containerDloadInfo, logLoad string, caseData *DloadTracingData) error {
|
||||
var tskList []int32
|
||||
var err error
|
||||
var stackCgrp string
|
||||
var stackHost string
|
||||
var containerHostNamespace string
|
||||
|
||||
return fmt.Sprintf("%s%s", title, stacks), nil
|
||||
cgrpPath := info.path
|
||||
containerID := info.container.ID
|
||||
containerHostNamespace = info.container.LabelHostNamespace()
|
||||
|
||||
tskList, err = getUnTaskList(cgrpPath, isCgrp)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get cgroup task list: %w", err)
|
||||
}
|
||||
|
||||
return "", nil
|
||||
stackCgrp, err = dumpUnTaskStack(tskList, isCgrp)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to dump cgroup task backtrace: %w", err)
|
||||
}
|
||||
|
||||
tskList, err = getUnTaskList("", isHost)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get host task list: %w", err)
|
||||
}
|
||||
|
||||
stackHost, err = dumpUnTaskStack(tskList, isHost)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to dump host task backtrace: %w", err)
|
||||
}
|
||||
|
||||
// We'll not record it if got no cgroup stack info.
|
||||
if stackCgrp == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if this is caused by known issues.
|
||||
knownIssue, inKnownList := conf.KnownIssueSearch(stackCgrp, containerHostNamespace, "")
|
||||
if knownIssue != "" {
|
||||
caseData.KnowIssue = knownIssue
|
||||
caseData.InKnownList = inKnownList
|
||||
} else {
|
||||
caseData.KnowIssue = "none"
|
||||
caseData.InKnownList = inKnownList
|
||||
}
|
||||
|
||||
// save storage
|
||||
caseData.Stack = fmt.Sprintf("%s%s", stackCgrp, stackHost)
|
||||
storage.Save("ctnDLoad", containerID, time.Now(), caseData)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type dloadTracing struct{}
|
||||
|
||||
// Start detect work, monitor the load of containers
|
||||
func (c *dloadTracing) Start(ctx context.Context) error {
|
||||
thresh := conf.Get().Tracing.Dload.ThresholdLoad
|
||||
interval := conf.Get().Tracing.Dload.MonitorGap
|
||||
cntInfo, logLoad, caseData, err := detect(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return types.ErrExitByCancelCtx
|
||||
default:
|
||||
time.Sleep(5 * time.Second)
|
||||
|
||||
if err := updateContainersDload(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
container, loadstat, err := detectDloadContainer(thresh, interval)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
_ = buildAndSaveDloadContainer(thresh, container, loadstat)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Infof("caller requests stop !!!")
|
||||
default:
|
||||
err = dumpInfo(cntInfo, logLoad, caseData)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to dump info: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -239,7 +239,11 @@ func (c *memBurstTracing) Start(ctx context.Context) error {
|
|||
|
||||
lastReportTime = currentTime
|
||||
|
||||
storage.Save("memburst", "", time.Now(), &MemoryTracingData{TopMemoryUsage: topProcesses})
|
||||
// save storage
|
||||
caseData := &MemoryTracingData{
|
||||
TopMemoryUsage: topProcesses,
|
||||
}
|
||||
storage.Save("memburst", "", time.Now(), caseData)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
|
@ -121,9 +121,11 @@ func newDropWatch() (*tracing.EventTracingAttr, error) {
|
|||
|
||||
// Start starts the tracer.
|
||||
func (c *dropWatchTracing) Start(ctx context.Context) error {
|
||||
log.Info(logPrefix + "tracer will be starting.")
|
||||
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("load bpf: %w", err)
|
||||
return fmt.Errorf(logPrefix+"failed to load bpf: %w", err)
|
||||
}
|
||||
defer b.Close()
|
||||
|
||||
|
@ -133,12 +135,15 @@ func (c *dropWatchTracing) Start(ctx context.Context) error {
|
|||
// attach
|
||||
reader, err := b.AttachAndEventPipe(childCtx, "perf_events", 8192)
|
||||
if err != nil {
|
||||
return fmt.Errorf("attach and event pipe: %w", err)
|
||||
return fmt.Errorf(logPrefix+"failed to attach and event pipe: %w", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
// breaker
|
||||
b.WaitDetachByBreaker(childCtx, cancel)
|
||||
|
||||
log.Info(logPrefix + "tracer is waitting for event.")
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-childCtx.Done():
|
||||
|
@ -151,14 +156,17 @@ func (c *dropWatchTracing) Start(ctx context.Context) error {
|
|||
}
|
||||
|
||||
// format
|
||||
tracerTime := time.Now()
|
||||
tracerData := c.formatEvent(&event)
|
||||
|
||||
// ignore
|
||||
if c.ignore(tracerData) {
|
||||
log.Debugf(logPrefix+"ignore dropwatch data: %v", tracerData)
|
||||
continue
|
||||
}
|
||||
|
||||
storage.Save(tracerName, "", time.Now(), tracerData)
|
||||
// save storage
|
||||
storage.Save(tracerName, "", tracerTime, tracerData)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,11 +17,11 @@ package events
|
|||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/storage"
|
||||
"huatuo-bamai/internal/utils/bpfutil"
|
||||
"huatuo-bamai/internal/utils/kmsgutil"
|
||||
|
@ -45,24 +45,18 @@ type HungTaskTracerData struct {
|
|||
}
|
||||
|
||||
type hungTaskTracing struct {
|
||||
metric []*metric.Data
|
||||
hungtaskMetric []*metric.Data
|
||||
}
|
||||
|
||||
func init() {
|
||||
// Some OS distributions such as Fedora-42 may disable this feature.
|
||||
hungTaskSysctl := "/proc/sys/kernel/hung_task_timeout_secs"
|
||||
if _, err := os.Stat(hungTaskSysctl); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
tracing.RegisterEventTracing("hungtask", newHungTask)
|
||||
}
|
||||
|
||||
func newHungTask() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &hungTaskTracing{
|
||||
metric: []*metric.Data{
|
||||
metric.NewGaugeData("counter", 0, "hungtask counter", nil),
|
||||
hungtaskMetric: []*metric.Data{
|
||||
metric.NewGaugeData("happened", 0, "hungtask happened", nil),
|
||||
},
|
||||
},
|
||||
Internal: 10,
|
||||
|
@ -73,13 +67,15 @@ func newHungTask() (*tracing.EventTracingAttr, error) {
|
|||
var hungtaskCounter float64
|
||||
|
||||
func (c *hungTaskTracing) Update() ([]*metric.Data, error) {
|
||||
c.metric[0].Value = hungtaskCounter
|
||||
return c.metric, nil
|
||||
c.hungtaskMetric[0].Value = hungtaskCounter
|
||||
hungtaskCounter = 0
|
||||
return c.hungtaskMetric, nil
|
||||
}
|
||||
|
||||
func (c *hungTaskTracing) Start(ctx context.Context) error {
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
log.Infof("failed to LoadBpf, err: %v", err)
|
||||
return err
|
||||
}
|
||||
defer b.Close()
|
||||
|
@ -89,6 +85,7 @@ func (c *hungTaskTracing) Start(ctx context.Context) error {
|
|||
|
||||
reader, err := b.AttachAndEventPipe(childCtx, "hungtask_perf_events", 8192)
|
||||
if err != nil {
|
||||
log.Infof("failed to AttachAndEventPipe, err: %v", err)
|
||||
return err
|
||||
}
|
||||
defer reader.Close()
|
||||
|
@ -102,27 +99,28 @@ func (c *hungTaskTracing) Start(ctx context.Context) error {
|
|||
default:
|
||||
var data hungTaskPerfEventData
|
||||
if err := reader.ReadInto(&data); err != nil {
|
||||
return fmt.Errorf("hungtask ReadFromPerfEvent: %w", err)
|
||||
return fmt.Errorf("ReadFromPerfEvent fail: %w", err)
|
||||
}
|
||||
|
||||
cpusBT, err := kmsgutil.GetAllCPUsBT()
|
||||
if err != nil {
|
||||
cpusBT = err.Error()
|
||||
}
|
||||
|
||||
blockedProcessesBT, err := kmsgutil.GetBlockedProcessesBT()
|
||||
if err != nil {
|
||||
blockedProcessesBT = err.Error()
|
||||
}
|
||||
|
||||
hungtaskCounter++
|
||||
|
||||
storage.Save("hungtask", "", time.Now(), &HungTaskTracerData{
|
||||
caseData := &HungTaskTracerData{
|
||||
Pid: data.Pid,
|
||||
Comm: strings.TrimRight(string(data.Comm[:]), "\x00"),
|
||||
CPUsStack: cpusBT,
|
||||
BlockedProcessesStack: blockedProcessesBT,
|
||||
})
|
||||
}
|
||||
hungtaskCounter++
|
||||
|
||||
// save storage
|
||||
storage.Save("hungtask", "", time.Now(), caseData)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -58,7 +58,7 @@ func newLACPTracing() (*tracing.EventTracingAttr, error) {
|
|||
func (lacp *lacpTracing) Start(ctx context.Context) (err error) {
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("load bpf: %w", err)
|
||||
return fmt.Errorf("Load lacp err: %w", err)
|
||||
}
|
||||
defer b.Close()
|
||||
|
||||
|
@ -67,7 +67,7 @@ func (lacp *lacpTracing) Start(ctx context.Context) (err error) {
|
|||
|
||||
reader, err := b.AttachAndEventPipe(childCtx, "ad_event_map", 8192)
|
||||
if err != nil {
|
||||
return fmt.Errorf("attach and event pipe: %w", err)
|
||||
return fmt.Errorf("failed to AttachAndEventPipe, err: %w", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ type MemoryReclaimTracingData struct {
|
|||
}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("memory_reclaim_events", newMemoryReclaim)
|
||||
tracing.RegisterEventTracing("memreclaim", newMemoryReclaim)
|
||||
}
|
||||
|
||||
func newMemoryReclaim() (*tracing.EventTracingAttr, error) {
|
||||
|
@ -57,14 +57,17 @@ func newMemoryReclaim() (*tracing.EventTracingAttr, error) {
|
|||
}, nil
|
||||
}
|
||||
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/memory_reclaim_events.c -o $BPF_DIR/memory_reclaim_events.o
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/memory_reclaim.c -o $BPF_DIR/memory_reclaim.o
|
||||
|
||||
// Start detect work, load bpf and wait data form perfevent
|
||||
func (c *memoryReclaimTracing) Start(ctx context.Context) error {
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), map[string]any{
|
||||
"deltath": conf.Get().Tracing.MemoryReclaim.Deltath,
|
||||
})
|
||||
log.Infof("memory reclaim start.")
|
||||
|
||||
deltath := conf.Get().Tracing.MemoryReclaim.Deltath
|
||||
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), map[string]any{"deltath": deltath})
|
||||
if err != nil {
|
||||
log.Infof("LoadBpf: %v", err)
|
||||
return err
|
||||
}
|
||||
defer b.Close()
|
||||
|
@ -74,6 +77,7 @@ func (c *memoryReclaimTracing) Start(ctx context.Context) error {
|
|||
|
||||
reader, err := b.AttachAndEventPipe(childCtx, "reclaim_perf_events", 8192)
|
||||
if err != nil {
|
||||
log.Infof("AttachAndEventPipe: %v", err)
|
||||
return err
|
||||
}
|
||||
defer reader.Close()
|
|
@ -43,7 +43,7 @@ const (
|
|||
)
|
||||
|
||||
func (l linkStatusType) String() string {
|
||||
return [...]string{"linkstatus_unknown", "linkstatus_adminup", "linkstatus_admindown", "linkstatus_carrierup", "linkstatus_carrierdown"}[l]
|
||||
return [...]string{"linkStatusUnknown", "linkStatusAdminUp", "linkStatusAdminDown", "linkStatusCarrierUp", "linkStatusCarrierDown"}[l]
|
||||
}
|
||||
|
||||
func flags2status(flags, change uint32) []linkStatusType {
|
||||
|
@ -88,7 +88,7 @@ type netdevEventData struct {
|
|||
}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("netdev_events", newNetdevTracing)
|
||||
tracing.RegisterEventTracing("netdev_event", newNetdevTracing)
|
||||
}
|
||||
|
||||
func newNetdevTracing() (*tracing.EventTracingAttr, error) {
|
||||
|
@ -103,7 +103,7 @@ func newNetdevTracing() (*tracing.EventTracingAttr, error) {
|
|||
linkDoneCh: make(chan struct{}),
|
||||
ifFlagsMap: make(map[string]uint32),
|
||||
metricsLinkStatusCountMap: initMap,
|
||||
name: "netdev_events",
|
||||
name: "netdev_event",
|
||||
},
|
||||
Internal: 10,
|
||||
Flag: tracing.FlagTracing | tracing.FlagMetric,
|
|
@ -97,7 +97,7 @@ var toWhere = []string{
|
|||
}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("netrecvlat", newNetRcvLat)
|
||||
tracing.RegisterEventTracing("netrcvlat", newNetRcvLat)
|
||||
}
|
||||
|
||||
func newNetRcvLat() (*tracing.EventTracingAttr, error) {
|
||||
|
@ -133,6 +133,7 @@ func (c *netRecvLatTracing) Start(ctx context.Context) error {
|
|||
}
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), args)
|
||||
if err != nil {
|
||||
log.Infof("failed to LoadBpf, err: %v", err)
|
||||
return err
|
||||
}
|
||||
defer b.Close()
|
||||
|
@ -142,6 +143,7 @@ func (c *netRecvLatTracing) Start(ctx context.Context) error {
|
|||
|
||||
reader, err := b.AttachAndEventPipe(childCtx, "net_recv_lat_event_map", 8192)
|
||||
if err != nil {
|
||||
log.Infof("failed to AttachAndEventPipe, err: %v", err)
|
||||
return err
|
||||
}
|
||||
defer reader.Close()
|
||||
|
|
|
@ -87,15 +87,15 @@ func (c *oomCollector) Update() ([]*metric.Data, error) {
|
|||
}
|
||||
metrics := []*metric.Data{}
|
||||
mutex.Lock()
|
||||
metrics = append(metrics, metric.NewGaugeData("host_counter", hostOOMCounter, "host oom counter", nil))
|
||||
metrics = append(metrics, metric.NewGaugeData("host_happened", hostOOMCounter, "host oom happened", nil))
|
||||
for _, container := range containers {
|
||||
if val, exists := containerOOMCounter[container.ID]; exists {
|
||||
metrics = append(metrics,
|
||||
metric.NewContainerGaugeData(container, "counter", float64(val.count), "containers oom counter", map[string]string{"process": val.victimProcessName}),
|
||||
metric.NewContainerGaugeData(container, "counter", float64(val.count), "ct oom happened", map[string]string{"process": val.victimProcessName}),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
hostOOMCounter = 0
|
||||
containerOOMCounter = make(map[string]oomMetric)
|
||||
mutex.Unlock()
|
||||
return metrics, nil
|
||||
|
@ -105,6 +105,7 @@ func (c *oomCollector) Update() ([]*metric.Data, error) {
|
|||
func (c *oomCollector) Start(ctx context.Context) error {
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
log.Infof("failed to LoadBpf, err: %v", err)
|
||||
return err
|
||||
}
|
||||
defer b.Close()
|
||||
|
@ -114,6 +115,7 @@ func (c *oomCollector) Start(ctx context.Context) error {
|
|||
|
||||
reader, err := b.AttachAndEventPipe(childCtx, "oom_perf_events", 8192)
|
||||
if err != nil {
|
||||
log.Infof("failed to AttachAndEventPipe, err: %v", err)
|
||||
return err
|
||||
}
|
||||
defer reader.Close()
|
||||
|
@ -182,6 +184,7 @@ func (c *oomCollector) Start(ctx context.Context) error {
|
|||
}
|
||||
mutex.Unlock()
|
||||
|
||||
// save storage
|
||||
storage.Save("oom", "", time.Now(), caseData)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,13 +22,14 @@ import (
|
|||
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/storage"
|
||||
"huatuo-bamai/internal/utils/bpfutil"
|
||||
"huatuo-bamai/internal/utils/symbolutil"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/softirq_tracing.c -o $BPF_DIR/softirq_tracing.o
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/softirq.c -o $BPF_DIR/softirq.o
|
||||
|
||||
type softirqTracing struct{}
|
||||
|
||||
|
@ -54,7 +55,7 @@ type SoftirqTracingData struct {
|
|||
}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("softirq_tracing", newSoftirq)
|
||||
tracing.RegisterEventTracing("softirq", newSoftirq)
|
||||
}
|
||||
|
||||
func newSoftirq() (*tracing.EventTracingAttr, error) {
|
||||
|
@ -66,11 +67,14 @@ func newSoftirq() (*tracing.EventTracingAttr, error) {
|
|||
}
|
||||
|
||||
func (c *softirqTracing) Start(ctx context.Context) error {
|
||||
log.Infof("Softirq start")
|
||||
|
||||
softirqThresh := conf.Get().Tracing.Softirq.ThresholdTime
|
||||
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), map[string]any{"softirq_thresh": softirqThresh})
|
||||
if err != nil {
|
||||
return fmt.Errorf("load bpf: %w", err)
|
||||
log.Infof("failed to LoadBpf, err: %v", err)
|
||||
return err
|
||||
}
|
||||
defer b.Close()
|
||||
|
||||
|
@ -79,7 +83,8 @@ func (c *softirqTracing) Start(ctx context.Context) error {
|
|||
|
||||
reader, err := attachIrqAndEventPipe(childCtx, b)
|
||||
if err != nil {
|
||||
return fmt.Errorf("attach irq and event pipe: %w", err)
|
||||
log.Infof("failed to attachIrqAndEventPipe, err: %v", err)
|
||||
return err
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
|
@ -115,7 +120,8 @@ func (c *softirqTracing) Start(ctx context.Context) error {
|
|||
stack = softirqDumpTrace(data.Stack[:])
|
||||
}
|
||||
|
||||
storage.Save("softirq_tracing", "", time.Now(), &SoftirqTracingData{
|
||||
// save storage
|
||||
caseData := &SoftirqTracingData{
|
||||
OffTime: data.StallTime,
|
||||
Threshold: softirqThresh,
|
||||
Comm: strings.TrimRight(comm, "\x00"),
|
||||
|
@ -123,7 +129,8 @@ func (c *softirqTracing) Start(ctx context.Context) error {
|
|||
CPU: data.CPU,
|
||||
Now: data.Now,
|
||||
Stack: fmt.Sprintf("stack:\n%s", stack),
|
||||
})
|
||||
}
|
||||
storage.Save("softirq", "", time.Now(), caseData)
|
||||
}
|
||||
} // forever
|
||||
}
|
||||
|
@ -162,8 +169,8 @@ func attachIrqAndEventPipe(ctx context.Context, b bpf.BPF) (bpf.PerfEventReader,
|
|||
*/
|
||||
if err := b.AttachWithOptions([]bpf.AttachOption{
|
||||
{
|
||||
ProgramName: "probe_account_process_tick",
|
||||
Symbol: "account_process_tick",
|
||||
ProgramName: "probe_scheduler_tick",
|
||||
Symbol: "scheduler_tick",
|
||||
},
|
||||
{
|
||||
ProgramName: "probe_tick_nohz_restart_sched_tick",
|
|
@ -21,6 +21,7 @@ import (
|
|||
"time"
|
||||
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/storage"
|
||||
"huatuo-bamai/internal/utils/bpfutil"
|
||||
"huatuo-bamai/internal/utils/kmsgutil"
|
||||
|
@ -56,7 +57,7 @@ func newSoftLockup() (*tracing.EventTracingAttr, error) {
|
|||
return &tracing.EventTracingAttr{
|
||||
TracingData: &softLockupTracing{
|
||||
softlockupMetric: []*metric.Data{
|
||||
metric.NewGaugeData("counter", 0, "softlockup counter", nil),
|
||||
metric.NewGaugeData("happened", 0, "softlockup happened", nil),
|
||||
},
|
||||
},
|
||||
Internal: 10,
|
||||
|
@ -68,12 +69,14 @@ var softlockupCounter float64
|
|||
|
||||
func (c *softLockupTracing) Update() ([]*metric.Data, error) {
|
||||
c.softlockupMetric[0].Value = softlockupCounter
|
||||
softlockupCounter = 0
|
||||
return c.softlockupMetric, nil
|
||||
}
|
||||
|
||||
func (c *softLockupTracing) Start(ctx context.Context) error {
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
log.Infof("failed to LoadBpf, err: %v", err)
|
||||
return err
|
||||
}
|
||||
defer b.Close()
|
||||
|
@ -83,6 +86,7 @@ func (c *softLockupTracing) Start(ctx context.Context) error {
|
|||
|
||||
reader, err := b.AttachAndEventPipe(childCtx, "softlockup_perf_events", 8192)
|
||||
if err != nil {
|
||||
log.Infof("failed to AttachAndEventPipe, err: %v", err)
|
||||
return err
|
||||
}
|
||||
defer reader.Close()
|
||||
|
@ -104,14 +108,16 @@ func (c *softLockupTracing) Start(ctx context.Context) error {
|
|||
bt = err.Error()
|
||||
}
|
||||
|
||||
softlockupCounter++
|
||||
|
||||
storage.Save("softlockup", "", time.Now(), &SoftLockupTracerData{
|
||||
caseData := &SoftLockupTracerData{
|
||||
CPU: data.CPU,
|
||||
Pid: data.Pid,
|
||||
Comm: strings.TrimRight(string(data.Comm[:]), "\x00"),
|
||||
CPUsStack: bt,
|
||||
})
|
||||
}
|
||||
softlockupCounter++
|
||||
|
||||
// save storage
|
||||
storage.Save("softlockup", "", time.Now(), caseData)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,9 +19,9 @@ import (
|
|||
"sync"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/cgroups"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
@ -46,8 +46,9 @@ type cpuStat struct {
|
|||
}
|
||||
|
||||
type cpuStatCollector struct {
|
||||
cgroup cgroups.Cgroup
|
||||
mutex sync.Mutex
|
||||
cpu *cgrouputil.CPU
|
||||
cpuacct *cgrouputil.CPUAcct
|
||||
mutex sync.Mutex
|
||||
}
|
||||
|
||||
func init() {
|
||||
|
@ -56,14 +57,10 @@ func init() {
|
|||
}
|
||||
|
||||
func newCPUStat() (*tracing.EventTracingAttr, error) {
|
||||
cgroup, err := cgroups.NewCgroupManager()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &cpuStatCollector{
|
||||
cgroup: cgroup,
|
||||
cpu: cgrouputil.NewCPU(),
|
||||
cpuacct: cgrouputil.NewCPUAcctDefault(),
|
||||
},
|
||||
Flag: tracing.FlagMetric,
|
||||
}, nil
|
||||
|
@ -85,12 +82,12 @@ func (c *cpuStatCollector) cpuMetricUpdate(cpu *cpuStat, container *pod.Containe
|
|||
return nil
|
||||
}
|
||||
|
||||
raw, err := c.cgroup.CpuStatRaw(container.CgroupSuffix)
|
||||
raw, err := c.cpu.StatRaw(container.CgroupSuffix)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
usage, err := c.cgroup.CpuUsage(container.CgroupSuffix)
|
||||
usageTotal, err := c.cpuacct.Usage(container.CgroupSuffix)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -102,7 +99,7 @@ func (c *cpuStatCollector) cpuMetricUpdate(cpu *cpuStat, container *pod.Containe
|
|||
innerWaitSum: raw["inner_wait_sum"],
|
||||
nrBursts: raw["nr_bursts"],
|
||||
burstTime: raw["burst_time"],
|
||||
cpuTotal: usage.Usage * 1000,
|
||||
cpuTotal: usageTotal,
|
||||
lastUpdate: now,
|
||||
}
|
||||
|
||||
|
|
|
@ -15,15 +15,14 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"math"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/cgroups"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
@ -40,7 +39,8 @@ type cpuMetric struct {
|
|||
|
||||
type cpuUtilCollector struct {
|
||||
cpuUtil []*metric.Data
|
||||
cgroup cgroups.Cgroup
|
||||
cpuacct *cgrouputil.CPUAcct
|
||||
cpu *cgrouputil.CPU
|
||||
|
||||
// included struct for used in multi modules
|
||||
hostCPUCount int
|
||||
|
@ -55,11 +55,6 @@ func init() {
|
|||
}
|
||||
|
||||
func newCPUUtil() (*tracing.EventTracingAttr, error) {
|
||||
cgroup, err := cgroups.NewCgroupManager()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &cpuUtilCollector{
|
||||
cpuUtil: []*metric.Data{
|
||||
|
@ -67,8 +62,9 @@ func newCPUUtil() (*tracing.EventTracingAttr, error) {
|
|||
metric.NewGaugeData("sys", 0, "sys for container and host", nil),
|
||||
metric.NewGaugeData("total", 0, "total for container and host", nil),
|
||||
},
|
||||
cpuacct: cgrouputil.NewCPUAcctDefault(),
|
||||
cpu: cgrouputil.NewCPU(),
|
||||
hostCPUCount: runtime.NumCPU(),
|
||||
cgroup: cgroup,
|
||||
},
|
||||
Flag: tracing.FlagMetric,
|
||||
}, nil
|
||||
|
@ -94,14 +90,15 @@ func (c *cpuUtilCollector) cpuMetricUpdate(cpuMetric *cpuMetric, container *pod.
|
|||
cgroupPath = container.CgroupSuffix
|
||||
}
|
||||
|
||||
stat, err := c.cgroup.CpuUsage(cgroupPath)
|
||||
usageTotal, err := c.cpuacct.Usage(cgroupPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
usageTotal := stat.Usage
|
||||
usageUsr := stat.User
|
||||
usageSys := stat.System
|
||||
usageUsr, usageSys, err := c.cpuacct.Stat(cgroupPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// allow statistics 0
|
||||
deltaTotal := usageTotal - cpuMetric.lastCPUTotal
|
||||
|
@ -152,18 +149,12 @@ func (c *cpuUtilCollector) Update() ([]*metric.Data, error) {
|
|||
}
|
||||
|
||||
for _, container := range containers {
|
||||
cpuQuota, err := c.cgroup.CpuQuotaAndPeriod(container.CgroupSuffix)
|
||||
count, err := c.cpu.CPUNum(container.CgroupSuffix)
|
||||
if err != nil {
|
||||
log.Infof("fetch container [%s] cpu quota and period: %v", container, err)
|
||||
log.Infof("failed to get cpu count of %s, %v", container, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if cpuQuota.Quota == math.MaxUint64 {
|
||||
continue
|
||||
}
|
||||
|
||||
count := int(cpuQuota.Quota / cpuQuota.Period)
|
||||
|
||||
containerMetric := container.LifeResouces("collector_cpu_util").(*cpuMetric)
|
||||
if err := c.cpuMetricUpdate(containerMetric, container, count); err != nil {
|
||||
log.Infof("failed to update cpu info of %s, %v", container, err)
|
||||
|
@ -177,6 +168,7 @@ func (c *cpuUtilCollector) Update() ([]*metric.Data, error) {
|
|||
}
|
||||
|
||||
if err := c.hostMetricUpdate(); err != nil {
|
||||
log.Errorf("c.hostCpuMetricUpdate :%v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
|
|
@ -15,9 +15,11 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"huatuo-bamai/internal/cgroups"
|
||||
"huatuo-bamai/internal/cgroups/paths"
|
||||
"fmt"
|
||||
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
|
||||
|
@ -68,50 +70,37 @@ func (c *loadavgCollector) hostLoadAvg() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func containerLoadavg() ([]*metric.Data, error) {
|
||||
func (c *loadavgCollector) Update() ([]*metric.Data, error) {
|
||||
loadAvgMetrics := []*metric.Data{}
|
||||
|
||||
n, err := netlink.New()
|
||||
if err != nil {
|
||||
log.Infof("Failed to create netlink: %s", err)
|
||||
return nil, err
|
||||
}
|
||||
defer n.Stop()
|
||||
|
||||
containers, err := pod.GetContainersByType(pod.ContainerTypeNormal | pod.ContainerTypeSidecar)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("GetContainersByType: %w", err)
|
||||
}
|
||||
|
||||
loadavgs := []*metric.Data{}
|
||||
for _, container := range containers {
|
||||
stats, err := n.GetCpuLoad(container.Hostname, paths.Path("cpu", container.CgroupSuffix))
|
||||
stats, err := n.GetCpuLoad(container.Hostname, cgrouputil.NewCPU().Path(container.CgroupSuffix))
|
||||
if err != nil {
|
||||
log.Debugf("failed to get %s load, %v", container, err)
|
||||
continue
|
||||
}
|
||||
|
||||
loadavgs = append(loadavgs,
|
||||
metric.NewContainerGaugeData(container,
|
||||
"nr_running", float64(stats.NrRunning),
|
||||
"nr_running of container", nil),
|
||||
metric.NewContainerGaugeData(container,
|
||||
"nr_uninterruptible", float64(stats.NrUninterruptible),
|
||||
"nr_uninterruptible of container", nil))
|
||||
}
|
||||
|
||||
return loadavgs, nil
|
||||
}
|
||||
|
||||
func (c *loadavgCollector) Update() ([]*metric.Data, error) {
|
||||
loadavgs := []*metric.Data{}
|
||||
|
||||
if cgroups.CgroupMode() == cgroups.Legacy {
|
||||
if containersLoads, err := containerLoadavg(); err == nil {
|
||||
loadavgs = append(loadavgs, containersLoads...)
|
||||
}
|
||||
loadAvgMetrics = append(loadAvgMetrics,
|
||||
metric.NewContainerGaugeData(container, "container_nr_running", float64(stats.NrRunning), "nr_running of container", nil),
|
||||
metric.NewContainerGaugeData(container, "container_nr_uninterruptible", float64(stats.NrUninterruptible), "nr_uninterruptible of container", nil))
|
||||
}
|
||||
|
||||
if err := c.hostLoadAvg(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
loadavgs = append(loadavgs, c.loadAvg...)
|
||||
return loadavgs, nil
|
||||
loadAvgMetrics = append(loadAvgMetrics, c.loadAvg...)
|
||||
return loadAvgMetrics, nil
|
||||
}
|
||||
|
|
|
@ -28,7 +28,7 @@ import (
|
|||
)
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("memory_reclaim", newMemoryCgroup)
|
||||
tracing.RegisterEventTracing("mmcgroup", newMemoryCgroup)
|
||||
}
|
||||
|
||||
func newMemoryCgroup() (*tracing.EventTracingAttr, error) {
|
||||
|
@ -43,7 +43,7 @@ type memoryCgroupMetric struct {
|
|||
DirectstallCount uint64
|
||||
}
|
||||
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/memory_reclaim.c -o $BPF_DIR/memory_reclaim.o
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/memory_cgroup.c -o $BPF_DIR/memory_cgroup.o
|
||||
|
||||
type memoryCgroup struct {
|
||||
bpf bpf.BPF
|
||||
|
@ -58,7 +58,7 @@ func (c *memoryCgroup) Update() ([]*metric.Data, error) {
|
|||
containersMap := make(map[uint64]*pod.Container)
|
||||
containers, err := pod.GetNormalContainers()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get container: %w", err)
|
||||
return nil, fmt.Errorf("Can't get normal container: %w", err)
|
||||
}
|
||||
|
||||
for _, container := range containers {
|
||||
|
@ -67,7 +67,7 @@ func (c *memoryCgroup) Update() ([]*metric.Data, error) {
|
|||
|
||||
items, err := c.bpf.DumpMapByName("mem_cgroup_map")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("dump mem_cgroup_map: %w", err)
|
||||
return nil, fmt.Errorf("Can't dump mem_cgroup_map: %w", err)
|
||||
}
|
||||
|
||||
var (
|
||||
|
@ -88,7 +88,7 @@ func (c *memoryCgroup) Update() ([]*metric.Data, error) {
|
|||
|
||||
if container, exist := containersMap[css]; exist {
|
||||
containersMetric = append(containersMetric,
|
||||
metric.NewContainerGaugeData(container, "directstall",
|
||||
metric.NewContainerGaugeData(container, "directstallcount",
|
||||
float64(cgroupMetric.DirectstallCount),
|
||||
"counting of cgroup try_charge reclaim", nil))
|
||||
}
|
||||
|
@ -98,7 +98,7 @@ func (c *memoryCgroup) Update() ([]*metric.Data, error) {
|
|||
if len(items) == 0 {
|
||||
for _, container := range containersMap {
|
||||
containersMetric = append(containersMetric,
|
||||
metric.NewContainerGaugeData(container, "directstall", float64(0),
|
||||
metric.NewContainerGaugeData(container, "directstallcount", float64(0),
|
||||
"counting of cgroup try_charge reclaim", nil))
|
||||
}
|
||||
}
|
||||
|
@ -110,12 +110,12 @@ func (c *memoryCgroup) Start(ctx context.Context) error {
|
|||
var err error
|
||||
c.bpf, err = bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("load bpf: %w", err)
|
||||
return fmt.Errorf("LoadBpf memory_cgroup.o: %w", err)
|
||||
}
|
||||
defer c.bpf.Close()
|
||||
|
||||
if err = c.bpf.Attach(); err != nil {
|
||||
return fmt.Errorf("attach: %w", err)
|
||||
return fmt.Errorf("failed to Attach, err: %w", err)
|
||||
}
|
||||
|
||||
childCtx, cancel := context.WithCancel(ctx)
|
|
@ -17,15 +17,15 @@ package collector
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"huatuo-bamai/internal/cgroups"
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
type memEventsCollector struct {
|
||||
cgroup cgroups.Cgroup
|
||||
mem cgrouputil.Memory
|
||||
}
|
||||
|
||||
func init() {
|
||||
|
@ -33,14 +33,9 @@ func init() {
|
|||
}
|
||||
|
||||
func newMemEvents() (*tracing.EventTracingAttr, error) {
|
||||
cgroup, err := cgroups.NewCgroupManager()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &memEventsCollector{
|
||||
cgroup: cgroup,
|
||||
mem: *cgrouputil.NewMemory(),
|
||||
}, Flag: tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
@ -56,7 +51,7 @@ func (c *memEventsCollector) Update() ([]*metric.Data, error) {
|
|||
|
||||
metrics := []*metric.Data{}
|
||||
for _, container := range containers {
|
||||
raw, err := c.cgroup.MemoryEventRaw(container.CgroupSuffix)
|
||||
raw, err := c.mem.EventsRaw(container.CgroupSuffix)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -27,14 +27,14 @@ import (
|
|||
)
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("memory_free", newMemoryHost)
|
||||
tracing.RegisterEventTracing("mmhost", newMemoryHost)
|
||||
}
|
||||
|
||||
func newMemoryHost() (*tracing.EventTracingAttr, error) {
|
||||
mm := &memoryHost{
|
||||
metrics: []*metric.Data{
|
||||
metric.NewGaugeData("compaction", 0, "time elapsed in memory compaction", nil),
|
||||
metric.NewGaugeData("allocstall", 0, "time elapsed in memory allocstall", nil),
|
||||
metric.NewGaugeData("compactionstat", 0, "time spent during mm compaction", nil),
|
||||
metric.NewGaugeData("allocstallstat", 0, "time spent during mm allocstall", nil),
|
||||
},
|
||||
}
|
||||
return &tracing.EventTracingAttr{
|
||||
|
@ -66,7 +66,7 @@ func (c *memoryHost) Update() ([]*metric.Data, error) {
|
|||
|
||||
items, err := c.bpf.DumpMapByName("mm_free_compact_map")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("dump map mm_free_compact_map: %w", err)
|
||||
return nil, fmt.Errorf("Can't dump mm_host_metrictable_relay: %w", err)
|
||||
}
|
||||
|
||||
if len(items) == 0 {
|
||||
|
@ -90,12 +90,12 @@ func (c *memoryHost) Start(ctx context.Context) error {
|
|||
var err error
|
||||
c.bpf, err = bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("load bpf: %w", err)
|
||||
return fmt.Errorf("LoadBpf mmhostbpf.o: %w", err)
|
||||
}
|
||||
defer c.bpf.Close()
|
||||
|
||||
if err = c.bpf.Attach(); err != nil {
|
||||
return fmt.Errorf("attach: %w", err)
|
||||
return fmt.Errorf("Attach memory_free_compact.o: %w", err)
|
||||
}
|
||||
|
||||
childCtx, cancel := context.WithCancel(ctx)
|
||||
|
|
|
@ -16,9 +16,11 @@ package collector
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"huatuo-bamai/internal/cgroups/paths"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/internal/utils/parseutil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
|
@ -38,13 +40,13 @@ func newMemOthersCollector() (*tracing.EventTracingAttr, error) {
|
|||
}, nil
|
||||
}
|
||||
|
||||
func parseValueWithKey(cgroupPath, cgroupFile, key string) (uint64, error) {
|
||||
filePath := paths.Path("memory", cgroupPath, cgroupFile)
|
||||
func parseValueWithKey(path, key string) (uint64, error) {
|
||||
filePath := filepath.Join(cgrouputil.V1MemoryPath(), path)
|
||||
if key == "" {
|
||||
return parseutil.ReadUint(filePath)
|
||||
}
|
||||
|
||||
raw, err := parseutil.RawKV(filePath)
|
||||
raw, err := parseutil.ParseRawKV(filePath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
@ -82,9 +84,11 @@ func (c *memOthersCollector) Update() ([]*metric.Data, error) {
|
|||
name: "local_direct_reclaim_time",
|
||||
},
|
||||
} {
|
||||
value, err := parseValueWithKey(container.CgroupSuffix, t.path, t.key)
|
||||
path := filepath.Join(container.CgroupSuffix, t.path)
|
||||
value, err := parseValueWithKey(path, t.key)
|
||||
if err != nil {
|
||||
// FIXME: os maynot support this metric
|
||||
log.Debugf("parse %s: %s", path, err)
|
||||
continue
|
||||
}
|
||||
|
||||
|
|
|
@ -17,33 +17,25 @@ package collector
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"huatuo-bamai/internal/cgroups"
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/pod"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/internal/utils/parseutil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
type memStatCollector struct {
|
||||
cgroup cgroups.Cgroup
|
||||
}
|
||||
type memStatCollector struct{}
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("memory_stat", newMemStat)
|
||||
}
|
||||
|
||||
func newMemStat() (*tracing.EventTracingAttr, error) {
|
||||
cgroup, err := cgroups.NewCgroupManager()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &memStatCollector{
|
||||
cgroup: cgroup,
|
||||
},
|
||||
Flag: tracing.FlagMetric,
|
||||
TracingData: &memStatCollector{},
|
||||
Flag: tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -54,11 +46,11 @@ func (c *memStatCollector) Update() ([]*metric.Data, error) {
|
|||
metrics := []*metric.Data{}
|
||||
containers, err := pod.GetNormalContainers()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("GetNormalContainers: %w", err)
|
||||
}
|
||||
|
||||
for _, container := range containers {
|
||||
raw, err := c.cgroup.MemoryStatRaw(container.CgroupSuffix)
|
||||
raw, err := parseutil.ParseRawKV(cgrouputil.V1MemoryPath() + container.CgroupSuffix + "/memory.stat")
|
||||
if err != nil {
|
||||
log.Infof("parse %s memory.stat %v", container.CgroupSuffix, err)
|
||||
continue
|
||||
|
|
|
@ -0,0 +1,136 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"huatuo-bamai/pkg/metric"
|
||||
)
|
||||
|
||||
const (
|
||||
softirqHi = iota
|
||||
softirqTime
|
||||
softirqNetTx
|
||||
softirqNetRx
|
||||
softirqBlock
|
||||
softirqIrqPoll
|
||||
softirqTasklet
|
||||
softirqSched
|
||||
softirqHrtimer
|
||||
sofirqRcu
|
||||
softirqMax
|
||||
)
|
||||
|
||||
const (
|
||||
latZONE0 = iota // 0 ~ 10us
|
||||
latZONE1 // 10us ~ 100us
|
||||
latZONE2 // 100us ~ 1ms
|
||||
latZONE3 // 1ms ~ inf
|
||||
latZoneMax
|
||||
)
|
||||
|
||||
const (
|
||||
// HI:0x1
|
||||
// TIMER:0x2
|
||||
// NET_TX:0x4
|
||||
// NET_RX:0x8
|
||||
// BLOCK:0x10
|
||||
// IRQ_POLL:0x20
|
||||
// TASKLET:0x40
|
||||
// SCHED:0x80
|
||||
// HRTIMER:0x100
|
||||
// RCU:0x200
|
||||
// fullmask => 0x2ff
|
||||
defaultSiTypeMask = 0x0c // default: only report NET_TX and NET_RX so far
|
||||
|
||||
// Because bpf access array is strictly checked,
|
||||
// the size of the array must be aligned in order
|
||||
// of 2, so we should not use softirqMax, but
|
||||
// use softirqArrayMax as the size of the array
|
||||
softirqArrayMax = 16 // must be 2^order
|
||||
)
|
||||
|
||||
var monTracerIsRunning bool
|
||||
|
||||
func latZoneName(latZone int) string {
|
||||
switch latZone {
|
||||
case latZONE0: // 0 ~ 10us
|
||||
return "0~10 us"
|
||||
case latZONE1: // 10us ~ 100us
|
||||
return "10us ~ 100us"
|
||||
case latZONE2: // 100us ~ 1ms
|
||||
return "100us ~ 1ms"
|
||||
case latZONE3: // 1ms ~ inf
|
||||
return "1ms ~ inf"
|
||||
default:
|
||||
return "ERR_ZONE"
|
||||
}
|
||||
}
|
||||
|
||||
func siTypeName(siType int) string {
|
||||
switch siType {
|
||||
case softirqHi:
|
||||
return "HI"
|
||||
case softirqTime:
|
||||
return "TIMER"
|
||||
case softirqNetTx:
|
||||
return "NET_TX"
|
||||
case softirqNetRx:
|
||||
return "NET_RX"
|
||||
case softirqBlock:
|
||||
return "BLOCK"
|
||||
case softirqIrqPoll:
|
||||
return "IRQ_POLL"
|
||||
case softirqTasklet:
|
||||
return "TASKLET"
|
||||
case softirqSched:
|
||||
return "SCHED"
|
||||
case softirqHrtimer:
|
||||
return "HRTIMER"
|
||||
case sofirqRcu:
|
||||
return "RCU"
|
||||
default:
|
||||
return "ERR_TYPE"
|
||||
}
|
||||
}
|
||||
|
||||
func getMonsoftirqInfo() ([]*metric.Data, error) {
|
||||
siLabel := make(map[string]string)
|
||||
monsoftirqMetric := []*metric.Data{}
|
||||
|
||||
for siType, lats := range &monsoftirqData.SoftirqLat {
|
||||
if (1<<siType)&defaultSiTypeMask == 0 {
|
||||
continue
|
||||
}
|
||||
siLabel["softirqType"] = siTypeName(siType)
|
||||
|
||||
for zone, count := range lats {
|
||||
siLabel["zone"] = latZoneName(zone)
|
||||
monsoftirqMetric = append(monsoftirqMetric, metric.NewGaugeData("latency", float64(count), "softirq latency", siLabel))
|
||||
}
|
||||
}
|
||||
|
||||
return monsoftirqMetric, nil
|
||||
}
|
||||
|
||||
func (c *monsoftirqTracing) Update() ([]*metric.Data, error) {
|
||||
if !monTracerIsRunning {
|
||||
return nil, nil
|
||||
}
|
||||
monsoftirqMetric, err := getMonsoftirqInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return monsoftirqMetric, nil
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/utils/bpfutil"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
)
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("monsoftirq", newSoftirqCollector)
|
||||
}
|
||||
|
||||
func newSoftirqCollector() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &monsoftirqTracing{},
|
||||
Internal: 10,
|
||||
Flag: tracing.FlagTracing | tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/monsoftirq_tracing.c -o $BPF_DIR/monsoftirq_tracing.o
|
||||
|
||||
type monsoftirqBpfData struct {
|
||||
SoftirqLat [softirqArrayMax][latZoneMax]uint64
|
||||
}
|
||||
|
||||
type monsoftirqTracing struct{}
|
||||
|
||||
var monsoftirqData monsoftirqBpfData
|
||||
|
||||
// Start monsoftirq work, load bpf and wait data form perfevent
|
||||
func (c *monsoftirqTracing) Start(ctx context.Context) error {
|
||||
// load bpf.
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to LoadBpf, err: %w", err)
|
||||
}
|
||||
defer b.Close()
|
||||
|
||||
if err = b.Attach(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
childCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
b.WaitDetachByBreaker(childCtx, cancel)
|
||||
|
||||
ticker := time.NewTicker(2 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
monTracerIsRunning = true
|
||||
defer func() { monTracerIsRunning = false }()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-childCtx.Done():
|
||||
return nil
|
||||
case <-ticker.C:
|
||||
item, err := b.ReadMap(b.MapIDByName("softirq_lats"), []byte{0, 0, 0, 0})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read softirq_lats: %w", err)
|
||||
}
|
||||
buf := bytes.NewReader(item)
|
||||
if err = binary.Read(buf, binary.LittleEndian, &monsoftirqData); err != nil {
|
||||
log.Errorf("can't read softirq_lats: %v", err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -49,9 +49,10 @@ func (c *netstatCollector) Update() ([]*metric.Data, error) {
|
|||
filter := newFieldFilter(conf.Get().MetricCollector.Netstat.ExcludedMetrics, conf.Get().MetricCollector.Netstat.IncludedMetrics)
|
||||
log.Debugf("Updating netstat metrics by filter: %v", filter)
|
||||
|
||||
// normal containers
|
||||
containers, err := pod.GetNormalContainers()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("GetNormalContainers: %w", err)
|
||||
}
|
||||
|
||||
// support the empty container
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"huatuo-bamai/internal/pod"
|
||||
|
@ -57,7 +58,7 @@ func (c *runqlatCollector) Update() ([]*metric.Data, error) {
|
|||
|
||||
containers, err := pod.GetContainersByType(pod.ContainerTypeNormal)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("GetContainersByType: %w", err)
|
||||
}
|
||||
|
||||
for _, container := range containers {
|
|
@ -47,7 +47,7 @@ func startRunqlatTracerWork(ctx context.Context) error {
|
|||
// load bpf.
|
||||
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("load bpf: %w", err)
|
||||
return fmt.Errorf("failed to LoadBpf, err: %w", err)
|
||||
}
|
||||
defer b.Close()
|
||||
|
||||
|
|
|
@ -17,8 +17,8 @@ package collector
|
|||
import (
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/cgroups"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/internal/utils/parseutil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
|
@ -111,7 +111,7 @@ func getMemoryMetric(p *procfs.Proc) []*metric.Data {
|
|||
data[0] = metric.NewGaugeData("memory_vss", float64(status.VmSize)/1024, "memory vss", nil)
|
||||
data[1] = metric.NewGaugeData("memory_rss", float64(status.VmRSS)/1024, "memory rss", nil)
|
||||
|
||||
rssI, err := parseutil.ReadUint(cgroups.RootFsFilePath("memory") + "/huatuo-bamai/memory.usage_in_bytes")
|
||||
rssI, err := parseutil.ReadUint(cgrouputil.V1MemoryPath() + "/huatuo-bamai/memory.usage_in_bytes")
|
||||
if err != nil {
|
||||
log.Warnf("can't ParseUint, err: %v", err)
|
||||
return nil
|
||||
|
|
|
@ -48,9 +48,10 @@ func newSockstatCollector() (*tracing.EventTracingAttr, error) {
|
|||
func (c *sockstatCollector) Update() ([]*metric.Data, error) {
|
||||
log.Debugf("Updating sockstat metrics")
|
||||
|
||||
// normal containers
|
||||
containers, err := pod.GetNormalContainers()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("GetNormalContainers: %w", err)
|
||||
}
|
||||
|
||||
// support the empty container
|
||||
|
|
|
@ -1,193 +0,0 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/utils/bpfutil"
|
||||
"huatuo-bamai/pkg/metric"
|
||||
"huatuo-bamai/pkg/tracing"
|
||||
|
||||
"github.com/tklauser/numcpus"
|
||||
)
|
||||
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("softirq", newSoftirq)
|
||||
}
|
||||
|
||||
func newSoftirq() (*tracing.EventTracingAttr, error) {
|
||||
cpuPossible, err := numcpus.GetPossible()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("fetch possible cpu num")
|
||||
}
|
||||
|
||||
cpuOnline, err := numcpus.GetOnline()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("fetch possible cpu num")
|
||||
}
|
||||
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &softirqLatency{
|
||||
bpf: nil,
|
||||
isRunning: false,
|
||||
cpuPossible: cpuPossible,
|
||||
cpuOnline: cpuOnline,
|
||||
},
|
||||
Internal: 10,
|
||||
Flag: tracing.FlagTracing | tracing.FlagMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/softirq.c -o $BPF_DIR/softirq.o
|
||||
|
||||
type softirqLatency struct {
|
||||
bpf bpf.BPF
|
||||
isRunning bool
|
||||
cpuPossible int
|
||||
cpuOnline int
|
||||
}
|
||||
|
||||
type softirqLatencyData struct {
|
||||
Timestamp uint64
|
||||
TotalLatency [4]uint64
|
||||
}
|
||||
|
||||
const (
|
||||
softirqHi = iota
|
||||
softirqTime
|
||||
softirqNetTx
|
||||
softirqNetRx
|
||||
softirqBlock
|
||||
softirqIrqPoll
|
||||
softirqTasklet
|
||||
softirqSched
|
||||
softirqHrtimer
|
||||
sofirqRcu
|
||||
softirqMax
|
||||
)
|
||||
|
||||
func irqTypeName(id int) string {
|
||||
switch id {
|
||||
case softirqHi:
|
||||
return "HI"
|
||||
case softirqTime:
|
||||
return "TIMER"
|
||||
case softirqNetTx:
|
||||
return "NET_TX"
|
||||
case softirqNetRx:
|
||||
return "NET_RX"
|
||||
case softirqBlock:
|
||||
return "BLOCK"
|
||||
case softirqIrqPoll:
|
||||
return "IRQ_POLL"
|
||||
case softirqTasklet:
|
||||
return "TASKLET"
|
||||
case softirqSched:
|
||||
return "SCHED"
|
||||
case softirqHrtimer:
|
||||
return "HRTIMER"
|
||||
case sofirqRcu:
|
||||
return "RCU"
|
||||
default:
|
||||
return "ERR_TYPE"
|
||||
}
|
||||
}
|
||||
|
||||
func irqAllowed(id int) bool {
|
||||
switch id {
|
||||
case softirqNetTx, softirqNetRx:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Update implements the metric Collector interface. It dumps the BPF map
// "softirq_percpu_lats" and converts the NET_TX/NET_RX per-CPU latency zones
// into gauge metrics labeled by softirq type, cpuid and zone.
//
// Returns (nil, nil) while the tracer is not running, so the registry can
// poll safely before Start has attached the BPF program.
func (s *softirqLatency) Update() ([]*metric.Data, error) {
	if !s.isRunning {
		return nil, nil
	}

	items, err := s.bpf.DumpMapByName("softirq_percpu_lats")
	if err != nil {
		return nil, fmt.Errorf("dump map: %w", err)
	}

	// NOTE(review): this single labels map instance is handed to every
	// NewGaugeData call below; if NewGaugeData retains the map rather than
	// copying it, all emitted metrics would end up sharing the final label
	// values — confirm NewGaugeData copies its labels argument.
	labels := make(map[string]string)
	metricData := []*metric.Data{}

	// IRQ: 0 ... NR_SOFTIRQS_MAX
	for _, item := range items {
		var irqVector uint32
		// One slot per possible CPU; layout must match the BPF-side value.
		latencyOnAllCPU := make([]softirqLatencyData, s.cpuPossible)

		// The map key is the softirq vector number, little-endian.
		if err = binary.Read(bytes.NewReader(item.Key), binary.LittleEndian, &irqVector); err != nil {
			return nil, fmt.Errorf("read map key: %w", err)
		}

		// Only NET_TX/NET_RX are exported (see irqAllowed).
		if !irqAllowed(int(irqVector)) {
			continue
		}

		if err = binary.Read(bytes.NewReader(item.Value), binary.LittleEndian, &latencyOnAllCPU); err != nil {
			return nil, fmt.Errorf("read map value: %w", err)
		}

		labels["type"] = irqTypeName(int(irqVector))

		for cpuid, lat := range latencyOnAllCPU {
			// Possible CPUs beyond the online range carry no useful data.
			if cpuid >= s.cpuOnline {
				break
			}
			labels["cpuid"] = strconv.Itoa(cpuid)
			for zoneid, zone := range lat.TotalLatency {
				labels["zone"] = strconv.Itoa(zoneid)
				metricData = append(metricData, metric.NewGaugeData("latency", float64(zone), "softirq latency", labels))
			}
		}
	}

	return metricData, nil
}
|
||||
|
||||
// Start implements the ITracingEvent interface. It loads and attaches the
// softirq BPF object, marks the collector runnable, and then blocks until
// the surrounding tracing context is cancelled or the breaker forces a
// detach.
func (s *softirqLatency) Start(ctx context.Context) error {
	b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
	if err != nil {
		return err
	}
	// Close runs only when Start returns, i.e. after the wait below, so the
	// BPF object stays alive for the collector's entire running period.
	defer b.Close()

	if err = b.Attach(); err != nil {
		return err
	}

	// Publish the handle so Update can dump maps while we are running.
	s.bpf = b
	s.isRunning = true

	childCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	// Cancels childCtx if the breaker detaches the program out from under us.
	b.WaitDetachByBreaker(childCtx, cancel)

	// Block until parent cancellation or breaker-triggered detach.
	<-childCtx.Done()

	s.isRunning = false
	return nil
}
|
|
@ -90,6 +90,7 @@ func (c *tcpMemCollector) getTCPMem() (tcpMem, tcpMemBytes, tcpMemLimit float64,
|
|||
func (c *tcpMemCollector) Update() ([]*metric.Data, error) {
|
||||
tcpMem, tcpMemBytes, tcpMemLimit, err := c.getTCPMem()
|
||||
if err != nil {
|
||||
log.Infof("couldn't get tcpmem: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
|
@ -16,10 +16,36 @@ package collector
|
|||
|
||||
import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"os"

	"huatuo-bamai/internal/log"
)
|
||||
|
||||
// xfs_util maps superblocks of XFS devices to retrieve
// essential information from superblock.
const (
	XFS_SB_MAGIC = 0x58465342 // "XFSB": on-disk magic identifying an XFS superblock
	XFSLABEL_MAX = 12         // maximum filesystem label length, in bytes
)

// Construct the XFS superblock, hiding unused variables.
// Decoded with binary.Read in big-endian order (XFS on-disk byte order),
// so field widths and padding must match the on-disk xfs_sb layout exactly.
type xfsSuperBlock struct {
	SbMagicnum  uint32 // sb_magicnum: must equal XFS_SB_MAGIC
	SbBlocksize uint32 // sb_blocksize: filesystem block size in bytes
	_           [16]byte
	_           [7]uint64
	_           [4]uint32
	SbLogblocks uint32 // sb_logblocks: size of the internal log, in blocks
	_           [6]uint16
	_           [XFSLABEL_MAX]byte
	_           [12]uint8
	_           [8]uint64
	_           [12]uint32
	_           [16]byte
}
|
||||
|
||||
func fileLineCounter(filePath string) (int, error) {
|
||||
count := 0
|
||||
buf := make([]byte, 8*20*4096)
|
||||
|
@ -47,3 +73,29 @@ func fileLineCounter(filePath string) (int, error) {
|
|||
|
||||
return count, nil
|
||||
}
|
||||
|
||||
// Calculate the Xlog size from superblock
|
||||
func xfsLogSize(path string) (float64, error) {
|
||||
file, err := os.Open(path)
|
||||
if err != nil {
|
||||
log.Infof("open failed: %v", err)
|
||||
return -1, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
var sb xfsSuperBlock
|
||||
err = binary.Read(file, binary.BigEndian, &sb)
|
||||
if err != nil {
|
||||
log.Infof("read superblock failed: err%v", err)
|
||||
return -1, err
|
||||
}
|
||||
|
||||
// Check Magic Number of Super Block
|
||||
if sb.SbMagicnum != XFS_SB_MAGIC {
|
||||
log.Infof("Not a valid XFS superblock (Magic: 0x%x)", sb.SbMagicnum)
|
||||
return -1, err
|
||||
}
|
||||
|
||||
xlogBytes := float64(sb.SbLogblocks * sb.SbBlocksize)
|
||||
return xlogBytes, nil
|
||||
}
|
||||
|
|
|
@ -72,6 +72,7 @@ func (c *vmStatCollector) Update() ([]*metric.Data, error) {
|
|||
|
||||
file, err := os.Open("/proc/vmstat")
|
||||
if err != nil {
|
||||
log.Error("Fail to open vmstat")
|
||||
return nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
@ -85,6 +86,7 @@ func (c *vmStatCollector) Update() ([]*metric.Data, error) {
|
|||
}
|
||||
value, err := strconv.ParseFloat(parts[1], 64)
|
||||
if err != nil {
|
||||
log.Error("Fail to strconv")
|
||||
return nil, err
|
||||
}
|
||||
metrics = append(metrics,
|
||||
|
|
|
@ -1,63 +0,0 @@
|
|||
### 概述
|
||||
- **类型**:异常事件驱动(tracing/autotracing)
|
||||
- **功能**:自动跟踪系统异常状态,并在异常发生时再触发抓取现场上下文信息
|
||||
- **特点**:
|
||||
- 当系统出现异常时,`autotracing` 会自动触发,捕获相关的上下文信息
|
||||
- 事件数据会实时存储在本地并存储到远端ES,同时你也可以生成Prometheus 统计指标进行观测。
|
||||
- 适用于获取现场时**性能开销较大的场景**,例如检测到指标上升到一定阈值、上升速度过快再触发抓取
|
||||
- **已集成**:cpu 异常使用跟踪(cpu idle)、D状态跟踪(dload)、容器内外部争抢(waitrate)、内存突发分配(memburst)、磁盘异常跟踪(iotracer)
|
||||
|
||||
### 如何添加 Autotracing ?
|
||||
`AutoTracing` 只需实现 `ITracingEvent` 接口并完成注册,即可将事件添加到系统中。
|
||||
>`AutoTracing` 与 `Event` 类型在框架实现上没有任何区别,只是针对不同的场景进行了实际应用的区分。
|
||||
|
||||
```go
|
||||
// ITracingEvent represents a autotracing or event
|
||||
type ITracingEvent interface {
|
||||
Start(ctx context.Context) error
|
||||
}
|
||||
```
|
||||
|
||||
#### 1. 创建结构体
|
||||
```go
|
||||
type exampleTracing struct{}
|
||||
```
|
||||
|
||||
#### 2. 注册回调函数
|
||||
```go
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("example", newExample)
|
||||
}
|
||||
|
||||
func newExample() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &exampleTracing{},
|
||||
Internal: 10, // 再次开启 tracing 的间隔时间 seconds
|
||||
Flag: tracing.FlagTracing, // 标记为 tracing 类型; | tracing.FlagMetric(可选)
|
||||
}, nil
|
||||
}
|
||||
```
|
||||
|
||||
#### 3. 实现接口 ITracingEvent
|
||||
```go
|
||||
func (t *exampleTracing) Start(ctx context.Context) error {
|
||||
// detect your care about
|
||||
...
|
||||
|
||||
// 存储数据到 ES 和 本地
|
||||
    storage.Save("example", containerID, time.Now(), tracerData)
|
||||
}
|
||||
```
|
||||
|
||||
另外也可同时实现接口 Collector 以 Prometheus 格式输出 (可选)
|
||||
|
||||
```go
|
||||
func (c *exampleTracing) Update() ([]*metric.Data, error) {
|
||||
// from tracerData to prometheus.Metric
|
||||
...
|
||||
|
||||
return data, nil
|
||||
}
|
||||
```
|
||||
|
||||
在项目 `core/autotracing` 目录下已集成了多种实际场景的 `autotracing` 示例,以及框架提供的丰富底层接口,包括 bpf prog,map 数据交互、容器信息等,更多详情可参考对应代码实现。
|
|
@ -1,64 +0,0 @@
|
|||
### 概述
|
||||
|
||||
- **类型**:异常事件驱动(tracing/event)
|
||||
- **功能**:常态运行在系统达到预设阈值后抓取上下文信息
|
||||
- **特点**:
|
||||
- 与 `autotracing` 不同,`event` 是常态运行,而不是在异常时再触发。
|
||||
- 事件数据会实时存储在本地并存储到远端ES,同时你也可以生成Prometheus 统计指标进行观测。
|
||||
- 适合用于**常态监控**和**实时分析**,能够及时发现系统中的异常行为, `event` 类型的采集对系统性能影响可忽略。
|
||||
- **已集成**:软中断异常(softirq)、内存异常分配(oom)、软锁定(softlockup)、D 状态进程(hungtask)、内存回收(memreclaim)、异常丢包(dropwatch)、网络入向延迟(netrecvlat) 等
|
||||
|
||||
### 如何添加事件指标
|
||||
只需实现 `ITracingEvent` 接口并完成注册,即可将事件添加到系统。
|
||||
>`AutoTracing` 与 `Event` 类型在框架实现上没有任何区别,只是针对不同的场景进行了实际应用的区分。
|
||||
|
||||
```go
|
||||
// ITracingEvent represents a tracing/event
|
||||
type ITracingEvent interface {
|
||||
Start(ctx context.Context) error
|
||||
}
|
||||
```
|
||||
|
||||
#### 1. 创建 Event 结构体
|
||||
```go
|
||||
type exampleTracing struct{}
|
||||
```
|
||||
|
||||
#### 2. 注册回调函数
|
||||
```go
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("example", newExample)
|
||||
}
|
||||
|
||||
func newExample() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &exampleTracing{},
|
||||
Internal: 10, // 再次开启 tracing 的间隔时间 seconds
|
||||
Flag: tracing.FlagTracing, // 标记为 tracing 类型;| tracing.FlagMetric(可选)
|
||||
}, nil
|
||||
}
|
||||
```
|
||||
|
||||
#### 3. 实现接口 ITracingEvent
|
||||
```go
|
||||
func (t *exampleTracing) Start(ctx context.Context) error {
|
||||
// do something
|
||||
...
|
||||
|
||||
// 存储数据到 ES 和 本地
|
||||
    storage.Save("example", containerID, time.Now(), tracerData)
|
||||
}
|
||||
```
|
||||
|
||||
另外也可同时实现接口 Collector 以 Prometheus 格式输出 (可选)
|
||||
|
||||
```go
|
||||
func (c *exampleTracing) Update() ([]*metric.Data, error) {
|
||||
// from tracerData to prometheus.Metric
|
||||
...
|
||||
|
||||
return data, nil
|
||||
}
|
||||
```
|
||||
|
||||
在项目 `core/events` 目录下已集成了多种实际场景的 `events` 示例,以及框架提供的丰富底层接口,包括 bpf prog, map 数据交互、容器信息等,更多详情可参考对应代码实现。
|
|
@ -1,65 +0,0 @@
|
|||
### 概述
|
||||
|
||||
Metrics 类型用于采集系统性能等指标数据,可输出为 Prometheus 格式,作为服务端对外提供数据,通过接口 `/metrics` (`curl localhost:<port>/metrics`) 获取。
|
||||
|
||||
- **类型**:指标数据采集
|
||||
- **功能**:采集各子系统的性能指标数据
|
||||
- **特点**:
|
||||
- metrics 主要用于采集系统的性能指标,如 CPU 使用率、内存使用率、网络等,适合用于监控系统的性能指标,支持实时分析和长期趋势观察。
|
||||
- 指标数据可以来自常规 procfs/sysfs 采集,也可以从 tracing (autotracing, event) 类型生成指标数据
|
||||
- Prometheus 格式输出,便于无缝集成到 Prometheus 观测体系
|
||||
|
||||
- **已集成**:
|
||||
- cpu (sys, usr, util, load, nr_running...)
|
||||
- memory(vmstat, memory_stat, directreclaim, asyncreclaim...)
|
||||
- IO (d2c, q2c, freeze, flush...)
|
||||
- 网络(arp, socket mem, qdisc, netstat, netdev, socketstat...)
|
||||
|
||||
### 如何添加统计指标
|
||||
|
||||
只需实现 `Collector` 接口并完成注册,即可将指标添加到系统中。
|
||||
|
||||
```go
|
||||
type Collector interface {
|
||||
// Get new metrics and expose them via prometheus registry.
|
||||
Update() ([]*Data, error)
|
||||
}
|
||||
```
|
||||
|
||||
#### 1. 创建结构体
|
||||
在 `core/metrics` 目录下创建实现 `Collector` 接口的结构体:
|
||||
|
||||
```go
|
||||
type exampleMetric struct{
|
||||
}
|
||||
```
|
||||
|
||||
#### 2. 注册回调函数
|
||||
```go
|
||||
func init() {
|
||||
tracing.RegisterEventTracing("example", newExample)
|
||||
}
|
||||
|
||||
func newExample() (*tracing.EventTracingAttr, error) {
|
||||
return &tracing.EventTracingAttr{
|
||||
TracingData: &exampleMetric{},
|
||||
Flag: tracing.FlagMetric, // 标记为 Metric 类型
|
||||
}, nil
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
#### 3. 实现 `Update` 方法
|
||||
|
||||
```go
|
||||
func (c *exampleMetric) Update() ([]*metric.Data, error) {
|
||||
// do something
|
||||
...
|
||||
return []*metric.Data{
|
||||
metric.NewGaugeData("example", value, "description of example", nil),
|
||||
}, nil
|
||||
|
||||
}
|
||||
```
|
||||
|
||||
在项目 `core/metrics` 目录下已集成了多种实际场景的 `Metrics` 示例,以及框架提供的丰富底层接口,包括 bpf prog, map 数据交互、容器信息等,更多详情可参考对应代码实现。
|
Before Width: | Height: | Size: 781 KiB |
Before Width: | Height: | Size: 629 KiB After Width: | Height: | Size: 629 KiB |
Before Width: | Height: | Size: 1.7 MiB After Width: | Height: | Size: 883 KiB |
Before Width: | Height: | Size: 1.4 MiB After Width: | Height: | Size: 322 KiB |
Before Width: | Height: | Size: 20 KiB |
Before Width: | Height: | Size: 786 KiB After Width: | Height: | Size: 174 KiB |
|
@ -129,13 +129,13 @@
|
|||
| network | netstat_TcpExt_DelayedACKLocked | A delayed ACK timer expires, but the TCP stack can’t send an ACK immediately due to the socket is locked by a userspace program. The TCP stack will send a pure ACK later (after the userspace program unlock the socket). When the TCP stack sends the pure ACK later, the TCP stack will also update TcpExtDelayedACKs and exit the delayed ACK mode | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_DelayedACKLost | It will be updated when the TCP stack receives a packet which has been ACKed. A Delayed ACK loss might cause this issue, but it would also be triggered by other reasons, such as a packet is duplicated in the network | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_DelayedACKs | A delayed ACK timer expires. The TCP stack will send a pure ACK packet and exit the delayed ACK mode | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_EmbryonicRsts | resets received for embryonic SYN_RECV sockets | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_EmbryonicRsts | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_IPReversePathFilter | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_ListenDrops | When kernel receives a SYN from a client, and if the TCP accept queue is full, kernel will drop the SYN and add 1 to TcpExtListenOverflows. At the same time kernel will also add 1 to TcpExtListenDrops. When a TCP socket is in LISTEN state, and kernel need to drop a packet, kernel would always add 1 to TcpExtListenDrops. So increase TcpExtListenOverflows would let TcpExtListenDrops increasing at the same time, but TcpExtListenDrops would also increase without TcpExtListenOverflows increasing, e.g. a memory allocation fail would also let TcpExtListenDrops increase | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_ListenOverflows | When kernel receives a SYN from a client, and if the TCP accept queue is full, kernel will drop the SYN and add 1 to TcpExtListenOverflows. At the same time kernel will also add 1 to TcpExtListenDrops. When a TCP socket is in LISTEN state, and kernel need to drop a packet, kernel would always add 1 to TcpExtListenDrops. So increase TcpExtListenOverflows would let TcpExtListenDrops increasing at the same time, but TcpExtListenDrops would also increase without TcpExtListenOverflows increasing, e.g. a memory allocation fail would also let TcpExtListenDrops increase | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_LockDroppedIcmps | ICMP packets dropped because socket was locked | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_LockDroppedIcmps | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_OfoPruned | The TCP stack tries to discard packet on the out of order queue | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_OutOfWindowIcmps | ICMP pkts dropped because they were out-of-window | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_OutOfWindowIcmps | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_PAWSActive | Packets are dropped by PAWS in Syn-Sent status | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_PAWSEstab | Packets are dropped by PAWS in any status other than Syn-Sent | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_PFMemallocDrop | \- | count | host,container | proc fs |
|
||||
|
@ -166,7 +166,7 @@
|
|||
| network | netstat_TcpExt_TCPDSACKOfoSent | The TCP stack receives an out of order duplicate packet, so it sends a DSACK to the sender | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDSACKOldSent | The TCP stack receives a duplicate packet which has been acked, so it sends a DSACK to the sender | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDSACKRecv | The TCP stack receives a DSACK, which indicates an acknowledged duplicate packet is received | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDSACKUndo | Congestion window recovered without slow start using DSACK | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDSACKUndo | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDeferAcceptDrop | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDelivered | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPDeliveredCE | \- | count | host,container | proc fs |
|
||||
|
@ -197,7 +197,7 @@
|
|||
| network | netstat_TcpExt_TCPMD5Unexpected | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPMTUPFail | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPMTUPSuccess | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPMemoryPressures | Number of times TCP ran low on memory | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPMemoryPressures | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPMemoryPressuresChrono | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPMinTTLDrop | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPOFODrop | The TCP layer receives an out of order packet but doesn’t have enough memory, so drops it. Such packets won’t be counted into TcpExtTCPOFOQueue | count | host,container | proc fs |
|
||||
|
@ -234,7 +234,7 @@
|
|||
| network | netstat_TcpExt_TCPTimeouts | TCP timeout events | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPToZeroWindowAdv | The TCP receive window is set to zero from a no-zero value | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPWantZeroWindowAdv | Depending on current memory usage, the TCP stack tries to set receive window to zero. But the receive window might still be a no-zero value | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPWinProbe | Number of ACK packets to be sent at regular intervals to make sure a reverse ACK packet opening back a window has not been lost | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPWinProbe | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TCPWqueueTooBig | \- | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TW | TCP sockets finished time wait in fast timer | count | host,container | proc fs |
|
||||
| network | netstat_TcpExt_TWKilled | TCP sockets finished time wait in slow timer | count | host,container | proc fs |
|
|
@ -1,37 +0,0 @@
|
|||
### 概述
|
||||
HUATUO 已支持自动追踪指标如下:
|
||||
|
||||
| 追踪名称 | 核心功能 | 场景 |
|
||||
| ---------------| --------------------- |-------------------------------------- |
|
||||
| cpusys | 宿主 sys 突增检测 | 由于系统负载异常导致业务毛刺问题 |
|
||||
| cpuidle | 容器 cpu idle 掉底检测,提供调用栈,火焰图,进程上下文信息等 | 容器 cpu 使用异常,帮助业务描绘进程热点 |
|
||||
| dload | 跟踪容器loadavg状态进程状态,自动抓取容器 D 状态进程调用信息 | 系统 D 状态突增通常和资源不可用或者锁被长期持有相关,R 状态进程数量突增往往是业务代码设计不合理导致 |
|
||||
| waitrate | 容器资源争抢检测,容器调度被争抢时提供正在争抢的容器信息 | 容器被争抢可能会引起业务毛刺,已存在争抢指标缺乏具体正在争抢的容器信息,通过 waitrate 追踪可以获取参与争抢的容器信息,给混部资源隔离提供参考 |
|
||||
| memburst | 记录内存突发分配时上下文信息 | 宿主机短时间内大量分配内存,检测宿主机上短时间内大量分配内存事件。突发性内存分配可能引发直接回收或者 oom 等 |
|
||||
| iotracing | 检测宿主磁盘 IO 延迟异常。输出访问的文件名和路径、磁盘设备、inode 号、容器等上下文信息 | 频繁出现磁盘 IO 带宽打满、磁盘访问突增,进而导致应用请求延迟或者系统性能抖动 |
|
||||
|
||||
### CPUSYS
|
||||
系统态 CPU 时间反映内核执行开销,包括系统调用、中断处理、内核线程调度、内存管理及锁竞争等操作。该指标异常升高,通常表明存在内核级性能瓶颈:高频系统调用、硬件设备异常、锁争用或内存回收压力(kswapd 直接回收)等。
|
||||
|
||||
cpusys 检测到该指标异常时,自动会触发抓取系统的调用栈并生成火焰图,帮助定位问题根因。 既考虑到系统 cpu sys 达到阈值,或者sys 突发毛刺带来的问题,其中触发条件如下:
|
||||
- CPU Sys 使用率 > 阈值 A
|
||||
- CPU Sys 使用率单位时间内增长 > 阈值 B
|
||||
|
||||
### CPUIDLE
|
||||
K8S 容器环境,CPU idle 时间(即 CPU 处于空闲状态的时间比例)的突然下降通常表明容器内进程正在过度消耗 CPU 资源,可能引发业务延迟、调度争抢甚至整体系统性能下降。
|
||||
|
||||
cpuidle 自动会触发抓取调用栈生成火焰图,触发条件:
|
||||
- CPU Sys 使用率 > 阈值 A
|
||||
- CPU User 使用率 > 阈值 B && CPU User 使用率单位时间增长 > 阈值 C
|
||||
- CPU Usage > 阈值 D && CPU Usage 单位时间增长 > 阈值 E
|
||||
|
||||
### DLOAD
|
||||
D 状态是一种特殊的进程状态,指进程因等待内核或硬件资源而进入的一种特殊阻塞状态。与普通睡眠(S 状态)不同,D 状态进程无法被强制终止(包括 SIGKILL),也不会响应中断信号。该状态通常发生在 I/O 操作(如直接读写磁盘)、硬件驱动故障时。系统 D 状态突增往往和资源不可用或者锁被长期持有导致,可运行进程突增往往是业务代码设计不合理导致。dload 借助 netlink 获取容器 running + uninterruptible 进程数量,通过滑动窗口算法计算出过去 1 分钟内容器 D 进程对负载做出的贡献值,当平滑计算后的 D 状态进程负载值超过阈值的时候,表示容器内的 D 状态进程数量出现异常,开始触发收集容器运行情况、D 状态进程信息。
|
||||
|
||||
### MemBurst
|
||||
memburst 用于检测宿主机上短时间内大量分配内存的情况,突发性内存分配可能引发直接回收甚至 OOM,所以一旦突发性内存分配就需要记录相关信息。
|
||||
|
||||
### IOTracing
|
||||
当 I/O 带宽被占满 或 磁盘访问量突增 时,系统可能因 I/O 资源竞争而出现 请求延迟升高、性能抖动,甚至影响整个系统的稳定性。
|
||||
|
||||
iotracing 在宿主磁盘负载高、IO 延迟异常时,输出异常时 IO 访问的文件名和路径、磁盘设备、inode 号,容器名等上下文信息。
|
|
@ -1,510 +0,0 @@
|
|||
### 总览
|
||||
HUATUO 目前支持的异常上下文捕获事件如下:
|
||||
|
||||
| 事件名称 | 核心功能 | 场景 |
|
||||
| ---------------| --------------------- |----------------------------------------|
|
||||
| softirq | 宿主软中断延迟响应或长期关闭,输出长时间关闭软中断的内核调用栈,进程信息等 | 该类问题会严重影响网络收发,进而导致业务毛刺或者超时等其他问题 |
|
||||
| dropwatch | TCP 数据包丢包检测,输出发生丢包时主机、网络上下文信息等 | 该类问题主要会引起业务毛刺和延迟 |
|
||||
| netrecvlat | 在网络收方向获取数据包从驱动、协议栈、到用户主动收过程的延迟事件 | 网络延迟问题中有一类是数据传输阶段收方向存在延迟,但不清楚是延迟位置,netrecvlat 根据 skb 入网卡时间戳依次在驱动、协议栈和用户拷贝数据等路径计算延迟,通过预先设定的阈值过滤超时的数据包,定位延迟位置 |
|
||||
| oom | 检测宿主或容器内 oom 事件 | 当宿主机层面或者容器维度发生 oom 事件时,能够获取触发 oom 的进程信息、被 kill 的进程信息以及容器信息,便于定位进程内存泄漏、异常退出等问题 |
|
||||
| softlockup | 当系统上发生 softlockup 时,收集目标进程信息以及 cpu 信息,同时获取各个 cpu 上的内核栈信息 | 系统发生 softlockup |
|
||||
| hungtask | 提供系统内所有 D 状态进程数量、内核栈信息 | 用于定位瞬时出现 D 进程的场景,能及时保留现场便于后期问题跟踪 |
|
||||
| memreclaim | 进程进入直接回收的耗时,超过时间阈值,记录进程信息 | 内存压力过大时,如果此时进程申请内存,有可能进入直接回收,此时处于同步回收阶段,可能会造成业务进程的卡顿,此时记录进程进入直接回收的时间,有助于我们判断此进程被直接回收影响的剧烈程度 |
|
||||
| netdev | 检测网卡状态变化 | 网卡抖动、bond 环境下 slave 异常等 |
|
||||
| lacp | 检测 lacp 状态变化 | bond 模式 4 下,监控 lacp 协商状态 |
|
||||
|
||||
|
||||
### 软中断关闭过长检测
|
||||
|
||||
**功能介绍**
|
||||
|
||||
Linux 内核存在进程上下文,中断上下文,软中断上下文,NMI 上下文等概念,这些上下文之间可能存在共享数据情况,因此为了确保数据的一致性,正确性,内核代码可能会关闭软中断或者硬中断。从理论角度,单次关闭中断或者软中断时间不能太长,但高频的系统调用,陷入内核态频繁执行关闭中断或软中断,同样会造成"长时间关闭"的现象,拖慢了系统的响应。"关闭中断,软中断时间过长"这类问题非常隐蔽,且定位手段有限,同时影响又非常大,体现在业务应用上一般为接收数据超时。针对这种场景我们基于BPF技术构建了检测硬件中断,软件中断关闭过长的能力。
|
||||
|
||||
**示例**
|
||||
|
||||
如下为抓取到的关闭中断过长的实例,这些信息被自动上传到 ES.
|
||||
|
||||
```
|
||||
{
|
||||
"_index": "***_2025-06-11",
|
||||
"_type": "_doc",
|
||||
"_id": "***",
|
||||
"_score": 0,
|
||||
"_source": {
|
||||
"uploaded_time": "2025-06-11T16:05:16.251152703+08:00",
|
||||
"hostname": "***",
|
||||
"tracer_data": {
|
||||
"comm": "observe-agent",
|
||||
"stack": "stack:\nscheduler_tick/ffffffffa471dbc0 [kernel]\nupdate_process_times/ffffffffa4789240 [kernel]\ntick_sched_handle.isra.8/ffffffffa479afa0 [kernel]\ntick_sched_timer/ffffffffa479b000 [kernel]\n__hrtimer_run_queues/ffffffffa4789b60 [kernel]\nhrtimer_interrupt/ffffffffa478a610 [kernel]\n__sysvec_apic_timer_interrupt/ffffffffa4661a60 [kernel]\nasm_call_sysvec_on_stack/ffffffffa5201130 [kernel]\nsysvec_apic_timer_interrupt/ffffffffa5090500 [kernel]\nasm_sysvec_apic_timer_interrupt/ffffffffa5200d30 [kernel]\ndump_stack/ffffffffa506335e [kernel]\ndump_header/ffffffffa5058eb0 [kernel]\noom_kill_process.cold.9/ffffffffa505921a [kernel]\nout_of_memory/ffffffffa48a1740 [kernel]\nmem_cgroup_out_of_memory/ffffffffa495ff70 [kernel]\ntry_charge/ffffffffa4964ff0 [kernel]\nmem_cgroup_charge/ffffffffa4968de0 [kernel]\n__add_to_page_cache_locked/ffffffffa4895c30 [kernel]\nadd_to_page_cache_lru/ffffffffa48961a0 [kernel]\npagecache_get_page/ffffffffa4897ad0 [kernel]\ngrab_cache_page_write_begin/ffffffffa4899d00 [kernel]\niomap_write_begin/ffffffffa49fddc0 [kernel]\niomap_write_actor/ffffffffa49fe980 [kernel]\niomap_apply/ffffffffa49fbd20 [kernel]\niomap_file_buffered_write/ffffffffa49fc040 [kernel]\nxfs_file_buffered_aio_write/ffffffffc0f3bed0 [xfs]\nnew_sync_write/ffffffffa497ffb0 [kernel]\nvfs_write/ffffffffa4982520 [kernel]\nksys_write/ffffffffa4982880 [kernel]\ndo_syscall_64/ffffffffa508d190 [kernel]\nentry_SYSCALL_64_after_hwframe/ffffffffa5200078 [kernel]",
|
||||
"now": 5532940660025295,
|
||||
"offtime": 237328905,
|
||||
"cpu": 1,
|
||||
"threshold": 100000000,
|
||||
"pid": 688073
|
||||
},
|
||||
"tracer_time": "2025-06-11 16:05:16.251 +0800",
|
||||
"tracer_type": "auto",
|
||||
"time": "2025-06-11 16:05:16.251 +0800",
|
||||
"region": "***",
|
||||
"tracer_name": "softirq",
|
||||
"es_index_time": 1749629116268
|
||||
},
|
||||
"fields": {
|
||||
"time": [
|
||||
"2025-06-11T08:05:16.251Z"
|
||||
]
|
||||
},
|
||||
"_ignored": [
|
||||
"tracer_data.stack"
|
||||
],
|
||||
"_version": 1,
|
||||
"sort": [
|
||||
1749629116251
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
本地物理机也会存储一份相同的数据:
|
||||
|
||||
```
|
||||
2025-06-11 16:05:16 *** Region=***
|
||||
{
|
||||
"hostname": "***",
|
||||
"region": "***",
|
||||
"uploaded_time": "2025-06-11T16:05:16.251152703+08:00",
|
||||
"time": "2025-06-11 16:05:16.251 +0800",
|
||||
"tracer_name": "softirq",
|
||||
"tracer_time": "2025-06-11 16:05:16.251 +0800",
|
||||
"tracer_type": "auto",
|
||||
"tracer_data": {
|
||||
"offtime": 237328905,
|
||||
"threshold": 100000000,
|
||||
"comm": "observe-agent",
|
||||
"pid": 688073,
|
||||
"cpu": 1,
|
||||
"now": 5532940660025295,
|
||||
"stack": "stack:\nscheduler_tick/ffffffffa471dbc0 [kernel]\nupdate_process_times/ffffffffa4789240 [kernel]\ntick_sched_handle.isra.8/ffffffffa479afa0 [kernel]\ntick_sched_timer/ffffffffa479b000 [kernel]\n__hrtimer_run_queues/ffffffffa4789b60 [kernel]\nhrtimer_interrupt/ffffffffa478a610 [kernel]\n__sysvec_apic_timer_interrupt/ffffffffa4661a60 [kernel]\nasm_call_sysvec_on_stack/ffffffffa5201130 [kernel]\nsysvec_apic_timer_interrupt/ffffffffa5090500 [kernel]\nasm_sysvec_apic_timer_interrupt/ffffffffa5200d30 [kernel]\ndump_stack/ffffffffa506335e [kernel]\ndump_header/ffffffffa5058eb0 [kernel]\noom_kill_process.cold.9/ffffffffa505921a [kernel]\nout_of_memory/ffffffffa48a1740 [kernel]\nmem_cgroup_out_of_memory/ffffffffa495ff70 [kernel]\ntry_charge/ffffffffa4964ff0 [kernel]\nmem_cgroup_charge/ffffffffa4968de0 [kernel]\n__add_to_page_cache_locked/ffffffffa4895c30 [kernel]\nadd_to_page_cache_lru/ffffffffa48961a0 [kernel]\npagecache_get_page/ffffffffa4897ad0 [kernel]\ngrab_cache_page_write_begin/ffffffffa4899d00 [kernel]\niomap_write_begin/ffffffffa49fddc0 [kernel]\niomap_write_actor/ffffffffa49fe980 [kernel]\niomap_apply/ffffffffa49fbd20 [kernel]\niomap_file_buffered_write/ffffffffa49fc040 [kernel]\nxfs_file_buffered_aio_write/ffffffffc0f3bed0 [xfs]\nnew_sync_write/ffffffffa497ffb0 [kernel]\nvfs_write/ffffffffa4982520 [kernel]\nksys_write/ffffffffa4982880 [kernel]\ndo_syscall_64/ffffffffa508d190 [kernel]\nentry_SYSCALL_64_after_hwframe/ffffffffa5200078 [kernel]"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 协议栈丢包检测
|
||||
|
||||
**功能介绍**
|
||||
|
||||
在数据包收发过程中由于各类原因,可能出现丢包的现象,丢包可能会导致业务请求延迟,甚至超时。dropwatch 借助 eBPF 观测内核网络数据包丢弃情况,输出丢包网络上下文,如:源目的地址,源目的端口,seq, seqack, pid, comm, stack 信息等。dorpwatch 主要用于检测 TCP 协议相关的丢包,通过预先埋点过滤数据包,确定丢包位置以便于排查丢包根因。
|
||||
|
||||
**示例**
|
||||
|
||||
通过 dropwatch 抓取到的相关信息会自动上传到 ES。如下为抓取到的一案例:kubelet 在发送 SYN 时,由于设备丢包,导致数据包发送失败。
|
||||
|
||||
```
|
||||
{
|
||||
"_index": "***_2025-06-11",
|
||||
"_type": "_doc",
|
||||
"_id": "***",
|
||||
"_score": 0,
|
||||
"_source": {
|
||||
"uploaded_time": "2025-06-11T16:58:15.100223795+08:00",
|
||||
"hostname": "***",
|
||||
"tracer_data": {
|
||||
"comm": "kubelet",
|
||||
"stack": "kfree_skb/ffffffff9a0cd5c0 [kernel]\nkfree_skb/ffffffff9a0cd5c0 [kernel]\nkfree_skb_list/ffffffff9a0cd670 [kernel]\n__dev_queue_xmit/ffffffff9a0ea020 [kernel]\nip_finish_output2/ffffffff9a18a720 [kernel]\n__ip_queue_xmit/ffffffff9a18d280 [kernel]\n__tcp_transmit_skb/ffffffff9a1ad890 [kernel]\ntcp_connect/ffffffff9a1ae610 [kernel]\ntcp_v4_connect/ffffffff9a1b3450 [kernel]\n__inet_stream_connect/ffffffff9a1d25f0 [kernel]\ninet_stream_connect/ffffffff9a1d2860 [kernel]\n__sys_connect/ffffffff9a0c1170 [kernel]\n__x64_sys_connect/ffffffff9a0c1240 [kernel]\ndo_syscall_64/ffffffff9a2ea9f0 [kernel]\nentry_SYSCALL_64_after_hwframe/ffffffff9a400078 [kernel]",
|
||||
"saddr": "10.79.68.62",
|
||||
"pid": 1687046,
|
||||
"type": "common_drop",
|
||||
"queue_mapping": 11,
|
||||
"dport": 2052,
|
||||
"pkt_len": 74,
|
||||
"ack_seq": 0,
|
||||
"daddr": "10.179.142.26",
|
||||
"state": "SYN_SENT",
|
||||
"src_hostname": "***",
|
||||
"sport": 15402,
|
||||
"dest_hostname": "***",
|
||||
"seq": 1902752773,
|
||||
"max_ack_backlog": 0
|
||||
},
|
||||
"tracer_time": "2025-06-11 16:58:15.099 +0800",
|
||||
"tracer_type": "auto",
|
||||
"time": "2025-06-11 16:58:15.099 +0800",
|
||||
"region": "***",
|
||||
"tracer_name": "dropwatch",
|
||||
"es_index_time": 1749632295120
|
||||
},
|
||||
"fields": {
|
||||
"time": [
|
||||
"2025-06-11T08:58:15.099Z"
|
||||
]
|
||||
},
|
||||
"_ignored": [
|
||||
"tracer_data.stack"
|
||||
],
|
||||
"_version": 1,
|
||||
"sort": [
|
||||
1749632295099
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
本地物理机也会存储一份相同的数据:
|
||||
|
||||
```
|
||||
2025-06-11 16:58:15 Host=*** Region=***
|
||||
{
|
||||
"hostname": "***",
|
||||
"region": "***",
|
||||
"uploaded_time": "2025-06-11T16:58:15.100223795+08:00",
|
||||
"time": "2025-06-11 16:58:15.099 +0800",
|
||||
"tracer_name": "dropwatch",
|
||||
"tracer_time": "2025-06-11 16:58:15.099 +0800",
|
||||
"tracer_type": "auto",
|
||||
"tracer_data": {
|
||||
"type": "common_drop",
|
||||
"comm": "kubelet",
|
||||
"pid": 1687046,
|
||||
"saddr": "10.79.68.62",
|
||||
"daddr": "10.179.142.26",
|
||||
"sport": 15402,
|
||||
"dport": 2052,
|
||||
    "src_hostname": "***",
|
||||
"dest_hostname": "***",
|
||||
"max_ack_backlog": 0,
|
||||
"seq": 1902752773,
|
||||
"ack_seq": 0,
|
||||
"queue_mapping": 11,
|
||||
"pkt_len": 74,
|
||||
"state": "SYN_SENT",
|
||||
"stack": "kfree_skb/ffffffff9a0cd5c0 [kernel]\nkfree_skb/ffffffff9a0cd5c0 [kernel]\nkfree_skb_list/ffffffff9a0cd670 [kernel]\n__dev_queue_xmit/ffffffff9a0ea020 [kernel]\nip_finish_output2/ffffffff9a18a720 [kernel]\n__ip_queue_xmit/ffffffff9a18d280 [kernel]\n__tcp_transmit_skb/ffffffff9a1ad890 [kernel]\ntcp_connect/ffffffff9a1ae610 [kernel]\ntcp_v4_connect/ffffffff9a1b3450 [kernel]\n__inet_stream_connect/ffffffff9a1d25f0 [kernel]\ninet_stream_connect/ffffffff9a1d2860 [kernel]\n__sys_connect/ffffffff9a0c1170 [kernel]\n__x64_sys_connect/ffffffff9a0c1240 [kernel]\ndo_syscall_64/ffffffff9a2ea9f0 [kernel]\nentry_SYSCALL_64_after_hwframe/ffffffff9a400078 [kernel]"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 协议栈收包延迟
|
||||
|
||||
**功能介绍**
|
||||
|
||||
线上业务网络延迟问题是比较难定位的,任何方向,任何的阶段都有可能出现问题。比如收方向的延迟,驱动、协议栈、用户程序等都有可能出现问题,因此我们开发了 netrecvlat 检测功能,借助 skb 入网卡的时间戳,在驱动,协议栈层,用户态层检查延迟时间,当收包延迟达到阈值时,借助 eBPF 获取网络上下文信息(五元组、延迟位置、进程信息等)。收方向传输路径示意:**网卡 -> 驱动 -> 协议栈 -> 用户主动收**
|
||||
|
||||
**示例**
|
||||
|
||||
一个业务容器从内核收包延迟超过 90s,通过 netrecvlat 追踪,ES 查询输出如下:
|
||||
|
||||
```
|
||||
{
|
||||
"_index": "***_2025-06-11",
|
||||
"_type": "_doc",
|
||||
"_id": "***",
|
||||
"_score": 0,
|
||||
"_source": {
|
||||
"tracer_data": {
|
||||
"dport": 49000,
|
||||
"pkt_len": 26064,
|
||||
"comm": "nginx",
|
||||
"ack_seq": 689410995,
|
||||
"saddr": "10.156.248.76",
|
||||
"pid": 2921092,
|
||||
"where": "TO_USER_COPY",
|
||||
"state": "ESTABLISHED",
|
||||
"daddr": "10.134.72.4",
|
||||
"sport": 9213,
|
||||
"seq": 1009085774,
|
||||
"latency_ms": 95973
|
||||
},
|
||||
"container_host_namespace": "***",
|
||||
"container_hostname": "***.docker",
|
||||
"es_index_time": 1749628496541,
|
||||
"uploaded_time": "2025-06-11T15:54:56.404864955+08:00",
|
||||
"hostname": "***",
|
||||
"container_type": "normal",
|
||||
"tracer_time": "2025-06-11 15:54:56.404 +0800",
|
||||
"time": "2025-06-11 15:54:56.404 +0800",
|
||||
"region": "***",
|
||||
"container_level": "1",
|
||||
"container_id": "***",
|
||||
"tracer_name": "netrecvlat"
|
||||
},
|
||||
"fields": {
|
||||
"time": [
|
||||
"2025-06-11T07:54:56.404Z"
|
||||
]
|
||||
},
|
||||
"_version": 1,
|
||||
"sort": [
|
||||
1749628496404
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
本地物理机也会存储一份相同的数据:
|
||||
|
||||
```
|
||||
2025-06-11 15:54:46 Host=*** Region=*** ContainerHost=***.docker ContainerID=*** ContainerType=normal ContainerLevel=1
|
||||
{
|
||||
"hostname": "***",
|
||||
"region": "***",
|
||||
"container_id": "***",
|
||||
"container_hostname": "***.docker",
|
||||
"container_host_namespace": "***",
|
||||
"container_type": "normal",
|
||||
"container_level": "1",
|
||||
"uploaded_time": "2025-06-11T15:54:46.129136232+08:00",
|
||||
"time": "2025-06-11 15:54:46.129 +0800",
|
||||
"tracer_time": "2025-06-11 15:54:46.129 +0800",
|
||||
"tracer_name": "netrecvlat",
|
||||
"tracer_data": {
|
||||
"comm": "nginx",
|
||||
"pid": 2921092,
|
||||
"where": "TO_USER_COPY",
|
||||
"latency_ms": 95973,
|
||||
"state": "ESTABLISHED",
|
||||
"saddr": "10.156.248.76",
|
||||
"daddr": "10.134.72.4",
|
||||
"sport": 9213,
|
||||
"dport": 49000,
|
||||
"seq": 1009024958,
|
||||
"ack_seq": 689410995,
|
||||
"pkt_len": 20272
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 物理机、容器内存超用
|
||||
|
||||
**功能介绍**
|
||||
|
||||
程序运行时申请的内存超过了系统或进程可用的内存上限,导致系统或应用程序崩溃。常见于内存泄漏、大数据处理或资源配置不足的场景。通过在 oom 的内核流程插入 BPF 钩子,获取 oom 上下文的详细信息并传递到用户态。这些信息包括进程信息、被 kill 的进程信息、容器信息。
|
||||
|
||||
**示例**
|
||||
|
||||
一个容器内发生 oom 时,被抓取的信息如下:
|
||||
|
||||
```
|
||||
{
|
||||
"_index": "***_cases_2025-06-11",
|
||||
"_type": "_doc",
|
||||
"_id": "***",
|
||||
"_score": 0,
|
||||
"_source": {
|
||||
"uploaded_time": "2025-06-11T17:09:07.236482841+08:00",
|
||||
"hostname": "***",
|
||||
"tracer_data": {
|
||||
"victim_process_name": "java",
|
||||
"trigger_memcg_css": "0xff4b8d8be3818000",
|
||||
"victim_container_hostname": "***.docker",
|
||||
"victim_memcg_css": "0xff4b8d8be3818000",
|
||||
"trigger_process_name": "java",
|
||||
"victim_pid": 3218745,
|
||||
"trigger_pid": 3218804,
|
||||
"trigger_container_hostname": "***.docker",
|
||||
"victim_container_id": "***",
|
||||
"trigger_container_id": "***",
|
||||
"tracer_time": "2025-06-11 17:09:07.236 +0800",
|
||||
"tracer_type": "auto",
|
||||
"time": "2025-06-11 17:09:07.236 +0800",
|
||||
"region": "***",
|
||||
"tracer_name": "oom",
|
||||
"es_index_time": 1749632947258
|
||||
},
|
||||
"fields": {
|
||||
"time": [
|
||||
"2025-06-11T09:09:07.236Z"
|
||||
]
|
||||
},
|
||||
"_version": 1,
|
||||
"sort": [
|
||||
1749632947236
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
另外 oom event 还实现了 `Collector` 接口,这样还会通过 Prometheus 统计宿主 oom 发生的次数,并区分宿主机和容器的事件。
|
||||
|
||||
### 内核 softlockup
|
||||
|
||||
**功能介绍**
|
||||
|
||||
softlockup 是 Linux 内核检测到的一种异常状态,指某个 CPU 核心上的内核线程(或进程)长时间占用 CPU 且不调度,导致系统无法正常响应其他任务。如内核代码 bug、cpu 过载、设备驱动问题等都会导致 softlockup。当系统发生 softlockup 时,收集目标进程的信息以及 cpu 信息,获取各个 cpu 上的内核栈信息同时保存问题的发生次数。
|
||||
|
||||
### 进程阻塞
|
||||
|
||||
**功能介绍**
|
||||
|
||||
D 状态进程(也称为不可中断睡眠状态,Uninterruptible)是一种特殊的进程状态,表示进程因等待某些系统资源而阻塞,且不能被信号或外部中断唤醒。常见场景如:磁盘 I/O 操作、内核阻塞、硬件故障等。hungtask 捕获系统内所有 D 状态进程的内核栈并保存 D 进程的数量。用于定位瞬间出现一些 D 进程的场景,可以在现场消失后仍然分析到问题根因。
|
||||
|
||||
**示例**
|
||||
|
||||
```
|
||||
{
|
||||
"_index": "***_2025-06-10",
|
||||
"_type": "_doc",
|
||||
"_id": "8yyOV5cBGoYArUxjSdvr",
|
||||
"_score": 0,
|
||||
"_source": {
|
||||
"uploaded_time": "2025-06-10T09:57:12.202191192+08:00",
|
||||
"hostname": "***",
|
||||
"tracer_data": {
|
||||
"cpus_stack": "2025-06-10 09:57:14 sysrq: Show backtrace of all active CPUs\n2025-06-10 09:57:14 NMI backtrace for cpu 33\n2025-06-10 09:57:14 CPU: 33 PID: 768309 Comm: huatuo-bamai Kdump: loaded Tainted: G S W OEL 5.10.0-216.0.0.115.v1.0.x86_64 #1\n2025-06-10 09:57:14 Hardware name: Inspur SA5212M5/YZMB-00882-104, BIOS 4.1.12 11/27/2019\n2025-06-10 09:57:14 Call Trace:\n2025-06-10 09:57:14 dump_stack+0x57/0x6e\n2025-06-10 09:57:14 nmi_cpu_backtrace.cold.0+0x30/0x65\n2025-06-10 09:57:14 ? lapic_can_unplug_cpu+0x80/0x80\n2025-06-10 09:57:14 nmi_trigger_cpumask_backtrace+0xdf/0xf0\n2025-06-10 09:57:14 arch_trigger_cpumask_backtrace+0x15/0x20\n2025-06-10 09:57:14 sysrq_handle_showallcpus+0x14/0x90\n2025-06-10 09:57:14 __handle_sysrq.cold.8+0x77/0xe8\n2025-06-10 09:57:14 write_sysrq_trigger+0x3d/0x60\n2025-06-10 09:57:14 proc_reg_write+0x38/0x80\n2025-06-10 09:57:14 vfs_write+0xdb/0x250\n2025-06-10 09:57:14 ksys_write+0x59/0xd0\n2025-06-10 09:57:14 do_syscall_64+0x39/0x80\n2025-06-10 09:57:14 entry_SYSCALL_64_after_hwframe+0x62/0xc7\n2025-06-10 09:57:14 RIP: 0033:0x4088ae\n2025-06-10 09:57:14 Code: 48 83 ec 38 e8 13 00 00 00 48 83 c4 38 5d c3 cc cc cc cc cc cc cc cc cc cc cc cc cc 49 89 f2 48 89 fa 48 89 ce 48 89 df 0f 05 <48> 3d 01 f0 ff ff 76 15 48 f7 d8 48 89 c1 48 c7 c0 ff ff ff ff 48\n2025-06-10 09:57:14 RSP: 002b:000000c000adcc60 EFLAGS: 00000212 ORIG_RAX: 0000000000000001\n2025-06-10 09:57:14 RAX: ffffffffffffffda RBX: 0000000000000013 RCX: 00000000004088ae\n2025-06-10 09:57:14 RDX: 0000000000000001 RSI: 000000000274ab18 RDI: 0000000000000013\n2025-06-10 09:57:14 RBP: 000000c000adcca0 R08: 0000000000000000 R09: 0000000000000000\n2025-06-10 09:57:14 R10: 0000000000000000 R11: 0000000000000212 R12: 000000c000adcdc0\n2025-06-10 09:57:14 R13: 0000000000000002 R14: 000000c000caa540 R15: 0000000000000000\n2025-06-10 09:57:14 Sending NMI from CPU 33 to CPUs 0-32,34-95:\n2025-06-10 09:57:14 NMI backtrace for cpu 52 skipped: idling at intel_idle+0x6f/0xc0\n2025-06-10 
09:57:14 NMI backtrace for cpu 54 skipped: idling at intel_idle+0x6f/0xc0\n2025-06-10 09:57:14 NMI backtrace for cpu 7 skipped: idling at intel_idle+0x6f/0xc0\n2025-06-10 09:57:14 NMI backtrace for cpu 81 skipped: idling at intel_idle+0x6f/0xc0\n2025-06-10 09:57:14 NMI backtrace for cpu 60 skipped: idling at intel_idle+0x6f/0xc0\n2025-06-10 09:57:14 NMI backtrace for cpu 2 skipped: idling at intel_idle+0x6f/0xc0\n2025-06-10 09:57:14 NMI backtrace for cpu 21 skipped: idling at intel_idle+0x6f/0xc0\n2025-06-10 09:57:14 NMI backtrace for cpu 69 skipped: idling at intel_idle+0x6f/0xc0\n2025-06-10 09:57:14 NMI backtrace for cpu 58 skipped: idling at intel_idle+0x6f/
|
||||
...
|
||||
"pid": 2567042
|
||||
},
|
||||
"tracer_time": "2025-06-10 09:57:12.202 +0800",
|
||||
"tracer_type": "auto",
|
||||
"time": "2025-06-10 09:57:12.202 +0800",
|
||||
"region": "***",
|
||||
"tracer_name": "hungtask",
|
||||
"es_index_time": 1749520632297
|
||||
},
|
||||
"fields": {
|
||||
"time": [
|
||||
"2025-06-10T01:57:12.202Z"
|
||||
]
|
||||
},
|
||||
"_ignored": [
|
||||
"tracer_data.blocked_processes_stack",
|
||||
"tracer_data.cpus_stack"
|
||||
],
|
||||
"_version": 1,
|
||||
"sort": [
|
||||
1749520632202
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
另外 hungtask event 还实现了 `Collector` 接口,这样还会通过 Prometheus 统计宿主 hungtask 发生的次数。
|
||||
|
||||
### 容器、物理机内存回收
|
||||
|
||||
**功能介绍**
|
||||
|
||||
内存压力过大时,如果此时进程申请内存,有可能进入直接回收,此时处于同步回收阶段,可能会造成业务进程的卡顿,在此记录进程进入直接回收的时间,有助于我们判断此进程被直接回收影响的剧烈程度。memreclaim event 计算同一个进程在 1s 周期,若进程处在直接回收状态超过 900ms, 则记录其上下文信息。
|
||||
|
||||
**示例**
|
||||
|
||||
业务容器的 chrome 进程进入直接回收状态,ES 查询输出如下:
|
||||
|
||||
```
|
||||
{
|
||||
"_index": "***_cases_2025-06-11",
|
||||
"_type": "_doc",
|
||||
"_id": "***",
|
||||
"_score": 0,
|
||||
"_source": {
|
||||
"tracer_data": {
|
||||
"comm": "chrome",
|
||||
"deltatime": 1412702917,
|
||||
"pid": 1896137
|
||||
},
|
||||
"container_host_namespace": "***",
|
||||
"container_hostname": "***.docker",
|
||||
"es_index_time": 1749641583290,
|
||||
"uploaded_time": "2025-06-11T19:33:03.26754495+08:00",
|
||||
"hostname": "***",
|
||||
"container_type": "normal",
|
||||
"tracer_time": "2025-06-11 19:33:03.267 +0800",
|
||||
"time": "2025-06-11 19:33:03.267 +0800",
|
||||
"region": "***",
|
||||
"container_level": "102",
|
||||
"container_id": "921d0ec0a20c",
|
||||
"tracer_name": "directreclaim"
|
||||
},
|
||||
"fields": {
|
||||
"time": [
|
||||
"2025-06-11T11:33:03.267Z"
|
||||
]
|
||||
},
|
||||
"_version": 1,
|
||||
"sort": [
|
||||
1749641583267
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### 网络设备状态
|
||||
|
||||
**功能介绍**
|
||||
|
||||
网卡状态变化通常容易造成严重的网络问题,直接影响整机网络质量,如 down/up, MTU 改变等。以 down 状态为例,可能是有权限的进程操作、底层线缆、光模块、对端交换机等问题导致,netdev event 用于检测网络设备的状态变化,目前已实现网卡 down, up 的监控,并区分管理员或底层原因导致的网卡状态变化。
|
||||
|
||||
**示例**
|
||||
|
||||
一次管理员操作导致 eth1 网卡 down 时,ES 查询到事件输出如下:
|
||||
|
||||
```
|
||||
{
|
||||
"_index": "***_cases_2025-05-30",
|
||||
"_type": "_doc",
|
||||
"_id": "***",
|
||||
"_score": 0,
|
||||
"_source": {
|
||||
"uploaded_time": "2025-05-30T17:47:50.406913037+08:00",
|
||||
"hostname": "localhost.localdomain",
|
||||
"tracer_data": {
|
||||
"ifname": "eth1",
|
||||
"start": false,
|
||||
"index": 3,
|
||||
"linkstatus": "linkStatusAdminDown, linkStatusCarrierDown",
|
||||
"mac": "5c:6f:69:34:dc:72"
|
||||
},
|
||||
"tracer_time": "2025-05-30 17:47:50.406 +0800",
|
||||
"tracer_type": "auto",
|
||||
"time": "2025-05-30 17:47:50.406 +0800",
|
||||
"region": "***",
|
||||
"tracer_name": "netdev_event",
|
||||
"es_index_time": 1748598470407
|
||||
},
|
||||
"fields": {
|
||||
"time": [
|
||||
"2025-05-30T09:47:50.406Z"
|
||||
]
|
||||
},
|
||||
"_version": 1,
|
||||
"sort": [
|
||||
1748598470406
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### LACP 协议状态
|
||||
|
||||
**功能介绍**
|
||||
|
||||
Bond 是 Linux 系统内核提供的一种将多个物理网络接口绑定为一个逻辑接口的技术。通过绑定,可以实现带宽叠加、故障切换或负载均衡。LACP 是 IEEE 802.3ad 标准定义的协议,用于动态管理链路聚合组(LAG)。目前没有优雅获取物理机 LACP 协议协商异常事件的方法,HUATUO 实现了 lacp event,通过 BPF 在协议关键路径插桩检测到链路聚合状态发生变化时,触发事件记录相关信息。
|
||||
|
||||
**示例**
|
||||
|
||||
在宿主网卡 eth1 出现物理层 down/up 抖动时,lacp 动态协商状态异常,ES 查询输出如下:
|
||||
|
||||
```
|
||||
{
|
||||
"_index": "***_cases_2025-05-30",
|
||||
"_type": "_doc",
|
||||
"_id": "***",
|
||||
"_score": 0,
|
||||
"_source": {
|
||||
"uploaded_time": "2025-05-30T17:47:48.513318579+08:00",
|
||||
"hostname": "***",
|
||||
"tracer_data": {
|
||||
"content": "/proc/net/bonding/bond0\nEthernet Channel Bonding Driver: v4.18.0 (Apr 7, 2025)\n\nBonding Mode: load balancing (round-robin)\nMII Status: down\nMII Polling Interval (ms): 0\nUp Delay (ms): 0\nDown Delay (ms): 0\nPeer Notification Delay (ms): 0\n/proc/net/bonding/bond4\nEthernet Channel Bonding Driver: v4.18.0 (Apr 7, 2025)\n\nBonding Mode: IEEE 802.3ad Dynamic link aggregation\nTransmit Hash Policy: layer3+4 (1)\nMII Status: up\nMII Polling Interval (ms): 100\nUp Delay (ms): 0\nDown Delay (ms): 0\nPeer Notification Delay (ms): 1000\n\n802.3ad info\nLACP rate: fast\nMin links: 0\nAggregator selection policy (ad_select): stable\nSystem priority: 65535\nSystem MAC address: 5c:6f:69:34:dc:72\nActive Aggregator Info:\n\tAggregator ID: 1\n\tNumber of ports: 2\n\tActor Key: 21\n\tPartner Key: 50013\n\tPartner Mac Address: 00:00:5e:00:01:01\n\nSlave Interface: eth0\nMII Status: up\nSpeed: 25000 Mbps\nDuplex: full\nLink Failure Count: 0\nPermanent HW addr: 5c:6f:69:34:dc:72\nSlave queue ID: 0\nSlave active: 1\nSlave sm_vars: 0x172\nAggregator ID: 1\nAggregator active: 1\nActor Churn State: none\nPartner Churn State: none\nActor Churned Count: 0\nPartner Churned Count: 0\ndetails actor lacp pdu:\n system priority: 65535\n system mac address: 5c:6f:69:34:dc:72\n port key: 21\n port priority: 255\n port number: 1\n port state: 63\ndetails partner lacp pdu:\n system priority: 200\n system mac address: 00:00:5e:00:01:01\n oper key: 50013\n port priority: 32768\n port number: 16397\n port state: 63\n\nSlave Interface: eth1\nMII Status: up\nSpeed: 25000 Mbps\nDuplex: full\nLink Failure Count: 17\nPermanent HW addr: 5c:6f:69:34:dc:73\nSlave queue ID: 0\nSlave active: 0\nSlave sm_vars: 0x172\nAggregator ID: 1\nAggregator active: 1\nActor Churn State: monitoring\nPartner Churn State: monitoring\nActor Churned Count: 2\nPartner Churned Count: 2\ndetails actor lacp pdu:\n system priority: 65535\n system mac address: 5c:6f:69:34:dc:72\n port key: 21\n port priority: 255\n 
port number: 2\n port state: 15\ndetails partner lacp pdu:\n system priority: 200\n system mac address: 00:00:5e:00:01:01\n oper key: 50013\n port priority: 32768\n port number: 32781\n port state: 31\n"
|
||||
},
|
||||
"tracer_time": "2025-05-30 17:47:48.513 +0800",
|
||||
"tracer_type": "auto",
|
||||
"time": "2025-05-30 17:47:48.513 +0800",
|
||||
"region": "***",
|
||||
"tracer_name": "lacp",
|
||||
"es_index_time": 1748598468514
|
||||
},
|
||||
"fields": {
|
||||
"time": [
|
||||
"2025-05-30T09:47:48.513Z"
|
||||
]
|
||||
},
|
||||
"_ignored": [
|
||||
"tracer_data.content"
|
||||
],
|
||||
"_version": 1,
|
||||
"sort": [
|
||||
1748598468513
|
||||
]
|
||||
}
|
||||
```
|
|
@ -1,271 +0,0 @@
|
|||
该文档汇总了当前 v1.0 版本支持的所有指标,涉及 CPU、内存、网络、IO。
|
||||
|
||||
|子系统|指标|描述|单位|统计纬度|指标来源|
|
||||
|-------|-------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|-----|-------------------------------------------------------------------|
|
||||
|cpu|cpu_util_sys|cpu 系统态利用率|%|宿主|基于 cgroup cpuacct.stat 和 cpuacct.usage 计算|
|
||||
|cpu|cpu_util_usr|cpu 用户态利用率|%|宿主|基于 cgroup cpuacct.stat 和 cpuacct.usage 计算|
|
||||
|cpu|cpu_util_total|容器 cpu 总利用率|%|宿主|基于 cgroup cpuacct.stat 和 cpuacct.usage 计算|
|
||||
|cpu|cpu_util_container_sys|容器 cpu 系统态利用率|%|容器|基于 cgroup cpuacct.stat 和 cpuacct.usage 计算|
|
||||
|cpu|cpu_util_container_usr|容器 cpu 用户态利用率|%|容器|基于 cgroup cpuacct.stat 和 cpuacct.usage 计算|
|
||||
|cpu|cpu_util_container_total|容器 cpu 总利用率|%|容器|基于 cgroup cpuacct.stat 和 cpuacct.usage 计算|
|
||||
|cpu|cpu_stat_container_burst_time|累计墙时(以纳秒为单位),周期内突发超出配额的时间|纳秒(ns)|容器|基于 cpu.stat 读取|
|
||||
|cpu|cpu_stat_container_nr_bursts|周期内突发次数|计数|容器|基于 cpu.stat 读取|
|
||||
|cpu|cpu_stat_container_nr_throttled|cgroup 被 throttled/limited 的次数|计数|容器|基于 cpu.stat 读取|
|
||||
|cpu|cpu_stat_container_exter_wait_rate|容器外进程导致的等待率|%|容器|基于 cpu.stat 读取的 throttled_time hierarchy_wait_sum inner_wait_sum 计算|
|
||||
|cpu|cpu_stat_container_inner_wait_rate|容器内部进程导致的等待率|%|容器|基于 cpu.stat 读取的 throttled_time hierarchy_wait_sum inner_wait_sum 计算|
|
||||
|cpu|cpu_stat_container_throttle_wait_rate|容器被限制而引起的等待率|%|容器|基于 cpu.stat 读取的 throttled_time hierarchy_wait_sum inner_wait_sum 计算|
|
||||
|cpu|cpu_stat_container_wait_rate|总的等待率: exter_wait_rate + inner_wait_rate + throttle_wait_rate|%|容器|基于 cpu.stat 读取的 throttled_time hierarchy_wait_sum inner_wait_sum 计算|
|
||||
|cpu|loadavg_container_container_nr_running|容器中运行的任务数量|计数|容器|从内核通过 netlink 获取|
|
||||
|cpu|loadavg_container_container_nr_uninterruptible|容器中不可中断任务的数量|计数|容器|从内核通过 netlink 获取|
|
||||
|cpu|loadavg_load1|系统过去 1 分钟的平均负载|计数|宿主|procfs|
|
||||
|cpu|loadavg_load5|系统过去 5 分钟的平均负载|计数|宿主|procfs|
|
||||
|cpu|loadavg_load15|系统过去 15 分钟的平均负载|计数|宿主|procfs|
|
||||
|cpu|softirq_latency|在不同时间域发生的 NET_RX/NET_TX 中断延迟次数:<br>0~10 us<br>100us ~ 1ms<br>10us ~ 100us<br>1ms ~ inf|计数|宿主|BPF 软中断埋点统计|
|
||||
|cpu|runqlat_container_nlat_01|容器中进程调度延迟在 0~10 毫秒内的次数|计数|容器|bpf 调度切换埋点统计|
|
||||
|cpu|runqlat_container_nlat_02|容器中进程调度延迟在 10~20 毫秒之间的次数|计数|容器|bpf 调度切换埋点统计|
|
||||
|cpu|runqlat_container_nlat_03|容器中进程调度延迟在 20~50 毫秒之间的次数|计数|容器|bpf 调度切换埋点统计|
|
||||
|cpu|runqlat_container_nlat_04|容器中进程调度延迟超过 50 毫秒的次数|计数|容器|bpf 调度切换埋点统计|
|
||||
|cpu|runqlat_g_nlat_01|宿主中进程调度延迟在范围内 0~10 毫秒的次数|计数|宿主|bpf 调度切换埋点统计|
|
||||
|cpu|runqlat_g_nlat_02|宿主中进程调度延迟在范围内 10~20 毫秒的次数|计数|宿主|bpf 调度切换埋点统计|
|
||||
|cpu|runqlat_g_nlat_03|宿主中进程调度延迟在范围内 20~50 毫秒的次数|计数|宿主|bpf 调度切换埋点统计|
|
||||
|cpu|runqlat_g_nlat_04|宿主中进程调度延迟超过 50 毫秒的次数|计数|宿主|bpf 调度切换埋点统计|
|
||||
|cpu|reschedipi_oversell_probability|vm 中 cpu 超卖检测|0-1|宿主|bpf 调度 ipi 埋点统计|
|
||||
|memory|buddyinfo_blocks|内核伙伴系统内存分配|页计数|宿主|procfs|
|
||||
|memory|memory_events_container_watermark_inc|内存水位计数|计数|容器|memory.events|
|
||||
|memory|memory_events_container_watermark_dec|内存水位计数|计数|容器|memory.events|
|
||||
|memory|memory_others_container_local_direct_reclaim_time|cgroup 中页分配速度|纳秒(ns)|容器|memory.local_direct_reclaim_time|
|
||||
|memory|memory_others_container_directstall_time|直接回收时间|纳秒(ns)|容器|memory.directstall_stat|
|
||||
|memory|memory_others_container_asyncreclaim_time|异步回收时间|纳秒(ns)|容器|memory.asynreclaim_stat|
|
||||
|memory|memory_stat_container_writeback|匿名/文件 cache sync 到磁盘排队字节数|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_unevictable|无法回收的内存(如 mlocked)|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_shmem|共享内存字节数|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_pgsteal_kswapd|kswapd 和 cswapd 回收的内存字节数|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_pgsteal_globalkswapd|由 kswapd 回收的字节数|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_pgsteal_globaldirect|通过页面分配直接回收的内存字节数|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_pgsteal_direct|页分配和 try_charge 期间直接回收的内存字节数|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_pgsteal_cswapd|由 cswapd 回收的字节数|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_pgscan_kswapd|kswapd 和 cswapd 扫描的内存字节数|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_pgscan_globalkswapd|kswapd 扫描的内存字节数|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_pgscan_globaldirect|扫描内存中通过直接回收在页面分配期间的字节数|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_pgscan_direct|扫描内存的字节数,在页面分配和 try_charge 期间通过直接回收的字节数|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_pgscan_cswapd|由 cswapd 扫描内存的字节数|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_pgrefill|内存中扫描的字节数|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_pgdeactivate|内存中未激活的部分被添加到非活动列表中|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_inactive_file|文件内存中不活跃的 LRU 列表的字节数|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_inactive_anon|匿名和交换缓存内存中不活跃的 LRU 列表的字节数|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_dirty|等待写入磁盘的字节|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_active_file|活跃内存中文件内存的大小|字节(Bytes)|容器|memory.stat|
|
||||
|memory|memory_stat_container_active_anon|活跃内存中匿名和交换内存的大小|字节(Bytes)|容器|memory.stat|
|
||||
|memory|mountpoint_perm_ro|挂载点是否为只读|布尔(bool)|宿主|procfs|
|
||||
|memory|vmstat_allocstall_normal|宿主在 normal 域直接回收|计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_allocstall_movable|宿主在 movable 域直接回收|计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_compact_stall|内存压缩计数|计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_nr_active_anon|活跃的匿名页数量|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_nr_active_file|活跃的文件页数量|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_nr_boost_pages|kswapd boosting 页数量|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_nr_dirty|脏页数量|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_nr_free_pages|释放的页数量|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_nr_inactive_anon|非活跃的匿名页数量|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_nr_inactive_file|非活跃的文件页数量|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_nr_kswapd_boost|kswapd boosting 次数计数|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_nr_mlock|锁定的页面数量|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_nr_shmem|共享内存页面数|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_nr_slab_reclaimable|可回收的 slab 页数量|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_nr_slab_unreclaimable|无法回收的 slab 页数量|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_nr_unevictable|不可驱逐页面数量|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_nr_writeback|写入页面数|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_numa_pages_migrated|NUMA 迁移中的页面数|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_pgdeactivate|页数被停用进入非活动 LRU|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_pgrefill|扫描的活跃 LRU 页面数|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_pgscan_direct|扫描的页数|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_pgscan_kswapd|扫描的页面数量,由 kswapd 回收的数量|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_pgsteal_direct|直接回收的页面|页计数|宿主|/proc/vmstat|
|
||||
|memory|vmstat_pgsteal_kswapd|被 kswapd 回收的数量|页计数|宿主|/proc/vmstat|
|
||||
|memory|hungtask_counter|hungtask 事件计数|计数|宿主|BPF 埋点统计|
|
||||
|memory|oom_host_counter|oom 事件计数|计数|宿主|BPF 埋点统计|
|
||||
|memory|oom_container_counter|oom 事件计数|计数|容器|BPF 埋点统计|
|
||||
|memory|softlockup_counter|softlockup 事件计数|计数|宿主|BPF 埋点统计|
|
||||
|memory|memory_free_compaction|内存压缩的速度|纳秒(ns)|宿主|bpf 埋点统计|
|
||||
|memory|memory_free_allocstall|内存中主机直接回收速度|纳秒(ns)|宿主|bpf 埋点统计|
|
||||
|memory|memory_cgroup_container_directstall|cgroup 尝试直接回收的计数|计数|容器|bpf 埋点统计|
|
||||
|IO|iolatency_disk_d2c|磁盘访问时的 io 延迟统计,包括驱动程序和硬件组件消耗的时间|计数|宿主|bpf 埋点统计|
|
||||
|IO|iolatency_disk_q2c|磁盘访问整个 I/O 生命周期时的 I/O 延迟统计|计数|宿主|bpf 埋点统计|
|
||||
|IO|iolatency_container_d2c|磁盘访问时的 I/O 延迟统计,包括驱动程序和硬件组件消耗的时间|计数|容器|bpf 埋点统计|
|
||||
|IO|iolatency_container_q2c|磁盘访问整个 I/O 生命周期时的 I/O 延迟统计|计数|容器|bpf 埋点统计|
|
||||
|IO|iolatency_disk_flush|磁盘 RAID 设备刷新操作延迟统计|计数|宿主|bpf 埋点统计|
|
||||
|IO|iolatency_container_flush|磁盘 RAID 设备上由容器引起的刷新操作延迟统计|计数|容器|bpf 埋点统计|
|
||||
|IO|iolatency_disk_freeze|磁盘 freeze 事件|计数|宿主|bpf 埋点统计|
|
||||
|network|tcp_mem_limit_pages|系统 TCP 总内存大小限制|页计数|系统|procfs|
|
||||
|network|tcp_mem_usage_bytes|系统使用的 TCP 内存总字节数|字节(Bytes)|系统|tcp_mem_usage_pages \* page_size|
|
||||
|network|tcp_mem_usage_pages|系统使用的 TCP 内存总量|页计数|系统|procfs|
|
||||
|network|tcp_mem_usage_percent|系统使用的 TCP 内存百分比(相对 TCP 内存总限制)|%|系统|tcp_mem_usage_pages / tcp_mem_limit_pages|
|
||||
|network|arp_entries|arp 缓存条目数量|计数|宿主,容器|procfs|
|
||||
|network|arp_total|总 arp 缓存条目数|计数|系统|procfs|
|
||||
|network|qdisc_backlog|待发送的字节数|字节(Bytes)|宿主|netlink qdisc 统计|
|
||||
|network|qdisc_bytes_total|已发送的字节数|字节(Bytes)|宿主|netlink qdisc 统计|
|
||||
|network|qdisc_current_queue_length|排队等待发送的包数量|计数|宿主|netlink qdisc 统计|
|
||||
|network|qdisc_drops_total|丢弃的数据包数量|计数|宿主|netlink qdisc 统计|
|
||||
|network|qdisc_overlimits_total|排队数据包里超限的数量|计数|宿主|netlink qdisc 统计|
|
||||
|network|qdisc_packets_total|已发送的包数量|计数|宿主|netlink qdisc 统计|
|
||||
|network|qdisc_requeues_total|重新入队的数量|计数|宿主|netlink qdisc 统计|
|
||||
|network|ethtool_hardware_rx_dropped_errors|接口接收丢包统计|计数|宿主|硬件驱动相关, 如 mlx, ixgbe, bnxt_en, etc.|
|
||||
|network|netdev_receive_bytes_total|接口接收的字节数|字节(Bytes)|宿主,容器|procfs|
|
||||
|network|netdev_receive_compressed_total|接口接收的压缩包数量|计数|宿主,容器|procfs|
|
||||
|network|netdev_receive_dropped_total|接口接收丢弃的包数量|计数|宿主,容器|procfs|
|
||||
|network|netdev_receive_errors_total|接口接收检测到错误的包数量|计数|宿主,容器|procfs|
|
||||
|network|netdev_receive_fifo_total|接口接收 fifo 缓冲区错误数量|计数|宿主,容器|procfs|
|
||||
|network|netdev_receive_frame_total|接口接收帧对齐错误|计数|宿主,容器|procfs|
|
||||
|network|netdev_receive_multicast_total|多播数据包已接收的包数量,对于硬件接口,此统计通常在设备层计算(与 rx_packets 不同),因此可能包括未到达的数据包|计数|宿主,容器|procfs|
|
||||
|network|netdev_receive_packets_total|接口接收到的有效数据包数量|计数|宿主,容器|procfs|
|
||||
|network|netdev_transmit_bytes_total|接口发送的字节数|字节(Bytes)|宿主,容器|procfs|
|
||||
|network|netdev_transmit_carrier_total|接口发送过程中由于载波丢失导致的帧传输错误数量|计数|宿主,容器|procfs|
|
||||
|network|netdev_transmit_colls_total|接口发送碰撞计数|计数|宿主,容器|procfs|
|
||||
|network|netdev_transmit_compressed_total|接口发送压缩数据包数量|计数|宿主,容器|procfs|
|
||||
|network|netdev_transmit_dropped_total|数据包在传输过程中丢失的数量,如资源不足|计数|宿主,容器|procfs|
|
||||
|network|netdev_transmit_errors_total|发送错误计数|计数|宿主,容器|procfs|
|
||||
|network|netdev_transmit_fifo_total|帧传输错误数量|计数|宿主,容器|procfs|
|
||||
|network|netdev_transmit_packets_total|发送数据包计数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_ArpFilter|因 ARP 过滤规则而被拒绝的 ARP 请求/响应包数量|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_BusyPollRxPackets|通过 busy polling 机制接收到的网络数据包数量|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_DelayedACKLocked|由于用户态锁住了 sock,而无法发送 delayed ack 的次数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_DelayedACKLost|当收到已确认的包时,它将被更新。延迟 ACK 丢失可能会引起这个问题,但其他原因也可能触发,例如网络中重复的包。|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_DelayedACKs|延迟的 ACK 定时器已过期。TCP 堆栈将发送一个纯 ACK 数据包并退出延迟 ACK 模式|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_EmbryonicRsts|收到初始 SYN_RECV 套接字的重置|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_IPReversePathFilter|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_ListenDrops|当内核收到客户端的 SYN 请求时,如果 TCP 接受队列已满,内核将丢弃 SYN 并将 TcpExtListenOverflows 加 1。同时,内核也会将 TcpExtListenDrops 加 1。当一个 TCP 套接字处于监听状态,且内核需要丢弃一个数据包时,内核会始终将 TcpExtListenDrops 加 1。因此,增加 TcpExtListenOverflows 会导致 TcpExtListenDrops 同时增加,但 TcpExtListenDrops 也会在没有 TcpExtListenOverflows 增加的情况下增加,例如内存分配失败也会导致 TcpExtListenDrops 增加。|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_ListenOverflows|当内核收到客户端的 SYN 请求时,如果 TCP 接受队列已满,内核将丢弃 SYN 并将 TcpExtListenOverflows 加 1。同时,内核也会将 TcpExtListenDrops 加 1。当一个 TCP 套接字处于监听状态,且内核需要丢弃一个数据包时,内核会始终将 TcpExtListenDrops 加 1。因此,增加 TcpExtListenOverflows 会导致 TcpExtListenDrops 同时增加,但 TcpExtListenDrops 也会在没有 TcpExtListenOverflows 增加的情况下增加,例如内存分配失败也会导致 TcpExtListenDrops 增加。|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_LockDroppedIcmps|由于套接字被锁定,ICMP 数据包被丢弃|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_OfoPruned|协议栈尝试在乱序队列中丢弃数据包|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_OutOfWindowIcmps|ICMP 数据包因超出窗口而被丢弃|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_PAWSActive|数据包在 Syn-Sent 状态被 PAWS 丢弃|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_PAWSEstab|数据包在除 Syn-Sent 之外的所有状态下都会被 PAWS 丢弃|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_PFMemallocDrop|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_PruneCalled|协议栈尝试回收套接字内存。更新此计数器后,将尝试合并乱序队列和接收队列。如果内存仍然不足,将尝试丢弃乱序队列中的数据包(并更新 TcpExtOfoPruned 计数器)。|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_RcvPruned|在从顺序错误的队列中‘collapse’和丢弃数据包后,如果实际使用的内存仍然大于最大允许内存,则此计数器将被更新。这意味着‘prune’失败|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_SyncookiesFailed|MSS 从 SYN cookie 解码出来的无效。当这个计数器更新时,接收到的数据包不会被当作 SYN cookie 处理,并且 TcpExtSyncookiesRecv 计数器不会更新|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_SyncookiesRecv|接收了多少个 SYN cookies 的回复数据包|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_SyncookiesSent|发送了多少个 SYN cookies|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPACKSkippedChallenge|ACK 为 challenge ACK 时,将跳过 ACK|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPACKSkippedFinWait2|ACK 在 Fin-Wait-2 状态被跳过,原因可能是 PAWS 检查失败或接收到的序列号超出窗口|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPACKSkippedPAWS|由于 PAWS(防止序列号回绕保护)检查失败,ACK 被跳过|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPACKSkippedSeq|序列号超出窗口范围,时间戳通过 PAWS 检查,TCP 状态不是 Syn-Recv、Fin-Wait-2 和 Time-Wait|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPACKSkippedSynRecv|ACK 在 Syn-Recv 状态中被跳过。Syn-Recv 状态表示协议栈收到一个 SYN 并回复 SYN+ACK|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPACKSkippedTimeWait|ACK 在 Time-Wait 状态中被跳过,原因可能是 PAWS 检查失败或接收到的序列号超出窗口|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPAbortFailed|内核 TCP 层将在满足 RFC2525 2.17 节时发送 RST。如果在处理过程中发生内部错误,TcpExtTCPAbortFailed 将增加|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPAbortOnClose|用户模式程序缓冲区中有数据时关闭的套接字数量|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPAbortOnData|TCP 层有正在传输的数据,但需要关闭连接|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPAbortOnLinger|当 TCP 连接进入 FIN_WAIT_2 状态时,内核不会等待来自另一侧的 fin 包,而是发送 RST 并立即删除套接字|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPAbortOnMemory|当一个应用程序关闭 TCP 连接时,内核仍然需要跟踪该连接,让它完成 TCP 断开过程|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPAbortOnTimeout|此计数器将在任何 TCP 计时器到期时增加。在这种情况下,内核不会发送 RST,而是放弃连接|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPAckCompressed|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPAutoCorking|发送数据包时,TCP 层会尝试将小数据包合并成更大的一个|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPBacklogDrop|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPChallengeACK|challenge ack 发送的数量|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPDSACKIgnoredNoUndo|当 DSACK 块无效时,这两个计数器中的一个将被更新。哪个计数器将被更新取决于 TCP 套接字的 undo_marker 标志|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPDSACKIgnoredOld|当 DSACK 块无效时,这两个计数器中的一个将被更新。哪个计数器将被更新取决于 TCP 套接字的 undo_marker 标志|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPDSACKOfoRecv|收到一个 DSACK,表示收到一个顺序错误的重复数据包|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPDSACKOfoSent|收到一个乱序的重复数据包,因此向发送者发送 DSACK|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPDSACKOldSent|收到一个已确认的重复数据包,因此向发送者发送 DSACK|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPDSACKRecv|收到一个 DSACK,表示收到了一个已确认的重复数据包|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPDSACKUndo|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPDeferAcceptDrop|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPDelivered|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPDeliveredCE|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPFastOpenActive|当 TCP 栈在 SYN-SENT 状态接收到一个 ACK 包,并且 ACK 包确认了 SYN 包中的数据,表明 TFO cookie 已被对方接受,然后更新这个计数器|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPFastOpenActiveFail|Fast Open 失败|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPFastOpenBlackhole|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPFastOpenCookieReqd|客户端想要请求 TFO cookie 的次数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPFastOpenListenOverflow|挂起的 Fast Open 请求数量大于 fastopenq->max_qlen 时,协议栈将拒绝 Fast Open 请求并更新此计数器|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPFastOpenPassive|指示 TCP 堆栈接受 Fast Open 请求的次数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPFastOpenPassiveFail|协议栈拒绝 Fast Open 的次数,这是由于 TFO cookie 无效或 在创建套接字过程中发现错误所引起的|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPFastRetrans|快速重传|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPFromZeroWindowAdv|TCP 接收窗口设置为非零值|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPFullUndo|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPHPAcks|如果数据包设置了 ACK 标志且没有数据,则是一个纯 ACK 数据包,如果内核在快速路径中处理它,TcpExtTCPHPAcks 将增加 1|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPHPHits|如果 TCP 数据包包含数据(这意味着它不是一个纯 ACK 数据包),并且此数据包在快速路径中处理,TcpExtTCPHPHits 将增加 1|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPHystartDelayCwnd|CWND 检测到的包延迟总和。将此值除以 TcpExtTCPHystartDelayDetect,即为通过包延迟检测到的平均 CWND|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPHystartDelayDetect|检测到数据包延迟阈值次数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPHystartTrainCwnd|TCP Hystart 训练中使用的拥塞窗口大小,将此值除以 TcpExtTCPHystartTrainDetect 得到由 ACK 训练长度检测到的平均 CWND|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPHystartTrainDetect|TCP Hystart 训练检测的次数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPKeepAlive|此计数器指示已发送的保活数据包。默认情况下不会启用保活功能。用户空间程序可以通过设置 SO_KEEPALIVE 套接字选项来启用它。|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPLossFailures|丢失数据包而进行恢复失败的次数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPLossProbeRecovery|检测到丢失的数据包恢复的次数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPLossProbes|TCP 检测到丢失的数据包数量,通常用于检测网络拥塞或丢包|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPLossUndo|TCP重传数据包成功到达目标端口,但之前已经由于超时或拥塞丢失,因此被视为“撤销”丢失的数据包数量|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPLostRetransmit|丢包重传个数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPMD5Failure|校验错误|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPMD5NotFound|校验错误|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPMD5Unexpected|校验错误|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPMTUPFail|使用 DSACK 无需慢启动即可恢复拥塞窗口|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPMTUPSuccess|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPMemoryPressures|到达 tcp 内存压力位 low 的次数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPMemoryPressuresChrono|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPMinTTLDrop|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPOFODrop|TCP 层接收到一个乱序的数据包,但内存不足,因此丢弃它。此类数据包不会计入 TcpExtTCPOFOQueue 计数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPOFOMerge|接收到的顺序错误的包与上一个包有重叠。重叠部分将被丢弃。所有 TcpExtTCPOFOMerge 包也将计入 TcpExtTCPOFOQueue|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPOFOQueue|TCP 层接收到一个乱序的数据包,并且有足够的内存来排队它|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPOrigDataSent|发送原始数据(不包括重传但包括 SYN 中的数据)的包数量。此计数器与 TcpOutSegs 不同,因为 TcpOutSegs 还跟踪纯 ACK。TCPOrigDataSent 更有助于跟踪 TCP 重传率|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPPartialUndo|检测到一些错误的重传,在我们快速重传的同时,收到了部分确认,因此能够部分撤销我们的一些 CWND 减少|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPPureAcks|如果数据包设置了 ACK 标志且没有数据,则是一个纯 ACK 数据包,如果内核在快速路径中处理它,TcpExtTCPHPAcks 将增加 1,如果内核在慢速路径中处理它,TcpExtTCPPureAcks 将增加 1|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPRcvCoalesce|当数据包被 TCP 层接收但未被应用程序读取时,TCP 层会尝试合并它们。这个计数器表示在这种情况下合并了多少个数据包。如果启用了 GRO,GRO 会合并大量数据包,这些数据包不会被计算到 TcpExtTCPRcvCoalesce 中|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPRcvCollapsed|在 collapse(合并)过程中释放的 skb 数量|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPRenoFailures|TCP_CA_Disorder 阶段进入并经历 RTO 的重传失败次数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPRenoRecovery|当拥塞控制进入恢复状态时,如果使用 sack,TcpExtTCPSackRecovery 增加 1,如果不使用 sack,TcpExtTCPRenoRecovery 增加 1。这两个计数器意味着协议栈开始重传丢失的数据包|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPRenoRecoveryFail|进入恢复阶段并 RTO 的连接数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPRenoReorder|重排序数据包被快速恢复检测到。只有在 SACK 被禁用时才会使用|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPReqQFullDoCookies|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPReqQFullDrop|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPRetransFail|尝试将重传数据包发送到下层,但下层返回错误|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPSACKDiscard|有多少个 SACK 块无效。如果无效的 SACK 块是由 ACK 记录引起的,tcp 栈只会忽略它,而不会更新此计数器|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPSACKReneging|一个数据包被 SACK 确认,但接收方已丢弃此数据包,因此发送方需要重传此数据包|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPSACKReorder|SACK 检测到的重排序数据包|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPSYNChallenge|响应 SYN 数据包发送的 Challenge ack 数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPSackFailures|TCP_CA_Disorder 阶段进入并经历 RTO 的重传失败次数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPSackMerged|skb 已合并计数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPSackRecovery|当拥塞控制进入恢复状态时,如果使用 sack,TcpExtTCPSackRecovery 增加 1,如果不使用 sack,TcpExtTCPRenoRecovery 增加 1。这两个计数器意味着 TCP 栈开始重传丢失的数据包|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPSackRecoveryFail|SACK 恢复失败的次数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPSackShiftFallback|skb 应该被移动或合并,但由于某些原因,TCP 堆栈没有这样做|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPSackShifted|skb 被移位|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPSlowStartRetrans|重新传输一个数据包,拥塞控制状态为“丢失”|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPSpuriousRTOs|虚假重传超时|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPSpuriousRtxHostQueues|当 TCP 栈想要重传一个数据包,发现该数据包并未在网络中丢失,但数据包尚未发送,TCP 栈将放弃重传并更新此计数器。这可能会发生在数据包在 qdisc 或驱动程序队列中停留时间过长的情况下|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPSynRetrans|SYN 和 SYN/ACK 重传次数,将重传分解为 SYN、快速重传、超时重传等|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPTSReorder|tcp 栈在接收到时间戳包后进行乱序包阈值调整的次数|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPTimeWaitOverflow|TIME_WAIT 状态的套接字因超出限制而无法分配的数量|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPTimeouts|TCP 超时事件|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPToZeroWindowAdv|TCP 接收窗口从非零值设置为零|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPWantZeroWindowAdv|根据当前内存使用情况,TCP 栈尝试将接收窗口设置为零。但接收窗口可能仍然是一个非零值|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPWinProbe|定期发送的 ACK 数据包数量,以确保打开窗口的反向 ACK 数据包没有丢失|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TCPWqueueTooBig|\-|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TW|TCP 套接字在快速计时器中完成 time wait 状态|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TWKilled|TCP 套接字在慢速计时器中完成 time wait 状态|计数|宿主,容器|procfs|
|
||||
|network|netstat_TcpExt_TWRecycled|等待套接字通过时间戳回收|计数|宿主,容器|procfs|
|
||||
|network|netstat_Tcp_ActiveOpens|TCP 层发送一个 SYN,进入 SYN-SENT 状态。每当 TcpActiveOpens 增加 1 时,TcpOutSegs 应该始终增加 1|计数|宿主,容器|procfs|
|
||||
|network|netstat_Tcp_AttemptFails|TCP 连接从 SYN-SENT 状态或 SYN-RCVD 状态直接过渡到 CLOSED 状态次数,加上 TCP 连接从 SYN-RCVD 状态直接过渡到 LISTEN 状态次数|计数|宿主,容器|procfs|
|
||||
|network|netstat_Tcp_CurrEstab|TCP 连接数,当前状态为 ESTABLISHED 或 CLOSE-WAIT|计数|宿主,容器|procfs|
|
||||
|network|netstat_Tcp_EstabResets|TCP 连接从 ESTABLISHED 状态或 CLOSE-WAIT 状态直接过渡到 CLOSED 状态次数|计数|宿主,容器|procfs|
|
||||
|network|netstat_Tcp_InCsumErrors|TCP 校验和错误|计数|宿主,容器|procfs|
|
||||
|network|netstat_Tcp_InErrs|错误接收到的段总数(例如,错误的 TCP 校验和)|计数|宿主,容器|procfs|
|
||||
|network|netstat_Tcp_InSegs|TCP 层接收到的数据包数量。如 RFC1213 所述,包括接收到的错误数据包,如校验和错误、无效 TCP 头等|计数|宿主,容器|procfs|
|
||||
|network|netstat_Tcp_MaxConn|可以支持的 TCP 连接总数上限;若最大连接数为动态值,此对象应包含值 -1|计数|宿主,容器|procfs|
|
||||
|network|netstat_Tcp_OutRsts|TCP 段中包含 RST 标志的数量|计数|宿主,容器|procfs|
|
||||
|network|netstat_Tcp_OutSegs|发送的总段数,包括当前连接上的段,但不包括仅包含重传字节的段|计数|宿主,容器|procfs|
|
||||
|network|netstat_Tcp_PassiveOpens|TCP 连接从监听状态直接过渡到 SYN-RCVD 状态的次数|计数|宿主,容器|procfs|
|
||||
|network|netstat_Tcp_RetransSegs|总重传段数 - 即包含一个或多个先前已传输字节的 TCP 段传输的数量|计数|宿主,容器|procfs|
|
||||
|network|netstat_Tcp_RtoAlgorithm|用于确定未确认数据重传超时值的算法|计数|宿主,容器|procfs|
|
||||
|network|netstat_Tcp_RtoMax|TCP 实现允许的重传超时最大值,以毫秒为单位|毫秒|宿主,容器|procfs|
|
||||
|network|netstat_Tcp_RtoMin|TCP 实现允许的重传超时最小值,以毫秒为单位|毫秒|宿主,容器|procfs|
|
||||
|network|sockstat_FRAG_inuse|\-|计数|宿主,容器|procfs|
|
||||
|network|sockstat_FRAG_memory|\-|页计数|宿主,容器|procfs|
|
||||
|network|sockstat_RAW_inuse|使用的 RAW 套接字数量|计数|宿主,容器|procfs|
|
||||
|network|sockstat_TCP_alloc|TCP 已分配的套接字数量|计数|宿主,容器|procfs|
|
||||
|network|sockstat_TCP_inuse|已建立的 TCP 套接字数量|计数|宿主,容器|procfs|
|
||||
|network|sockstat_TCP_mem|系统使用的 TCP 内存总量|页计数|系统|procfs|
|
||||
|network|sockstat_TCP_mem_bytes|系统使用的 TCP 内存总量|字节(Bytes)|系统|sockstat_TCP_mem \* page_size|
|
||||
|network|sockstat_TCP_orphan|TCP 等待关闭的连接数|计数|宿主,容器|procfs|
|
||||
|network|sockstat_TCP_tw|TCP 套接字终止数量|计数|宿主,容器|procfs|
|
||||
|network|sockstat_UDPLITE_inuse|\-|计数|宿主,容器|procfs|
|
||||
|network|sockstat_UDP_inuse|使用的 UDP 套接字数量|计数|宿主,容器|procfs|
|
||||
|network|sockstat_UDP_mem|系统使用的 UDP 内存总量|页计数|系统|procfs|
|
||||
|network|sockstat_UDP_mem_bytes|系统使用的 UDP 内存字节数总和|字节(Bytes)|系统|sockstat_UDP_mem \* page_size|
|
||||
|network|sockstat_sockets_used|系统使用 socket 数量|计数|系统|procfs|
|
Before Width: | Height: | Size: 111 KiB |
Before Width: | Height: | Size: 111 KiB |
70
go.mod
|
@ -3,6 +3,7 @@ module huatuo-bamai
|
|||
go 1.22.4
|
||||
|
||||
require (
|
||||
git.xiaojukeji.com/kernel/huatuo v1.3.0
|
||||
github.com/cilium/ebpf v0.16.0
|
||||
github.com/containerd/cgroups/v3 v3.0.3
|
||||
github.com/deckarep/golang-set v1.8.0
|
||||
|
@ -13,7 +14,10 @@ require (
|
|||
github.com/gin-gonic/gin v1.10.0
|
||||
github.com/go-playground/validator/v10 v10.22.1
|
||||
github.com/google/cadvisor v0.50.0
|
||||
github.com/gopacket/gopacket v1.2.0
|
||||
github.com/grafana/grafana-plugin-sdk-go v0.251.0
|
||||
github.com/grafana/pyroscope v1.7.1
|
||||
github.com/grafana/pyroscope/api v0.4.0
|
||||
github.com/jsimonetti/rtnetlink v1.4.2
|
||||
github.com/mdlayher/netlink v1.7.2
|
||||
github.com/opencontainers/runtime-spec v1.2.0
|
||||
|
@ -21,24 +25,28 @@ require (
|
|||
github.com/pkg/errors v0.9.1
|
||||
github.com/prometheus/client_golang v1.20.3
|
||||
github.com/prometheus/procfs v0.15.1
|
||||
github.com/safchain/ethtool v0.4.1
|
||||
github.com/shirou/gopsutil v2.21.11+incompatible
|
||||
github.com/sirupsen/logrus v1.9.3
|
||||
github.com/tklauser/numcpus v0.6.1
|
||||
github.com/tidwall/gjson v1.14.2
|
||||
github.com/urfave/cli/v2 v2.27.4
|
||||
github.com/vishvananda/netlink v1.3.0
|
||||
github.com/vishvananda/netns v0.0.4
|
||||
golang.org/x/net v0.31.0
|
||||
golang.org/x/sys v0.27.0
|
||||
golang.org/x/time v0.6.0
|
||||
gopkg.in/natefinch/lumberjack.v2 v2.2.1
|
||||
k8s.io/api v0.31.3
|
||||
k8s.io/cri-client v0.31.3
|
||||
k8s.io/kubelet v0.29.0
|
||||
sigs.k8s.io/yaml v1.5.0
|
||||
)
|
||||
|
||||
require (
|
||||
connectrpc.com/connect v1.16.2 // indirect
|
||||
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
|
||||
github.com/Microsoft/go-winio v0.6.2 // indirect
|
||||
github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 // indirect
|
||||
github.com/apache/arrow/go/v15 v15.0.2 // indirect
|
||||
github.com/armon/go-metrics v0.4.1 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/blang/semver/v4 v4.0.0 // indirect
|
||||
github.com/bytedance/sonic v1.12.4 // indirect
|
||||
|
@ -48,39 +56,67 @@ require (
|
|||
github.com/cheekybits/genny v1.0.0 // indirect
|
||||
github.com/cloudwego/base64x v0.1.4 // indirect
|
||||
github.com/cloudwego/iasm v0.2.0 // indirect
|
||||
github.com/coreos/go-semver v0.3.0 // indirect
|
||||
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
|
||||
github.com/dennwc/varint v1.0.0 // indirect
|
||||
github.com/distribution/reference v0.6.0 // indirect
|
||||
github.com/docker/go-connections v0.5.0 // indirect
|
||||
github.com/docker/go-units v0.5.0 // indirect
|
||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||
github.com/fatih/color v1.15.0 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
|
||||
github.com/gabriel-vasile/mimetype v1.4.6 // indirect
|
||||
github.com/gin-contrib/sse v0.1.0 // indirect
|
||||
github.com/go-kit/log v0.2.1 // indirect
|
||||
github.com/go-logfmt/logfmt v0.6.0 // indirect
|
||||
github.com/go-logr/logr v1.4.2 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||
github.com/go-openapi/swag v0.22.9 // indirect
|
||||
github.com/go-playground/locales v0.14.1 // indirect
|
||||
github.com/go-playground/universal-translator v0.18.1 // indirect
|
||||
github.com/goccy/go-json v0.10.3 // indirect
|
||||
github.com/godbus/dbus/v5 v5.0.6 // indirect
|
||||
github.com/gogo/googleapis v1.4.1 // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/gogo/status v1.1.1 // indirect
|
||||
github.com/golang/protobuf v1.5.4 // indirect
|
||||
github.com/golang/snappy v0.0.4 // indirect
|
||||
github.com/google/btree v1.1.2 // indirect
|
||||
github.com/google/flatbuffers v23.5.26+incompatible // indirect
|
||||
github.com/google/go-cmp v0.6.0 // indirect
|
||||
github.com/google/gofuzz v1.2.0 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/gorilla/mux v1.8.1 // indirect
|
||||
github.com/grafana/dskit v0.0.0-20231221015914-de83901bf4d6 // indirect
|
||||
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db // indirect
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
|
||||
github.com/inconshreveable/mousetrap v1.1.0 // indirect
|
||||
github.com/hashicorp/consul/api v1.28.2 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
|
||||
github.com/hashicorp/go-hclog v1.6.3 // indirect
|
||||
github.com/hashicorp/go-immutable-radix v1.3.1 // indirect
|
||||
github.com/hashicorp/go-msgpack v1.1.5 // indirect
|
||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/hashicorp/go-rootcerts v1.0.2 // indirect
|
||||
github.com/hashicorp/go-sockaddr v1.0.6 // indirect
|
||||
github.com/hashicorp/golang-lru v0.6.0 // indirect
|
||||
github.com/hashicorp/memberlist v0.5.0 // indirect
|
||||
github.com/hashicorp/serf v0.10.1 // indirect
|
||||
github.com/josharian/native v1.1.0 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/klauspost/compress v1.17.9 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.2.9 // indirect
|
||||
github.com/leodido/go-urn v1.4.0 // indirect
|
||||
github.com/mattetti/filebuffer v1.0.1 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.14 // indirect
|
||||
github.com/mdlayher/socket v0.4.1 // indirect
|
||||
github.com/miekg/dns v1.1.58 // indirect
|
||||
github.com/mitchellh/go-homedir v1.1.0 // indirect
|
||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
||||
github.com/moby/docker-image-spec v1.3.1 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
|
@ -88,22 +124,36 @@ require (
|
|||
github.com/olekukonko/tablewriter v0.0.5 // indirect
|
||||
github.com/opencontainers/go-digest v1.0.0 // indirect
|
||||
github.com/opencontainers/image-spec v1.1.0 // indirect
|
||||
github.com/opentracing-contrib/go-stdlib v1.0.0 // indirect
|
||||
github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.2.3 // indirect
|
||||
github.com/pierrec/lz4/v4 v4.1.18 // indirect
|
||||
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
|
||||
github.com/prometheus/client_model v0.6.1 // indirect
|
||||
github.com/prometheus/common v0.55.0 // indirect
|
||||
github.com/prometheus/prometheus v0.51.2 // indirect
|
||||
github.com/rivo/uniseg v0.4.3 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/spf13/cobra v1.8.1 // indirect
|
||||
github.com/samber/lo v1.38.1 // indirect
|
||||
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect
|
||||
github.com/spf13/pflag v1.0.5 // indirect
|
||||
github.com/tidwall/match v1.1.1 // indirect
|
||||
github.com/tidwall/pretty v1.2.0 // indirect
|
||||
github.com/tklauser/go-sysconf v0.3.12 // indirect
|
||||
github.com/tklauser/numcpus v0.6.1 // indirect
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
github.com/uber/jaeger-client-go v2.30.0+incompatible // indirect
|
||||
github.com/uber/jaeger-lib v2.4.1+incompatible // indirect
|
||||
github.com/ugorji/go/codec v1.2.12 // indirect
|
||||
github.com/vishvananda/netns v0.0.4 // indirect
|
||||
github.com/valyala/bytebufferpool v1.0.0 // indirect
|
||||
github.com/x448/float16 v0.8.4 // indirect
|
||||
github.com/xlab/treeprint v1.2.0 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
|
||||
github.com/yusufpapurcu/wmi v1.2.4 // indirect
|
||||
github.com/zeebo/xxh3 v1.0.2 // indirect
|
||||
go.etcd.io/etcd/api/v3 v3.5.7 // indirect
|
||||
go.etcd.io/etcd/client/pkg/v3 v3.5.7 // indirect
|
||||
go.etcd.io/etcd/client/v3 v3.5.7 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 // indirect
|
||||
go.opentelemetry.io/otel v1.29.0 // indirect
|
||||
|
@ -113,12 +163,13 @@ require (
|
|||
go.opentelemetry.io/otel/sdk v1.29.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.29.0 // indirect
|
||||
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
|
||||
go.yaml.in/yaml/v2 v2.4.2 // indirect
|
||||
go.uber.org/atomic v1.11.0 // indirect
|
||||
go.uber.org/multierr v1.11.0 // indirect
|
||||
go.uber.org/zap v1.26.0 // indirect
|
||||
golang.org/x/arch v0.12.0 // indirect
|
||||
golang.org/x/crypto v0.29.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 // indirect
|
||||
golang.org/x/mod v0.20.0 // indirect
|
||||
golang.org/x/net v0.31.0 // indirect
|
||||
golang.org/x/oauth2 v0.23.0 // indirect
|
||||
golang.org/x/sync v0.9.0 // indirect
|
||||
golang.org/x/text v0.20.0 // indirect
|
||||
|
@ -131,7 +182,6 @@ require (
|
|||
gopkg.in/inf.v0 v0.9.1 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
gotest.tools/v3 v3.5.1 // indirect
|
||||
k8s.io/apimachinery v0.31.3 // indirect
|
||||
k8s.io/client-go v0.31.3 // indirect
|
||||
k8s.io/component-base v0.31.3 // indirect
|
||||
|
|
283
go.sum
|
@ -1,13 +1,35 @@
|
|||
connectrpc.com/connect v1.16.2 h1:ybd6y+ls7GOlb7Bh5C8+ghA6SvCBajHwxssO2CGFjqE=
|
||||
connectrpc.com/connect v1.16.2/go.mod h1:n2kgwskMHXC+lVqb18wngEpF95ldBHXjZYJussz5FRc=
|
||||
git.xiaojukeji.com/kernel/huatuo v1.3.0 h1:dCtjHnQg+2b2SEhXi3AuEWbdH3sC0j70xaDCvuOdFGs=
|
||||
git.xiaojukeji.com/kernel/huatuo v1.3.0/go.mod h1:oMnjctv7Dp754Vz1cZm5/k/8Eke0I2DijQNzBul3bTc=
|
||||
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
|
||||
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
|
||||
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
|
||||
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
|
||||
github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
|
||||
github.com/HdrHistogram/hdrhistogram-go v1.1.2 h1:5IcZpTvzydCQeHzK4Ef/D5rrSqwxob0t8PQPMybUNFM=
|
||||
github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo=
|
||||
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
|
||||
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 h1:ez/4by2iGztzR4L0zgAOR8lTQK9VlyBVVd7G4omaOQs=
|
||||
github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE=
|
||||
github.com/apache/arrow/go/v15 v15.0.2 h1:60IliRbiyTWCWjERBCkO1W4Qun9svcYoZrSLcyOsMLE=
|
||||
github.com/apache/arrow/go/v15 v15.0.2/go.mod h1:DGXsR3ajT524njufqf95822i+KTh+yea1jass9YXgjA=
|
||||
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
|
||||
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
|
||||
github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA=
|
||||
github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4=
|
||||
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
|
||||
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
|
||||
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
|
||||
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
|
||||
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
|
||||
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
|
||||
github.com/bytedance/sonic v1.12.4 h1:9Csb3c9ZJhfUWeMtpCDCq6BUoH5ogfDFLUgQ/jG+R0k=
|
||||
|
@ -17,6 +39,7 @@ github.com/bytedance/sonic/loader v0.2.1 h1:1GgorWTqf12TA8mma4DDSbaQigE2wOgQo7iC
|
|||
github.com/bytedance/sonic/loader v0.2.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
|
||||
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
|
||||
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
|
||||
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/cheekybits/genny v1.0.0 h1:uGGa4nei+j20rOSeDeP5Of12XVm7TGUd4dJA9RDitfE=
|
||||
|
@ -25,6 +48,8 @@ github.com/chromedp/cdproto v0.0.0-20220208224320-6efb837e6bc2 h1:XCdvHbz3LhewBH
|
|||
github.com/chromedp/cdproto v0.0.0-20220208224320-6efb837e6bc2/go.mod h1:At5TxYYdxkbQL0TSefRjhLE3Q0lgvqKKMSFUglJ7i1U=
|
||||
github.com/cilium/ebpf v0.16.0 h1:+BiEnHL6Z7lXnlGUsXQPPAE7+kenAd4ES8MQ5min0Ok=
|
||||
github.com/cilium/ebpf v0.16.0/go.mod h1:L7u2Blt2jMM/vLAVgjxluxtBKlz3/GWjB0dMOEngfwE=
|
||||
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
|
||||
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
|
||||
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
|
||||
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
|
||||
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
|
||||
|
@ -33,6 +58,8 @@ github.com/containerd/cgroups/v3 v3.0.3 h1:S5ByHZ/h9PMe5IOQoN7E+nMc2UcLEM/V48DGD
|
|||
github.com/containerd/cgroups/v3 v3.0.3/go.mod h1:8HBe7V3aWGLFPd/k03swSIsGjZhHI2WzJmticMgVuz0=
|
||||
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
|
||||
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
|
||||
github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM=
|
||||
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
|
||||
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
|
||||
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
|
||||
|
@ -43,6 +70,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1
|
|||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/deckarep/golang-set v1.8.0 h1:sk9/l/KqpunDwP7pSjUg0keiOOLEnOBHzykLrsPppp4=
|
||||
github.com/deckarep/golang-set v1.8.0/go.mod h1:5nI87KwE7wgsBU1F4GKAw2Qod7p5kyS383rP6+o6qqo=
|
||||
github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE=
|
||||
github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA=
|
||||
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
|
||||
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
|
||||
github.com/docker/docker v27.2.0+incompatible h1:Rk9nIVdfH3+Vz4cyI/uhbINhEZ/oLmc+CBXmH6fbNk4=
|
||||
|
@ -51,14 +80,21 @@ github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj
|
|||
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
|
||||
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
|
||||
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
|
||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
github.com/elastic/go-elasticsearch/v7 v7.17.10 h1:TCQ8i4PmIJuBunvBS6bwT2ybzVFxxUhhltAs3Gyu1yo=
|
||||
github.com/elastic/go-elasticsearch/v7 v7.17.10/go.mod h1:OJ4wdbtDNk5g503kvlHLyErCgQwwzmDtaFC4XyOxXA4=
|
||||
github.com/elazarl/goproxy v0.0.0-20230731152917-f99041a5c027 h1:1L0aalTpPz7YlMxETKpmQoWMBkeiuorElZIXoNmgiPE=
|
||||
github.com/elazarl/goproxy v0.0.0-20230731152917-f99041a5c027/go.mod h1:Ro8st/ElPeALwNFlcTpWmkr6IoMFfkjXAvTHpevnDsM=
|
||||
github.com/ema/qdisc v1.0.0 h1:EHLG08FVRbWLg8uRICa3xzC9Zm0m7HyMHfXobWFnXYg=
|
||||
github.com/ema/qdisc v1.0.0/go.mod h1:FhIc0fLYi7f+lK5maMsesDqwYojIOh3VfRs8EVd5YJQ=
|
||||
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
|
||||
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
|
||||
github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=
|
||||
github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs=
|
||||
github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw=
|
||||
github.com/felixge/fgprof v0.9.4-0.20221116204635-ececf7638e93 h1:S8ZdFFDRXUKs3fHpMDPVh9oWd46hKqEEt/X3oxhtF5Q=
|
||||
github.com/felixge/fgprof v0.9.4-0.20221116204635-ececf7638e93/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw=
|
||||
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
|
||||
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
||||
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
|
||||
|
@ -73,6 +109,14 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE
|
|||
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
|
||||
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
|
||||
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
|
||||
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
|
||||
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
|
||||
github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU=
|
||||
github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0=
|
||||
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
|
||||
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
|
||||
github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4=
|
||||
github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs=
|
||||
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
|
||||
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
|
@ -94,49 +138,122 @@ github.com/go-playground/validator/v10 v10.22.1 h1:40JcKH+bBNGFczGuoBYgX4I6m/i27
|
|||
github.com/go-playground/validator/v10 v10.22.1/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
|
||||
github.com/go-quicktest/qt v1.101.0 h1:O1K29Txy5P2OK0dGo59b7b0LR6wKfIhttaAhHUyn7eI=
|
||||
github.com/go-quicktest/qt v1.101.0/go.mod h1:14Bz/f7NwaXPtdYEgzsx46kqSxVwTbzVZsDC26tQJow=
|
||||
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
||||
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
|
||||
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
|
||||
github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA=
|
||||
github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/godbus/dbus/v5 v5.0.6 h1:mkgN1ofwASrYnJ5W6U/BxG15eXXXjirgZc7CLqkcaro=
|
||||
github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/gogo/googleapis v0.0.0-20180223154316-0cd9801be74a/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s=
|
||||
github.com/gogo/googleapis v1.4.1 h1:1Yx4Myt7BxzvUr5ldGSbwYiZG6t9wGBZ+8/fX3Wvtq0=
|
||||
github.com/gogo/googleapis v1.4.1/go.mod h1:2lpHqI5OcWCtVElxXnPt+s8oJvMpySlOyM6xDCrzib4=
|
||||
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
|
||||
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
|
||||
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
|
||||
github.com/gogo/status v1.1.1 h1:DuHXlSFHNKqTQ+/ACf5Vs6r4X/dH2EgIzR9Vr+H65kg=
|
||||
github.com/gogo/status v1.1.1/go.mod h1:jpG3dM5QPcqu19Hg8lkUhBFBa3TcLs1DG7+2Jqci7oU=
|
||||
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
|
||||
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
||||
github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU=
|
||||
github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
|
||||
github.com/google/cadvisor v0.50.0 h1:7w/hKIbJKBWqQsRTy+Hpj2vj+fnxrLXcEXFy+LW0Bsg=
|
||||
github.com/google/cadvisor v0.50.0/go.mod h1:VxCDwZalpFyENvmfabFqaIGsqNKLtDzE62a19rfVTB8=
|
||||
github.com/google/flatbuffers v23.5.26+incompatible h1:M9dgRyhJemaM4Sw8+66GHBu8ioaQmyPLg1b8VwK5WJg=
|
||||
github.com/google/flatbuffers v23.5.26+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
|
||||
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
|
||||
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af h1:kmjWCqn2qkEml422C2Rrd27c3VGxi6a/6HNq8QmHRKM=
|
||||
github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/gopacket/gopacket v1.2.0 h1:eXbzFad7f73P1n2EJHQlsKuvIMJjVXK5tXoSca78I3A=
|
||||
github.com/gopacket/gopacket v1.2.0/go.mod h1:BrAKEy5EOGQ76LSqh7DMAr7z0NNPdczWm2GxCG7+I8M=
|
||||
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
|
||||
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
|
||||
github.com/grafana/dskit v0.0.0-20231221015914-de83901bf4d6 h1:Z78JZ7pa6InQ5BcMB27M+NMTZ7LV+MXgOd3dZPfEdG4=
|
||||
github.com/grafana/dskit v0.0.0-20231221015914-de83901bf4d6/go.mod h1:kkWM4WUV230bNG3urVRWPBnSJHs64y/0RmWjftnnn0c=
|
||||
github.com/grafana/grafana-plugin-sdk-go v0.251.0 h1:gnOtxrC/1rqFvpSbQYyoZqkr47oWDlz4Q2L6Ozmsi3w=
|
||||
github.com/grafana/grafana-plugin-sdk-go v0.251.0/go.mod h1:gCGN9kHY3KeX4qyni3+Kead38Q+85pYOrsDcxZp6AIk=
|
||||
github.com/grafana/otel-profiling-go v0.5.1 h1:stVPKAFZSa7eGiqbYuG25VcqYksR6iWvF3YH66t4qL8=
|
||||
github.com/grafana/otel-profiling-go v0.5.1/go.mod h1:ftN/t5A/4gQI19/8MoWurBEtC6gFw8Dns1sJZ9W4Tls=
|
||||
github.com/grafana/pyroscope v1.7.1 h1:aGXOVNwUGXK3dNSpc40/IOtOG/ACvaS2C/mJ7jUxMFg=
|
||||
github.com/grafana/pyroscope v1.7.1/go.mod h1:RuSiNg8N9iufpHbScIFU4kU4LbWHaU7G1knyVDw/V5s=
|
||||
github.com/grafana/pyroscope-go v1.0.3 h1:8WWmItzLfg4m8G+j//ElSjMeMr88Y6Lvblar6qeTyKk=
|
||||
github.com/grafana/pyroscope-go/godeltaprof v0.1.8 h1:iwOtYXeeVSAeYefJNaxDytgjKtUuKQbJqgAIjlnicKg=
|
||||
github.com/grafana/pyroscope-go/godeltaprof v0.1.8/go.mod h1:2+l7K7twW49Ct4wFluZD3tZ6e0SjanjcUUBPVD/UuGU=
|
||||
github.com/grafana/pyroscope/api v0.4.0 h1:J86DxoNeLOvtJhB1Cn65JMZkXe682D+RqeoIUiYc/eo=
|
||||
github.com/grafana/pyroscope/api v0.4.0/go.mod h1:MFnZNeUM4RDsDOnbgKW3GWoLSBpLzMMT9nkvhHHo81o=
|
||||
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db h1:7aN5cccjIqCLTzedH7MZzRZt5/lsAHch6Z3L2ZGn5FA=
|
||||
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db/go.mod h1:M5qHK+eWfAv8VR/265dIuEpL3fNfeC21tXXp9itM24A=
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 h1:qnpSQwGEnkcRpTqNOIR6bJbR0gAorgP9CSALpRcKoAA=
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1/go.mod h1:lXGCsh6c22WGtjr+qGHj1otzZpV/1kwTMAqkwZsnWRU=
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 h1:pRhl55Yx1eC7BZ1N+BBWwnKaMyD8uC+34TLdndZMAKk=
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0/go.mod h1:XKMd7iuf/RGPSMJ/U4HP0zS2Z9Fh8Ps9a+6X26m/tmI=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k=
|
||||
github.com/hashicorp/consul/api v1.28.2 h1:mXfkRHrpHN4YY3RqL09nXU1eHKLNiuAN4kHvDQ16k/8=
|
||||
github.com/hashicorp/consul/api v1.28.2/go.mod h1:KyzqzgMEya+IZPcD65YFoOVAgPpbfERu4I/tzG6/ueE=
|
||||
github.com/hashicorp/consul/sdk v0.16.0 h1:SE9m0W6DEfgIVCJX7xU+iv/hUl4m/nxqMTnCdMxDpJ8=
|
||||
github.com/hashicorp/consul/sdk v0.16.0/go.mod h1:7pxqqhqoaPqnBnzXD1StKed62LqJeClzVsUEy85Zr0A=
|
||||
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
|
||||
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
|
||||
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
|
||||
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
|
||||
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
|
||||
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
|
||||
github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
|
||||
github.com/hashicorp/go-immutable-radix v1.3.1 h1:DKHmCUm2hRBK510BaiZlwvpD40f8bJFeZnpfm2KLowc=
|
||||
github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
|
||||
github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=
|
||||
github.com/hashicorp/go-msgpack v1.1.5 h1:9byZdVjKTe5mce63pRVNP1L7UAmdHOTEMGehn6KvJWs=
|
||||
github.com/hashicorp/go-msgpack v1.1.5/go.mod h1:gWVc3sv/wbDmR3rQsj1CAktEZzoz1YNK9NfGLXJ69/4=
|
||||
github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk=
|
||||
github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA=
|
||||
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
|
||||
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
|
||||
github.com/hashicorp/go-plugin v1.6.1 h1:P7MR2UP6gNKGPp+y7EZw2kOiq4IR9WiqLvp0XOsVdwI=
|
||||
github.com/hashicorp/go-plugin v1.6.1/go.mod h1:XPHFku2tFo3o3QKFgSYo+cghcUhw1NA1hZyMK0PWAw0=
|
||||
github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=
|
||||
github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc=
|
||||
github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8=
|
||||
github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU=
|
||||
github.com/hashicorp/go-sockaddr v1.0.6 h1:RSG8rKU28VTUTvEKghe5gIhIQpv8evvNpnDEyqO4u9I=
|
||||
github.com/hashicorp/go-sockaddr v1.0.6/go.mod h1:uoUUmtwU7n9Dv3O4SNLeFvg0SxQ3lyjsj6+CCykpaxI=
|
||||
github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4=
|
||||
github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
|
||||
github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
|
||||
github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8=
|
||||
github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
|
||||
github.com/hashicorp/go-version v1.6.0 h1:feTTfFNnjP967rlCxM/I9g701jU+RN74YKx2mOkIeek=
|
||||
github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
|
||||
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
||||
github.com/hashicorp/golang-lru v0.6.0 h1:uL2shRDx7RTrOrTCUZEGP/wJUFiUI8QT6E7z5o8jga4=
|
||||
github.com/hashicorp/golang-lru v0.6.0/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
|
||||
github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=
|
||||
github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc=
|
||||
github.com/hashicorp/memberlist v0.5.0 h1:EtYPN8DpAURiapus508I4n9CzHs2W+8NZGbmmR/prTM=
|
||||
github.com/hashicorp/memberlist v0.5.0/go.mod h1:yvyXLpo0QaGE59Y7hDTsTzDD25JYBZ4mHgHUZ8lrOI0=
|
||||
github.com/hashicorp/serf v0.10.1 h1:Z1H2J60yRKvfDYAOZLd2MU0ND4AH/WDz7xYHDWQsIPY=
|
||||
github.com/hashicorp/serf v0.10.1/go.mod h1:yL2t6BqATOLGc5HF7qbFkTfXoPIY0WZdWHfEvMqbG+4=
|
||||
github.com/hashicorp/yamux v0.1.1 h1:yrQxtgseBDrq9Y652vSRDvsKCJKOUD+GzTS4Y0Y8pvE=
|
||||
github.com/hashicorp/yamux v0.1.1/go.mod h1:CtWFDAQgb7dxtzFs4tWbplKIe2jSi3+5vKbgIO0SLnQ=
|
||||
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
|
||||
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
|
||||
github.com/invopop/yaml v0.2.0 h1:7zky/qH+O0DwAyoobXUqvVBwgBFRxKoQ/3FjcVpjTMY=
|
||||
github.com/invopop/yaml v0.2.0/go.mod h1:2XuRLgs/ouIrW3XNzuNj7J3Nvu/Dig5MXvbCEdiBN3Q=
|
||||
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
|
||||
|
@ -147,8 +264,11 @@ github.com/jsimonetti/rtnetlink v1.4.2 h1:Df9w9TZ3npHTyDn0Ev9e1uzmN2odmXd0QX+J5G
|
|||
github.com/jsimonetti/rtnetlink v1.4.2/go.mod h1:92s6LJdE+1iOrw+F2/RO7LYI2Qd8pPpFNNUYW06gcoM=
|
||||
github.com/jsimonetti/rtnetlink/v2 v2.0.1 h1:xda7qaHDSVOsADNouv7ukSuicKZO7GgVUCXxpaIEIlM=
|
||||
github.com/jsimonetti/rtnetlink/v2 v2.0.1/go.mod h1:7MoNYNbb3UaDHtF8udiJo/RH6VsTKP1pqKLUTVCvToE=
|
||||
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
||||
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
|
||||
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
|
||||
|
@ -157,8 +277,13 @@ github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa02
|
|||
github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY=
|
||||
github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8=
|
||||
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
||||
|
@ -171,19 +296,41 @@ github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0
|
|||
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
|
||||
github.com/mattetti/filebuffer v1.0.1 h1:gG7pyfnSIZCxdoKq+cPa8T0hhYtD9NxCdI4D7PTjRLM=
|
||||
github.com/mattetti/filebuffer v1.0.1/go.mod h1:YdMURNDOttIiruleeVr6f56OrMc+MydEnTcXwtkxNVs=
|
||||
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
|
||||
github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
|
||||
github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
|
||||
github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
|
||||
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
|
||||
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
||||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||
github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
|
||||
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
|
||||
github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE=
|
||||
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
|
||||
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
|
||||
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
|
||||
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
|
||||
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
|
||||
github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g=
|
||||
github.com/mdlayher/netlink v1.7.2/go.mod h1:xraEF7uJbxLhc5fpHL4cPe221LI2bdttWlU+ZGLfQSw=
|
||||
github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U=
|
||||
github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA=
|
||||
github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
|
||||
github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI=
|
||||
github.com/miekg/dns v1.1.58 h1:ca2Hdkz+cDg/7eNF6V56jjzuZ4aCAE+DbVkILdQWG/4=
|
||||
github.com/miekg/dns v1.1.58/go.mod h1:Ypv+3b/KadlvW9vJfXOTf300O4UqaHFzFCuHz+rPkBY=
|
||||
github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI=
|
||||
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
|
||||
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
|
||||
github.com/mitchellh/go-testing-interface v1.14.1 h1:jrgshOhYAUVNMAJiKbEu7EqAwgJJ2JqpQmpLJOu07cU=
|
||||
github.com/mitchellh/go-testing-interface v1.14.1/go.mod h1:gfgS7OtZj6MA4U1UrDRp04twqAjfvlZyCfX3sDjEym8=
|
||||
github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
|
||||
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
|
||||
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
|
||||
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
|
||||
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
|
||||
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
|
||||
|
@ -191,6 +338,8 @@ github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3
|
|||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw=
|
||||
|
@ -199,16 +348,29 @@ github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
|
|||
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
||||
github.com/oklog/run v1.1.0 h1:GEenZ1cK0+q0+wsJew9qUg/DyD8k3JzYsZAi5gYi2mA=
|
||||
github.com/oklog/run v1.1.0/go.mod h1:sVPdnTZT1zYwAJeCMu2Th4T21pA3FPOQRfWjQlk7DVU=
|
||||
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
|
||||
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
|
||||
github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA=
|
||||
github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To=
|
||||
github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk=
|
||||
github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0=
|
||||
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
|
||||
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
|
||||
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
|
||||
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
|
||||
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
|
||||
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
github.com/opentracing-contrib/go-stdlib v1.0.0 h1:TBS7YuVotp8myLon4Pv7BtCBzOTo1DeZCld0Z63mW2w=
|
||||
github.com/opentracing-contrib/go-stdlib v1.0.0/go.mod h1:qtI1ogk+2JhVPIXVc6q+NHziSmy2W5GbdQZFUHADCBU=
|
||||
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
|
||||
github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b h1:FfH+VrHHk6Lxt9HdVS0PXzSXFyS2NbZKXv33FYPol0A=
|
||||
github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b/go.mod h1:AC62GU6hc0BrNm+9RK9VSiwa/EUe1bkIeFORAMcHvJU=
|
||||
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
|
||||
github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY=
|
||||
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
|
||||
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
|
||||
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
|
||||
github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M=
|
||||
|
@ -217,19 +379,38 @@ github.com/perimeterx/marshmallow v1.1.5 h1:a2LALqQ1BlHM8PZblsDdidgv1mWi1DgC2UmX
|
|||
github.com/perimeterx/marshmallow v1.1.5/go.mod h1:dsXbUu8CRzfYP5a87xpp0xq9S3u0Vchtcl8we9tYaXw=
|
||||
github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ=
|
||||
github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
|
||||
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
|
||||
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
|
||||
github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s=
|
||||
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
|
||||
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
|
||||
github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
|
||||
github.com/prometheus/client_golang v1.20.3 h1:oPksm4K8B+Vt35tUhw6GbSNSgVlVSBH0qELP/7u83l4=
|
||||
github.com/prometheus/client_golang v1.20.3/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
|
||||
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
|
||||
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
|
||||
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
|
||||
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
|
||||
github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4=
|
||||
github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
|
||||
github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
|
||||
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
|
||||
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
|
||||
github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
|
||||
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
|
||||
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
|
||||
github.com/prometheus/prometheus v0.51.2 h1:U0faf1nT4CB9DkBW87XLJCBi2s8nwWXdTbyzRUAkX0w=
|
||||
github.com/prometheus/prometheus v0.51.2/go.mod h1:yv4MwOn3yHMQ6MZGHPg/U7Fcyqf+rxqiZfSur6myVtc=
|
||||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||
github.com/rivo/uniseg v0.4.3 h1:utMvzDsuh3suAEnhH0RdHmoPbU648o6CvXxTx4SBMOw=
|
||||
github.com/rivo/uniseg v0.4.3/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
|
||||
|
@ -237,30 +418,54 @@ github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU
|
|||
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
|
||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
|
||||
github.com/safchain/ethtool v0.4.1 h1:S6mEleTADqgynileXoiapt/nKnatyR6bmIHoF+h2ADo=
|
||||
github.com/safchain/ethtool v0.4.1/go.mod h1:XLLnZmy4OCRTkksP/UiMjij96YmIsBfmBQcs7H6tA48=
|
||||
github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM=
|
||||
github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
|
||||
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I=
|
||||
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
|
||||
github.com/shirou/gopsutil v2.21.11+incompatible h1:lOGOyCG67a5dv2hq5Z1BLDUqqKp3HkbjPcz5j6XMS0U=
|
||||
github.com/shirou/gopsutil v2.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
|
||||
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
|
||||
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
|
||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
|
||||
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
|
||||
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
|
||||
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
|
||||
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/tidwall/gjson v1.14.2 h1:6BBkirS0rAHjumnjHF6qgy5d2YAJ1TLIaFE2lzfOLqo=
|
||||
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
||||
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
|
||||
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
|
||||
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
|
||||
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
|
||||
github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
|
||||
github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
|
||||
github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
|
||||
github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
|
||||
github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
||||
github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaOOb6ThwMmTEbhRwtKR97o=
|
||||
github.com/uber/jaeger-client-go v2.30.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk=
|
||||
github.com/uber/jaeger-lib v2.4.1+incompatible h1:td4jdvLcExb4cBISKIpHuGoVXh+dVKhn2Um6rjCsSsg=
|
||||
github.com/uber/jaeger-lib v2.4.1+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U=
|
||||
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
|
||||
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
|
||||
github.com/unknwon/bra v0.0.0-20200517080246-1e3013ecaff8 h1:aVGB3YnaS/JNfOW3tiHIlmNmTDg618va+eT0mVomgyI=
|
||||
|
@ -273,12 +478,16 @@ github.com/urfave/cli v1.22.15 h1:nuqt+pdC/KqswQKhETJjo7pvn/k4xMUxgW6liI7XpnM=
|
|||
github.com/urfave/cli v1.22.15/go.mod h1:wSan1hmo5zeyLGBjRJbzRTNk8gwoYa2B9n4q9dmRIc0=
|
||||
github.com/urfave/cli/v2 v2.27.4 h1:o1owoI+02Eb+K107p27wEX9Bb8eqIoZCfLXloLUSWJ8=
|
||||
github.com/urfave/cli/v2 v2.27.4/go.mod h1:m4QzxcD2qpra4z7WhzEGn74WZLViBnMpb1ToCAKdGRQ=
|
||||
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
|
||||
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
||||
github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk=
|
||||
github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs=
|
||||
github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8=
|
||||
github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
|
||||
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
|
||||
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
|
||||
github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ=
|
||||
github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0=
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
|
||||
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
|
@ -289,6 +498,12 @@ github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
|
|||
github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
|
||||
github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
|
||||
github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
|
||||
go.etcd.io/etcd/api/v3 v3.5.7 h1:sbcmosSVesNrWOJ58ZQFitHMdncusIifYcrBfwrlJSY=
|
||||
go.etcd.io/etcd/api/v3 v3.5.7/go.mod h1:9qew1gCdDDLu+VwmeG+iFpL+QlpHTo7iubavdVDgCAA=
|
||||
go.etcd.io/etcd/client/pkg/v3 v3.5.7 h1:y3kf5Gbp4e4q7egZdn5T7W9TSHUvkClN6u+Rq9mEOmg=
|
||||
go.etcd.io/etcd/client/pkg/v3 v3.5.7/go.mod h1:o0Abi1MK86iad3YrWhgUsbGx1pmTS+hrORWc2CamuhY=
|
||||
go.etcd.io/etcd/client/v3 v3.5.7 h1:u/OhpiuCgYY8awOHlhIhmGIGpxfBU/GZBUP3m/3/Iz4=
|
||||
go.etcd.io/etcd/client/v3 v3.5.7/go.mod h1:sOWmj9DZUMyAngS7QQwCyAXXAL6WhgTOPLNS/NabQgw=
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0 h1:9G6E0TXzGFVfTnawRzrPl83iHOAV7L8NJiR8RSGYV1g=
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0/go.mod h1:azvtTADFQJA8mX80jIH/akaE7h+dbm/sVuaHqN13w74=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.53.0 h1:IVtyPth4Rs5P8wIf0mP2KVKFNTJ4paX9qQ4Hkh5gFdc=
|
||||
|
@ -315,15 +530,19 @@ go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt3
|
|||
go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ=
|
||||
go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0=
|
||||
go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8=
|
||||
go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
|
||||
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
|
||||
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
|
||||
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
|
||||
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
|
||||
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
|
||||
go.yaml.in/yaml/v3 v3.0.3 h1:bXOww4E/J3f66rav3pX3m8w6jDE4knZjGOw8b5Y6iNE=
|
||||
go.yaml.in/yaml/v3 v3.0.3/go.mod h1:tBHosrYAkRZjRAOREWbDnBXUf08JOwYq++0QNwQiWzI=
|
||||
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
|
||||
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
|
||||
go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo=
|
||||
go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so=
|
||||
golang.org/x/arch v0.12.0 h1:UsYJhbzPYGsT0HbEdmYcqtCv8UNGvnaL561NnIUvaKg=
|
||||
golang.org/x/arch v0.12.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
||||
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY=
|
||||
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ=
|
||||
|
@ -334,39 +553,73 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
|||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
|
||||
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8=
|
||||
golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo=
|
||||
golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM=
|
||||
golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs=
|
||||
golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
|
||||
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
|
||||
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s=
|
||||
golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug=
|
||||
golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4=
|
||||
golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U=
|
||||
golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20190424220101-1e8e1cfdf96b/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
|
||||
golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
||||
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
|
@ -380,15 +633,19 @@ golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3j
|
|||
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8=
|
||||
gonum.org/v1/gonum v0.12.0 h1:xKuo6hzt+gMav00meVPUlXwSdoEJP46BR+wdxQEFK2o=
|
||||
gonum.org/v1/gonum v0.12.0/go.mod h1:73TDxJfAAHeA8Mk9mf8NlIppyhQNo5GLTcYeqgo2lvY=
|
||||
google.golang.org/genproto v0.0.0-20180518175338-11a468237815/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd h1:BBOTEWLuuEGQy9n1y9MhVJ9Qt0BDu21X8qZs71/uPZo=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd/go.mod h1:fO8wJzT2zbQbAjbIoos1285VfEIYKDDY+Dt+WpTkh6g=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 h1:e7S5W7MGGLaSu8j3YjdezkZ+m1/Nm0uRVRMEMGk26Xs=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
|
||||
google.golang.org/grpc v1.12.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw=
|
||||
google.golang.org/grpc v1.66.0 h1:DibZuoBznOxbDQxRINckZcUvnCEvrW9pcWIE2yF9r1c=
|
||||
google.golang.org/grpc v1.66.0/go.mod h1:s3/l6xSSCURdVfAnL+TqCNMyTDAGN6+lZeVxnZR128Y=
|
||||
google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io=
|
||||
google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
gopkg.in/fsnotify/fsnotify.v1 v1.4.7 h1:XNNYLJHt73EyYiCZi6+xjupS9CpvmiDgjPTAjrBlQbo=
|
||||
|
@ -397,6 +654,10 @@ gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
|
|||
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
|
||||
gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc=
|
||||
gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc=
|
||||
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
|
@ -419,8 +680,6 @@ k8s.io/cri-client v0.31.3 h1:9ZwddaNJomqkTBYQqSmB+Ccns3beY4HyYDwmRtWTCJM=
|
|||
k8s.io/cri-client v0.31.3/go.mod h1:klbWiYkOatOQOkXOYZMZMGSTM8q9eC/efsYGuXcgPes=
|
||||
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
|
||||
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
|
||||
k8s.io/kubelet v0.29.0 h1:SX5hlznTBcGIrS1scaf8r8p6m3e475KMifwt9i12iOk=
|
||||
k8s.io/kubelet v0.29.0/go.mod h1:kvKS2+Bz2tgDOG1S1q0TH2z1DasNuVF+8p6Aw7xvKkI=
|
||||
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A=
|
||||
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
|
||||
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
|
||||
|
@ -428,5 +687,5 @@ sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMm
|
|||
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
|
||||
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
|
||||
sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
|
||||
sigs.k8s.io/yaml v1.5.0 h1:M10b2U7aEUY6hRtU870n2VTPgR5RZiL/I6Lcc2F4NUQ=
|
||||
sigs.k8s.io/yaml v1.5.0/go.mod h1:wZs27Rbxoai4C0f8/9urLZtZtF3avA3gKvGyPdDqTO4=
|
||||
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
|
||||
sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
|
||||
|
|
|
@ -3,8 +3,19 @@ LogLevel = "Info"
|
|||
# logging filepath
|
||||
# LogFile = ""
|
||||
|
||||
# the blacklist for tracing and metrics
|
||||
Blacklist = ["softlockup", "ethtool"]
|
||||
[APIServer]
|
||||
# TCPAddr is the tcp monitoring information of the huatuo-bamai server
|
||||
TCPAddr = ":19704"
|
||||
|
||||
[HuaTuoConf]
|
||||
UserName = "huatuo-bamai"
|
||||
PassWord = "huatuo-bamai"
|
||||
UnixAddr = "/home/xiaoju/didicloud/huatuo/shared/huatuo.unix"
|
||||
ServerIP = "127.0.0.1:12735"
|
||||
KeepaliveTimeout = 300
|
||||
APIVersion = "v1.3"
|
||||
ReqTimeout = 15
|
||||
|
||||
|
||||
[RuntimeCgroup]
|
||||
LimitInitCPU = 0.5
|
||||
|
@ -14,12 +25,13 @@ Blacklist = ["softlockup", "ethtool"]
|
|||
|
||||
# storage configurations
|
||||
[Storage]
|
||||
# disable ES storage if one of Address, Username, Password empty.
|
||||
# ES & Kibana configurations
|
||||
[Storage.ES]
|
||||
Address = "http://127.0.0.1:9200"
|
||||
Username = "elastic"
|
||||
Password = "huatuo-bamai"
|
||||
Index = "huatuo_bamai"
|
||||
# disable ES storage if one of Address, Username, Password empty.
|
||||
Address = "http://10.88.128.149:30963"
|
||||
Username = "20416"
|
||||
Password = "E4haxbsIUPiUZES"
|
||||
Index = "cn_huatuo_relay_cases"
|
||||
|
||||
# tracer's record data
|
||||
# Path: all but the last element of path for per tracer
|
||||
|
@ -30,78 +42,84 @@ Blacklist = ["softlockup", "ethtool"]
|
|||
RotationSize = 100
|
||||
MaxRotation = 10
|
||||
|
||||
[TaskConfig]
|
||||
MaxRunningTask = 10
|
||||
|
||||
[Tracing]
|
||||
[Tracing.Cpuidle]
|
||||
CgUserth = 75 #75%
|
||||
CgDeltaUserth = 30 #30%
|
||||
CgSysth = 45 #45%
|
||||
CgDeltaSysth = 0 #0
|
||||
CgUsageth = 90 #90%
|
||||
CgDeltaUsageth = 30 #30%
|
||||
CgStep = 10 #10s
|
||||
CgGrace = 1800 #1800s
|
||||
CgUsageToolduration = 10 #10s
|
||||
[Tracing.Cpusys]
|
||||
CPUSysth = 50 #50%
|
||||
CPUSysDelta = 30 #30%
|
||||
CPUSysStep = 1 #1s
|
||||
CPUSysToolduration = 10 #10s
|
||||
[Tracing.Waitrate]
|
||||
[Tracing.Waitrate.SpikeThreshold]
|
||||
"0" = 50.0
|
||||
"101" = 80.0
|
||||
"102" = 120.0
|
||||
"103" = 170.0
|
||||
"1" = 220.0
|
||||
"2" = 270.0
|
||||
"3" = 320.0
|
||||
"4" = 370.0
|
||||
[Tracing.Waitrate.SlopeThreshold]
|
||||
"0" = 0.05
|
||||
"101" = 0.1
|
||||
"102" = 0.2
|
||||
"103" = 0.3
|
||||
"1" = 0.4
|
||||
"2" = 0.5
|
||||
"3" = 0.6
|
||||
"4" = 0.7
|
||||
[Tracing.Waitrate.SampleConfig]
|
||||
# DataSetCapability * SampleInterval is time capability (in seconds)
|
||||
DataSetCapability = 360
|
||||
# Seconds
|
||||
OnceCaptureTime = 15
|
||||
SampleInterval = 5
|
||||
[Tracing.Softirq]
|
||||
ThresholdTime = 100000000
|
||||
[Tracing.Dload]
|
||||
ThresholdLoad = 5.0
|
||||
MonitorGap = 180
|
||||
[Tracing.IOTracing]
|
||||
IOScheduleThreshold = 100 #100ms
|
||||
ReadThreshold = 2000 #MB/s
|
||||
WriteThreshold = 1500 #MB/s
|
||||
IOutilThreshold = 90 #90%
|
||||
IOwaitThreshold = 100 #100ms
|
||||
PeriodSecond = 8
|
||||
MaxStackNumber = 16
|
||||
TopProcessCount = 15
|
||||
TopFilesPerProcess = 10
|
||||
[Tracing.MemoryReclaim]
|
||||
Deltath = 900000000 #900ms
|
||||
[Tracing.MemoryBurst]
|
||||
HistoryWindowLength = 60
|
||||
SampleInterval = 5 # seconds
|
||||
SilencePeriod = 300 # seconds
|
||||
TopNProcesses = 10
|
||||
BurstRatio = 2.0
|
||||
AnonThreshold = 70 # percent
|
||||
# the latency threshold for package receive
|
||||
[Tracing.NetRecvLat]
|
||||
ToNetIf = 5 # ms, from driver to a core recv
|
||||
ToTCPV4 = 10 # ms, from driver to TCP recv, contains ToNetIf
|
||||
ToUserCopy = 115 # ms, from driver to user recv, contains ToNetIf + ToUserCopy
|
||||
IgnoreHost = true # whether to ignore the host process
|
||||
IgnoreContainerLevel = [103, 3, 4]
|
||||
# blacklist
|
||||
BlackList = ["softlockup"]
|
||||
|
||||
[Tracing.Cpuidle]
|
||||
CgUserth = 75 #75%
|
||||
CgDeltaUserth = 30 #30%
|
||||
CgSysth = 45 #45%
|
||||
CgDeltaSysth = 0 #0
|
||||
CgUsageth = 90 #90%
|
||||
CgDeltaUsageth = 30 #30%
|
||||
CgStep = 10 #10s
|
||||
CgGrace = 1800 #1800s
|
||||
CgUsageToolduration = 10 #10s
|
||||
[Tracing.Cpusys]
|
||||
CPUSysth = 50 #50%
|
||||
CPUSysDelta = 30 #30%
|
||||
CPUSysStep = 1 #1s
|
||||
CPUSysToolduration = 10 #10s
|
||||
[Tracing.Waitrate]
|
||||
[Tracing.Waitrate.SpikeThreshold]
|
||||
"0" = 50.0
|
||||
"101" = 80.0
|
||||
"102" = 120.0
|
||||
"103" = 170.0
|
||||
"1" = 220.0
|
||||
"2" = 270.0
|
||||
"3" = 320.0
|
||||
"4" = 370.0
|
||||
[Tracing.Waitrate.SlopeThreshold]
|
||||
"0" = 0.05
|
||||
"101" = 0.1
|
||||
"102" = 0.2
|
||||
"103" = 0.3
|
||||
"1" = 0.4
|
||||
"2" = 0.5
|
||||
"3" = 0.6
|
||||
"4" = 0.7
|
||||
[Tracing.Waitrate.SampleConfig]
|
||||
# DataSetCapability * SampleInterval is time capability (in seconds)
|
||||
DataSetCapability = 360
|
||||
# Seconds
|
||||
OnceCaptureTime = 15
|
||||
SampleInterval = 5
|
||||
[Tracing.Softirq]
|
||||
ThresholdTime = 100000000
|
||||
[Tracing.Dload]
|
||||
ThresholdLoad = 5.0
|
||||
MonitorGap = 180
|
||||
[Tracing.IOTracing]
|
||||
IOScheduleThreshold = 100 #100ms
|
||||
ReadThreshold = 2000 #MB/s
|
||||
WriteThreshold = 1500 #MB/s
|
||||
IOutilThreshold = 90 #90%
|
||||
IOwaitThreshold = 100 #100ms
|
||||
PeriodSecond = 8
|
||||
MaxStackNumber = 16
|
||||
TopProcessCount = 15
|
||||
TopFilesPerProcess = 10
|
||||
[Tracing.MemoryReclaim]
|
||||
Deltath = 900000000 #900ms
|
||||
[Tracing.MemoryBurst]
|
||||
HistoryWindowLength = 60
|
||||
SampleInterval = 5 # seconds
|
||||
SilencePeriod = 300 # seconds
|
||||
TopNProcesses = 10
|
||||
BurstRatio = 2.0
|
||||
AnonThreshold = 70 # percent
|
||||
# the latency threshold for package receive
|
||||
[Tracing.NetRecvLat]
|
||||
ToNetIf = 5 # ms, from driver to a core recv
|
||||
ToTCPV4 = 10 # ms, from driver to TCP recv, contains ToNetIf
|
||||
ToUserCopy = 115 # ms, from driver to user recv, contains ToNetIf + ToUserCopy
|
||||
IgnoreHost = true # whether to ignore the host process
|
||||
IgnoreContainerLevel = [103, 3, 4]
|
||||
[Tracing.Dropwatch]
|
||||
IgnoreNeighInvalidate = true # ignore the error of `neigh_invalidate`
|
||||
[Tracing.Netdev]
|
||||
|
@ -114,6 +132,9 @@ Blacklist = ["softlockup", "ethtool"]
|
|||
|
||||
# Collector Configurations.
|
||||
[MetricCollector]
|
||||
# blacklist
|
||||
BlackList = ["ethtool"]
|
||||
|
||||
# Netdev Configurations.
|
||||
[MetricCollector.Netdev]
|
||||
# Use `netlink` instead of `procfs net/dev` to get netdev statistic.
|
||||
|
@ -133,15 +154,16 @@ Blacklist = ["softlockup", "ethtool"]
|
|||
# 'IgnoredDevices' has higher priority than 'AcceptDevices'.
|
||||
IgnoredDevices = "^(lo)|(docker\\w*)|(veth\\w*)$"
|
||||
#AcceptDevices = ""
|
||||
[MetricCollector.Vmstat]
|
||||
IncludedMetrics = "allocstall|nr_active_anon|nr_active_file|nr_boost_pages|nr_dirty|nr_free_pages|nr_inactive_anon|nr_inactive_file|nr_kswapd_boost|nr_mlock|nr_shmem|nr_slab_reclaimable|nr_slab_unreclaimable|nr_unevictable|nr_writeback|numa_pages_migrated|pgdeactivate|pgrefill|pgscan_direct|pgscan_kswapd|pgsteal_direct|pgsteal_kswapd"
|
||||
ExcludedMetrics = "total"
|
||||
[MetricCollector.MemoryStat]
|
||||
IncludedMetrics = "active_anon|active_file|dirty|inactive_anon|inactive_file|pgdeactivate|pgrefill|pgscan_direct|pgscan_kswapd|pgsteal_direct|pgsteal_kswapd|shmem|unevictable|writeback|pgscan_globaldirect|pgscan_globalkswapd|pgscan_cswapd|pgsteal_cswapd|pgsteal_globaldirect|pgsteal_globalkswapd"
|
||||
ExcludedMetrics = "total"
|
||||
[MetricCollector.MemoryEvents]
|
||||
IncludedMetrics = "watermark_inc|watermark_dec"
|
||||
# ExcludedMetrics = ""
|
||||
[MetricCollector.Vmstat]
|
||||
IncludedMetrics = "allocstall|nr_active_anon|nr_active_file|nr_boost_pages|nr_dirty|nr_free_pages|nr_inactive_anon|nr_inactive_file|nr_kswapd_boost|nr_mlock|nr_shmem|nr_slab_reclaimable|nr_slab_unreclaimable|nr_unevictable|nr_writeback|numa_pages_migrated|pgdeactivate|pgrefill|pgscan_direct|pgscan_kswapd|pgsteal_direct|pgsteal_kswapd"
|
||||
ExcludedMetrics = "total"
|
||||
[MetricCollector.MemoryStat]
|
||||
IncludedMetrics = "active_anon|active_file|dirty|inactive_anon|inactive_file|pgdeactivate|pgrefill|pgscan_direct|pgscan_kswapd|pgsteal_direct|pgsteal_kswapd|shmem|unevictable|writeback|pgscan_globaldirect|pgscan_globalkswapd|pgscan_cswapd|pgsteal_cswapd|pgsteal_globaldirect|pgsteal_globalkswapd"
|
||||
ExcludedMetrics = "total"
|
||||
[MetricCollector.MemoryEvents]
|
||||
IncludedMetrics = "watermark_inc|watermark_dec"
|
||||
# ExcludedMetrics = ""
|
||||
# Netstat Configurations.
|
||||
[MetricCollector.Netstat]
|
||||
# ExcludedMetrics: Ignore keys in this netstat statistic.
|
||||
# IncludedMetrics: Accept keys in this netstat statistic.
|
||||
|
@ -150,5 +172,53 @@ Blacklist = ["softlockup", "ethtool"]
|
|||
# 'ExcludedMetrics' has higher priority than 'IncludedMetrics'.
|
||||
#ExcludedMetrics = ""
|
||||
#IncludedMetrics = ""
|
||||
[MetricCollector.MountPointStat]
|
||||
IncludedMountPoints = "(^/home$)|(^/$)|(^/boot$)"
|
||||
[MetricCollector.MountPointStat]
|
||||
IncludedMountPoints = "(^/home$)|(^/$)|(^/boot$)"
|
||||
|
||||
# Known warning pattern filter
|
||||
#
|
||||
# array[0] - the name of the known issue
|
||||
# array[1] - regex of pattern which help identify the known issues
|
||||
# array[2] & array[3] - regex of known clusters or containers hit the issue
|
||||
#
|
||||
# Example:
|
||||
# ["ep_poll", "ep_scan_ready_list.constprop.21\\+0x217", "athena-predict", ""]
|
||||
# issue name: ep_poll
|
||||
# ep_scan_ready_list.constprop.21\\+0x217 is used to identify the issue base on
|
||||
# the stack backtrace
|
||||
# athena-predict is part of name of cluster which are very known hit this
|
||||
# issue frequently
|
||||
[WarningFilter]
|
||||
PatternList = [
|
||||
[
|
||||
"coredump",
|
||||
"do_exit\\+0x1c9",
|
||||
"",
|
||||
""
|
||||
],
|
||||
[
|
||||
"ep_poll",
|
||||
"ep_scan_ready_list.constprop.21\\+0x217",
|
||||
"athena-predict",
|
||||
""
|
||||
],
|
||||
[
|
||||
"php_parallel_exit",
|
||||
"unlink_anon_vmas\\+0x76"
|
||||
],
|
||||
[
|
||||
"futex",
|
||||
"futex_wait_queue_me\\+0xc1"
|
||||
],
|
||||
[
|
||||
"netrecvlat",
|
||||
"comm=gundam_client:"
|
||||
],
|
||||
[
|
||||
"", "", "", ""
|
||||
]
|
||||
]
|
||||
|
||||
[Pod]
|
||||
KubeletPodListURL = "http://127.0.0.1:10255/pods"
|
||||
DockerAPIVersion = "1.24"
|
||||
|
|
|
@ -1,106 +0,0 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"huatuo-bamai/internal/cgroups/paths"
|
||||
"huatuo-bamai/internal/cgroups/stats"
|
||||
v1 "huatuo-bamai/internal/cgroups/v1"
|
||||
v2 "huatuo-bamai/internal/cgroups/v2"
|
||||
|
||||
extcgroups "github.com/containerd/cgroups/v3"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
var cpuPeriod uint64 = 100000
|
||||
|
||||
// Mode is the cgroups mode of the host system
|
||||
type Mode int
|
||||
|
||||
const (
|
||||
// Unavailable cgroup mountpoint
|
||||
Unavailable Mode = iota
|
||||
// Legacy cgroups v1
|
||||
Legacy
|
||||
// Hybrid with cgroups v1 and v2 controllers mounted
|
||||
Hybrid
|
||||
// Unified with only cgroups v2 mounted
|
||||
Unified
|
||||
)
|
||||
|
||||
type Cgroup interface {
|
||||
// Name returns the cgroup name.
|
||||
Name() string
|
||||
// New a runtime config instance.
|
||||
NewRuntime(path string, spec *specs.LinuxResources) error
|
||||
// Delete a runtime config
|
||||
DeleteRuntime() error
|
||||
// Update a runtime config
|
||||
UpdateRuntime(spec *specs.LinuxResources) error
|
||||
// Add pids to cgroup.procs
|
||||
AddProc(pid uint64) error
|
||||
// Pids return pids of cgroups
|
||||
Pids(path string) ([]int32, error)
|
||||
// CpuUsage return cgroups user/system and total usage.
|
||||
CpuUsage(path string) (*stats.CpuUsage, error)
|
||||
// CpuStatRaw return cpu.stat raw data
|
||||
CpuStatRaw(path string) (map[string]uint64, error)
|
||||
// CpuQuotaAndPeriod cgroup quota and period
|
||||
CpuQuotaAndPeriod(path string) (*stats.CpuQuota, error)
|
||||
// MemoryStatRaw memory.stat
|
||||
MemoryStatRaw(path string) (map[string]uint64, error)
|
||||
// MemoryEventRaw memory.stat
|
||||
MemoryEventRaw(path string) (map[string]uint64, error)
|
||||
}
|
||||
|
||||
func NewCgroupManager() (Cgroup, error) {
|
||||
switch extcgroups.Mode() {
|
||||
case extcgroups.Legacy:
|
||||
return v1.New()
|
||||
case extcgroups.Hybrid, extcgroups.Unified:
|
||||
return v2.New()
|
||||
default:
|
||||
return nil, fmt.Errorf("not supported")
|
||||
}
|
||||
}
|
||||
|
||||
func CgroupMode() Mode {
|
||||
return Mode(extcgroups.Mode())
|
||||
}
|
||||
|
||||
func ToSpec(cpu float64, memory int64) *specs.LinuxResources {
|
||||
spec := &specs.LinuxResources{}
|
||||
|
||||
if cpu != 0 {
|
||||
quota := int64(cpu * float64(cpuPeriod))
|
||||
spec.CPU = &specs.LinuxCPU{
|
||||
Period: &cpuPeriod,
|
||||
Quota: "a,
|
||||
}
|
||||
}
|
||||
|
||||
if memory != 0 {
|
||||
spec.Memory = &specs.LinuxMemory{Limit: &memory}
|
||||
}
|
||||
|
||||
return spec
|
||||
}
|
||||
|
||||
func RootFsFilePath(subsys string) string {
|
||||
return filepath.Join(paths.RootfsDefaultPath, subsys)
|
||||
}
|
|
@ -1,159 +0,0 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"huatuo-bamai/internal/cgroups/paths"
|
||||
"huatuo-bamai/internal/cgroups/pids"
|
||||
"huatuo-bamai/internal/cgroups/stats"
|
||||
"huatuo-bamai/internal/utils/parseutil"
|
||||
|
||||
extv1 "github.com/containerd/cgroups/v3/cgroup1"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
var clockTicks = getClockTicks()
|
||||
|
||||
const microsecondsInSecond = 1000000
|
||||
|
||||
// a typed name for a cgroup subsystem
|
||||
const (
|
||||
subsysDevices = "devices"
|
||||
subsysHugetlb = "hugetlb"
|
||||
subsysFreezer = "freezer"
|
||||
subsysPids = "pids"
|
||||
subsysNetCLS = "net_cls"
|
||||
subsysNetPrio = "net_prio"
|
||||
subsysPerfEvent = "perf_event"
|
||||
subsysCpuset = "cpuset"
|
||||
subsysCpu = "cpu"
|
||||
subsysCpuacct = "cpuacct"
|
||||
subsysMemory = "memory"
|
||||
subsysBlkio = "blkio"
|
||||
subsysRdma = "rdma"
|
||||
)
|
||||
|
||||
type CgroupV1 struct {
|
||||
name string
|
||||
cgroup extv1.Cgroup
|
||||
}
|
||||
|
||||
func New() (*CgroupV1, error) {
|
||||
return &CgroupV1{
|
||||
name: "legacy",
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *CgroupV1) Name() string {
|
||||
return c.name
|
||||
}
|
||||
|
||||
func (c *CgroupV1) NewRuntime(path string, spec *specs.LinuxResources) error {
|
||||
cg, err := extv1.New(extv1.StaticPath(path), spec)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.cgroup = cg
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *CgroupV1) DeleteRuntime() error {
|
||||
rootfs, err := extv1.Load(extv1.RootPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := c.cgroup.MoveTo(rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return c.cgroup.Delete()
|
||||
}
|
||||
|
||||
func (c *CgroupV1) UpdateRuntime(spec *specs.LinuxResources) error {
|
||||
return c.cgroup.Update(spec)
|
||||
}
|
||||
|
||||
func (c *CgroupV1) AddProc(pid uint64) error {
|
||||
return c.cgroup.AddProc(pid)
|
||||
}
|
||||
|
||||
func (c *CgroupV1) Pids(path string) ([]int32, error) {
|
||||
return pids.Tasks(paths.Path(subsysCpu, path), "tasks")
|
||||
}
|
||||
|
||||
func (c *CgroupV1) CpuUsage(path string) (*stats.CpuUsage, error) {
|
||||
statPath := paths.Path(subsysCpu, path, "cpuacct.stat")
|
||||
raw, err := parseutil.RawKV(statPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
usagePath := paths.Path(subsysCpu, path, "cpuacct.usage")
|
||||
usage, err := parseutil.ReadUint(usagePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
user := (raw["user"] * microsecondsInSecond) / clockTicks
|
||||
system := (raw["system"] * microsecondsInSecond) / clockTicks
|
||||
|
||||
return &stats.CpuUsage{
|
||||
User: user,
|
||||
System: system,
|
||||
Usage: usage / 1000,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *CgroupV1) CpuStatRaw(path string) (map[string]uint64, error) {
|
||||
return parseutil.RawKV(paths.Path(subsysCpu, path, "cpu.stat"))
|
||||
}
|
||||
|
||||
func (c *CgroupV1) CpuQuotaAndPeriod(path string) (*stats.CpuQuota, error) {
|
||||
periodPath := paths.Path(subsysCpu, path, "cpu.cfs_period_us")
|
||||
period, err := parseutil.ReadUint(periodPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
quotaPath := paths.Path(subsysCpu, path, "cpu.cfs_quota_us")
|
||||
quota, err := parseutil.ReadInt(quotaPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if quota == -1 {
|
||||
return &stats.CpuQuota{
|
||||
Quota: math.MaxUint64,
|
||||
Period: period,
|
||||
}, nil
|
||||
}
|
||||
|
||||
return &stats.CpuQuota{
|
||||
Quota: uint64(quota),
|
||||
Period: period,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *CgroupV1) MemoryStatRaw(path string) (map[string]uint64, error) {
|
||||
return parseutil.RawKV(paths.Path(subsysMemory, path, "memory.stat"))
|
||||
}
|
||||
|
||||
func (c *CgroupV1) MemoryEventRaw(path string) (map[string]uint64, error) {
|
||||
return parseutil.RawKV(paths.Path(subsysMemory, path, "memory.events"))
|
||||
}
|
|
@ -1,134 +0,0 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
|
||||
"huatuo-bamai/internal/cgroups/paths"
|
||||
"huatuo-bamai/internal/cgroups/pids"
|
||||
"huatuo-bamai/internal/cgroups/stats"
|
||||
"huatuo-bamai/internal/utils/parseutil"
|
||||
|
||||
extv2 "github.com/containerd/cgroups/v3/cgroup2"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
// CgroupV2 implements cgroup operations on the unified (v2) hierarchy.
type CgroupV2 struct {
	// name is the hierarchy name; New always sets it to "unified".
	name string
	// cgroup is the systemd-managed runtime cgroup; nil until NewRuntime
	// has completed successfully.
	cgroup *extv2.Manager
}
|
||||
|
||||
func New() (*CgroupV2, error) {
|
||||
return &CgroupV2{
|
||||
name: "unified",
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *CgroupV2) Name() string {
|
||||
return c.name
|
||||
}
|
||||
|
||||
// NewRuntime creates a systemd-managed cgroup named "<path>.slice" under
// the v2 root, applies the given resource spec, and enables the cpu and
// memory controllers for it. On success the manager is cached on c for
// later UpdateRuntime/DeleteRuntime/AddProc calls.
func (c *CgroupV2) NewRuntime(path string, spec *specs.LinuxResources) error {
	// NOTE(review): the -1 pid presumably means "create the slice without
	// attaching a process" — confirm against the containerd cgroups docs.
	m, err := extv2.NewSystemd("/", path+".slice", -1, extv2.ToResources(spec))
	if err != nil {
		return fmt.Errorf("cgroup2 new systemd: %w", err)
	}

	// enable cpu and memory cgroup controllers
	if err := m.ToggleControllers([]string{"cpu", "memory"}, extv2.Enable); err != nil {
		// best-effort rollback: drop the half-initialized slice
		_ = m.DeleteSystemd()
		return fmt.Errorf("cgroup2 enabling cpu and memory controllers: %w", err)
	}

	c.cgroup = m
	return nil
}
|
||||
|
||||
// DeleteRuntime removes the runtime cgroup created by NewRuntime. Member
// processes are first moved to the cgroup root so the directory is empty
// and can actually be deleted.
func (c *CgroupV2) DeleteRuntime() error {
	rootfs, err := extv2.LoadSystemd("/", "")
	if err != nil {
		return err
	}

	// evacuate our processes to the root cgroup first
	if err := c.cgroup.MoveTo(rootfs); err != nil {
		return err
	}

	if err := c.cgroup.Delete(); err != nil {
		return err
	}

	return c.cgroup.DeleteSystemd()
}
|
||||
|
||||
func (c *CgroupV2) UpdateRuntime(spec *specs.LinuxResources) error {
|
||||
return c.cgroup.Update(extv2.ToResources(spec))
|
||||
}
|
||||
|
||||
// AddProc attaches the process identified by pid to the runtime cgroup.
// Requires a prior successful NewRuntime call (c.cgroup is nil otherwise).
func (c *CgroupV2) AddProc(pid uint64) error {
	return c.cgroup.AddProc(pid)
}
|
||||
|
||||
func (c *CgroupV2) Pids(path string) ([]int32, error) {
|
||||
return pids.Tasks(paths.Path(path), "cgroup.threads")
|
||||
}
|
||||
|
||||
func (c *CgroupV2) CpuStatRaw(path string) (map[string]uint64, error) {
|
||||
return parseutil.RawKV(paths.Path(path, "cpu.stat"))
|
||||
}
|
||||
|
||||
func (c *CgroupV2) CpuUsage(path string) (*stats.CpuUsage, error) {
|
||||
raw, err := c.CpuStatRaw(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &stats.CpuUsage{
|
||||
Usage: raw["usage_usec"],
|
||||
User: raw["user_usec"],
|
||||
System: raw["system_usec"],
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *CgroupV2) CpuQuotaAndPeriod(path string) (*stats.CpuQuota, error) {
|
||||
maxpath := paths.Path(path, "cpu.max")
|
||||
|
||||
maxQuota, period, err := parseutil.KV(maxpath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if maxQuota == "max" {
|
||||
return &stats.CpuQuota{Quota: math.MaxUint64, Period: period}, nil
|
||||
}
|
||||
|
||||
quota, err := strconv.ParseUint(maxQuota, 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &stats.CpuQuota{Quota: quota, Period: period}, nil
|
||||
}
|
||||
|
||||
func (c *CgroupV2) MemoryStatRaw(path string) (map[string]uint64, error) {
|
||||
return parseutil.RawKV(paths.Path(path, "memory.stat"))
|
||||
}
|
||||
|
||||
func (c *CgroupV2) MemoryEventRaw(path string) (map[string]uint64, error) {
|
||||
return parseutil.RawKV(paths.Path(path, "memory.events"))
|
||||
}
|
|
@ -32,9 +32,6 @@ type CommonConf struct {
|
|||
LogLevel string `default:"Info"`
|
||||
LogFile string
|
||||
|
||||
// Blacklist for tracing and metrics
|
||||
Blacklist []string
|
||||
|
||||
// APIServer addr
|
||||
APIServer struct {
|
||||
TCPAddr string `default:":19704"`
|
||||
|
@ -82,6 +79,9 @@ type CommonConf struct {
|
|||
}
|
||||
|
||||
Tracing struct {
|
||||
// blacklist
|
||||
BlackList []string
|
||||
|
||||
// Cpuidle for cpuidle configuration
|
||||
Cpuidle struct {
|
||||
CgUserth uint64
|
||||
|
@ -176,6 +176,9 @@ type CommonConf struct {
|
|||
}
|
||||
|
||||
MetricCollector struct {
|
||||
// blacklist
|
||||
BlackList []string
|
||||
|
||||
Netdev struct {
|
||||
// Use `netlink` instead of `procfs net/dev` to get netdev statistic.
|
||||
// Only support the host environment to use `netlink` now!
|
||||
|
@ -222,11 +225,8 @@ type CommonConf struct {
|
|||
|
||||
// Pod configuration
|
||||
Pod struct {
|
||||
KubeletPodListURL string `default:"http://127.0.0.1:10255/pods"`
|
||||
KubeletPodListHTTPSURL string `default:"https://127.0.0.1:10250/pods"`
|
||||
KubeletPodCACertPath string `default:"/etc/kubernetes/ca.pem"`
|
||||
KubeletPodClientCertPath string `default:"/var/lib/kubelet/pki/kubelet-client-current.pem"`
|
||||
DockerAPIVersion string `default:"1.24"`
|
||||
KubeletPodListURL string `default:"http://127.0.0.1:10255/pods"`
|
||||
DockerAPIVersion string `default:"1.24"`
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,94 +0,0 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package pod
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultSystemdSuffix = ".slice"
|
||||
defaultNodeCgroupName = "kubepods"
|
||||
)
|
||||
|
||||
// {"kubepods", "burstable", "pod1234-abcd-5678-efgh"}
|
||||
type cgroupPath []string
|
||||
|
||||
// escapeSystemd replaces every "-" with "_" so a cgroup path component can
// be embedded in a systemd slice name (systemd uses "-" as its hierarchy
// separator).
func escapeSystemd(part string) string {
	escaped := strings.Map(func(r rune) rune {
		if r == '-' {
			return '_'
		}
		return r
	}, part)
	return escaped
}
|
||||
|
||||
// systemd represents slice hierarchy using `-`, so we need to follow suit when
|
||||
// generating the path of slice.
|
||||
// Essentially, test-a-b.slice becomes /test.slice/test-a.slice/test-a-b.slice.
|
||||
func expandSytemdSlice(slice string) string {
|
||||
var path, prefix string
|
||||
|
||||
sliceName := strings.TrimSuffix(slice, defaultSystemdSuffix)
|
||||
for _, component := range strings.Split(sliceName, "-") {
|
||||
// Append the component to the path and to the prefix.
|
||||
path += "/" + prefix + component + defaultSystemdSuffix
|
||||
prefix += component + "-"
|
||||
}
|
||||
|
||||
return path
|
||||
}
|
||||
|
||||
// {"kubepods", "burstable", "pod1234-abcd-5678-efgh"} becomes
|
||||
// "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod1234_abcd_5678_efgh.slice"
|
||||
func (paths cgroupPath) ToSystemd() string {
|
||||
newparts := []string{}
|
||||
for _, part := range paths {
|
||||
part = escapeSystemd(part)
|
||||
newparts = append(newparts, part)
|
||||
}
|
||||
|
||||
return expandSytemdSlice(strings.Join(newparts, "-") + defaultSystemdSuffix)
|
||||
}
|
||||
|
||||
// ToCgroupfs renders the path components as a plain cgroupfs path rooted
// at "/", e.g. {"kubepods", "burstable", "podX"} -> "/kubepods/burstable/podX".
func (paths cgroupPath) ToCgroupfs() string {
	return "/" + path.Join(paths...)
}
|
||||
|
||||
// containerCgroupPath builds the cgroup path components for a container:
// kubepods[/<qos-class>]/pod<uid>[/<container-id>]. The QoS component is
// omitted for Guaranteed pods, and the container id is only appended for
// the cgroupfs driver (the systemd branch stops at the pod level here).
func containerCgroupPath(containerID string, pod *corev1.Pod) cgroupPath {
	paths := []string{defaultNodeCgroupName}

	// Guaranteed pods live directly under kubepods; other QoS classes get
	// an extra lower-cased class directory.
	if pod.Status.QOSClass != corev1.PodQOSGuaranteed {
		paths = append(paths, strings.ToLower(string(pod.Status.QOSClass)))
	}

	paths = append(paths, fmt.Sprintf("pod%s", pod.UID))

	if kubeletPodCgroupDriver != "systemd" {
		paths = append(paths, containerID)
	}

	return paths
}
|
||||
|
||||
// https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/cm/cgroup_manager_linux.go#L81
|
||||
func containerCgroupSuffix(containerID string, pod *corev1.Pod) string {
|
||||
name := containerCgroupPath(containerID, pod)
|
||||
|
||||
if kubeletPodCgroupDriver == "systemd" {
|
||||
return name.ToSystemd()
|
||||
}
|
||||
|
||||
return name.ToCgroupfs()
|
||||
}
|
|
@ -31,8 +31,8 @@ import (
|
|||
"time"
|
||||
|
||||
"huatuo-bamai/internal/bpf"
|
||||
"huatuo-bamai/internal/cgroups"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/utils/cgrouputil"
|
||||
"huatuo-bamai/pkg/types"
|
||||
|
||||
mapset "github.com/deckarep/golang-set"
|
||||
|
@ -192,7 +192,7 @@ func cgroupCssNotify() {
|
|||
rootSet := mapset.NewSet()
|
||||
|
||||
for _, subsys := range cgroupv1SubSysName {
|
||||
root := cgroups.RootFsFilePath(subsys)
|
||||
root := cgrouputil.CgroupRootFsFilePath(subsys)
|
||||
realRoot, err := filepath.EvalSymlinks(root)
|
||||
if err != nil {
|
||||
continue
|
||||
|
|
|
@ -21,6 +21,7 @@ import (
|
|||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
|
@ -29,6 +30,7 @@ import (
|
|||
|
||||
dockertypes "github.com/docker/docker/api/types"
|
||||
dockerclient "github.com/docker/docker/client"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
k8sremote "k8s.io/cri-client/pkg"
|
||||
)
|
||||
|
||||
|
@ -153,3 +155,11 @@ func containerInitPidInContainerd(containerID string) (int, error) {
|
|||
|
||||
return pidutil.Read(filePath)
|
||||
}
|
||||
|
||||
func containerCgroupSuffix(containerID string, pod *corev1.Pod) string {
|
||||
if pod.Status.QOSClass == corev1.PodQOSGuaranteed {
|
||||
return fmt.Sprintf("/kubepods/pod%s/%s", pod.UID, containerID)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("/kubepods/%s/pod%s/%s", strings.ToLower(string(pod.Status.QOSClass)), pod.UID, containerID)
|
||||
}
|
||||
|
|
|
@ -32,6 +32,18 @@ func parseContainerHostname(typ ContainerType, pod *corev1.Pod) (string, error)
|
|||
return hostname, nil
|
||||
}
|
||||
|
||||
// container's hostname example:
|
||||
//
|
||||
// "metadata": {
|
||||
// "name": "level4-sf-4d59e-1",
|
||||
// ...
|
||||
// },
|
||||
// ...
|
||||
// "spec": {
|
||||
// ...
|
||||
// "hostname": "level4-sf-4d59e-1",
|
||||
// ...
|
||||
// },
|
||||
hostname := pod.Spec.Hostname
|
||||
if hostname == "" {
|
||||
hostname = pod.Name
|
||||
|
|
|
@ -15,159 +15,29 @@
|
|||
package pod
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"huatuo-bamai/internal/conf"
|
||||
"huatuo-bamai/internal/log"
|
||||
"huatuo-bamai/internal/utils/procfsutil"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
kubeletconfig "k8s.io/kubelet/config/v1beta1"
|
||||
"sigs.k8s.io/yaml"
|
||||
)
|
||||
|
||||
const (
|
||||
kubeletReqTimeout = 5 * time.Second
|
||||
kubeletDefaultConfPath = "/var/lib/kubelet/config.yaml"
|
||||
kubeletReqTimeout = 5 * time.Second
|
||||
)
|
||||
|
||||
var (
|
||||
kubeletPodListAuthorizationEnabled = false
|
||||
kubeletRunningEnabled = false
|
||||
kubeletPodListURL string
|
||||
kubeletHttpsClient *http.Client
|
||||
kubeletTimeTicker *time.Ticker
|
||||
kubeletDoneCancel context.CancelFunc
|
||||
kubeletPodCgroupDriver = "cgroupfs"
|
||||
)
|
||||
|
||||
type PodContainerInitCtx struct {
|
||||
PodListReadOnlyPort string
|
||||
PodListAuthorizedPort string
|
||||
PodClientCertPath string
|
||||
PodCACertPath string
|
||||
podClientCertPath string
|
||||
podClientCertKey string
|
||||
}
|
||||
|
||||
// kubeletPodListPortUpdate probes the kubelet pod-list endpoints and caches
// the working one in package state. It first tries the plain read-only
// port; if that answers, authorization is disabled and we are done.
// Otherwise it builds an mTLS client from the configured client cert/key
// and CA, probes the authorized port, and on success caches the HTTPS
// client. kubeletRunningEnabled, kubeletPodListAuthorizationEnabled and
// kubeletPodListURL are updated to reflect the outcome.
func kubeletPodListPortUpdate(ctx *PodContainerInitCtx) error {
	client := &http.Client{
		Timeout: kubeletReqTimeout,
	}
	// Fast path: the unauthenticated read-only port is reachable.
	if _, err := kubeletDoRequest(client, ctx.PodListReadOnlyPort); err == nil {
		kubeletPodListAuthorizationEnabled = false
		kubeletPodListURL = ctx.PodListReadOnlyPort
		kubeletRunningEnabled = true
		return nil
	}

	cert, err := tls.LoadX509KeyPair(ctx.podClientCertPath, ctx.podClientCertKey)
	if err != nil {
		return fmt.Errorf("loading client key pair [%s,%s]: %w",
			ctx.podClientCertPath, ctx.podClientCertKey, err)
	}

	caCert, err := os.ReadFile(ctx.PodCACertPath)
	if err != nil {
		return fmt.Errorf("reading CA certificate: %w", err)
	}

	caCertPool := x509.NewCertPool()
	if ok := caCertPool.AppendCertsFromPEM(caCert); !ok {
		return fmt.Errorf("parse/append a series of pem")
	}

	// NOTE(review): InsecureSkipVerify disables server-name verification
	// even though RootCAs is set — confirm this is intentional for a
	// node-local kubelet endpoint.
	client.Transport = &http.Transport{
		TLSClientConfig: &tls.Config{
			Certificates:       []tls.Certificate{cert},
			RootCAs:            caCertPool,
			InsecureSkipVerify: true, // #nosec G402
		},
	}

	if _, err := kubeletDoRequest(client, ctx.PodListAuthorizedPort); err != nil {
		kubeletPodListAuthorizationEnabled = false
		kubeletRunningEnabled = false
		return fmt.Errorf("podlist https: %w", err)
	}

	// update https instance cache
	kubeletHttpsClient = client
	kubeletPodListURL = ctx.PodListAuthorizedPort
	kubeletPodListAuthorizationEnabled = true
	kubeletRunningEnabled = true
	return nil
}
|
||||
|
||||
// ContainerPodMgrInit wires up pod synchronization against the kubelet.
// With no configured ports it is a no-op. Otherwise it splits the client
// cert config ("cert" or "cert,key"), records the kubelet cgroup driver,
// and probes the pod-list endpoints once. Only an ECONNREFUSED probe
// failure (kubelet not up yet) starts a 30-minute background retry loop,
// which runs until kubelet answers or ContainerPodMgrClose is called.
func ContainerPodMgrInit(ctx *PodContainerInitCtx) error {
	if ctx.PodListReadOnlyPort == "" && ctx.PodListAuthorizedPort == "" {
		log.Warnf("pod sync is not working, we manually turned off this.")
		return nil
	}

	// A single value is used for both the certificate and the key.
	s := strings.Split(ctx.PodClientCertPath, ",")
	if len(s) == 1 {
		ctx.podClientCertPath, ctx.podClientCertKey = s[0], s[0]
	} else if len(s) >= 2 {
		ctx.podClientCertPath, ctx.podClientCertKey = s[0], s[1]
	}

	// best-effort: any error leaves the default driver in place
	_ = kubeletCgroupDriver()

	// Success (nil error) and non-refused failures both return here.
	err := kubeletPodListPortUpdate(ctx)
	if !errors.Is(err, syscall.ECONNREFUSED) {
		return err
	}

	doneCtx, cancel := context.WithCancel(context.Background())

	kubeletDoneCancel = cancel
	kubeletTimeTicker = time.NewTicker(30 * time.Minute)
	go func(doneCtx context.Context, t *time.Ticker) {
		for {
			select {
			case <-t.C:
				if err := kubeletPodListPortUpdate(ctx); err == nil {
					log.Infof("kubelet is running now")
					_ = kubeletCgroupDriver()
					ContainerPodMgrClose()
					// NOTE(review): this break only leaves the select, not
					// the for loop; the goroutine actually exits via the
					// doneCtx cancellation performed inside
					// ContainerPodMgrClose — confirm that is the intended
					// shutdown path.
					break
				}
			case <-doneCtx.Done():
				return
			}
		}
	}(doneCtx, kubeletTimeTicker)

	return nil
}
|
||||
|
||||
func ContainerPodMgrClose() {
|
||||
if kubeletTimeTicker != nil {
|
||||
kubeletTimeTicker.Stop()
|
||||
kubeletTimeTicker = nil
|
||||
}
|
||||
|
||||
if kubeletDoneCancel != nil {
|
||||
kubeletDoneCancel()
|
||||
kubeletDoneCancel = nil
|
||||
}
|
||||
}
|
||||
|
||||
func kubeletSyncContainers() error {
|
||||
podList, err := kubeletGetPodList()
|
||||
if err != nil {
|
||||
// ignore all errors and remain old containers.
|
||||
log.Infof("failed to get pod list, err: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -241,28 +111,18 @@ func kubeletSyncContainers() error {
|
|||
}
|
||||
|
||||
func kubeletGetPodList() (corev1.PodList, error) {
|
||||
if !kubeletRunningEnabled {
|
||||
return corev1.PodList{}, fmt.Errorf("kubelet not running")
|
||||
}
|
||||
kubeletPodListURL := conf.Get().Pod.KubeletPodListURL
|
||||
|
||||
if !kubeletPodListAuthorizationEnabled {
|
||||
client := &http.Client{
|
||||
Timeout: kubeletReqTimeout,
|
||||
}
|
||||
|
||||
return kubeletDoRequest(client, kubeletPodListURL)
|
||||
}
|
||||
|
||||
return kubeletDoRequest(kubeletHttpsClient, kubeletPodListURL)
|
||||
}
|
||||
|
||||
func kubeletDoRequest(client *http.Client, kubeletPodListURL string) (corev1.PodList, error) {
|
||||
podList := corev1.PodList{}
|
||||
req, err := http.NewRequest(http.MethodGet, kubeletPodListURL, http.NoBody)
|
||||
if err != nil {
|
||||
return podList, err
|
||||
}
|
||||
|
||||
client := &http.Client{
|
||||
Timeout: kubeletReqTimeout,
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return podList, err
|
||||
|
@ -368,10 +228,52 @@ func parseContainerIDInPodStatus(data string) (string, error) {
|
|||
}
|
||||
|
||||
// parseContainerIPAddress returns the pod IP reported in the pod status
// (empty when the pod has not been assigned an IP yet).
func parseContainerIPAddress(pod *corev1.Pod) string {
	// podIP example:
	//
	// "status": {
	//   ...
	//   "hostIP": "10.74.164.13",
	//   "podIP": "10.74.164.13",
	//   "podIPs": [
	//     {
	//       "ip": "10.74.164.13"
	//     }
	//   ],
	//   ...
	// },
	return pod.Status.PodIP
}
|
||||
|
||||
func isRuningPod(pod *corev1.Pod) bool {
|
||||
// running pod example:
|
||||
//
|
||||
// "status": {
|
||||
// ...
|
||||
// "phase": "Running",
|
||||
// ...
|
||||
// "containerStatuses": [
|
||||
// {
|
||||
// "name": "taxi-invoice-center-zjy",
|
||||
// "state": {
|
||||
// "running": {
|
||||
// "startedAt": "2024-05-28T03:10:30Z"
|
||||
// },
|
||||
// ...
|
||||
// },
|
||||
// ...
|
||||
// },
|
||||
// {
|
||||
// "name": "agent-taxi-invoice-center-zjy",
|
||||
// "state": {
|
||||
// "running": {
|
||||
// "startedAt": "2024-05-28T03:10:30Z"
|
||||
// },
|
||||
// ...
|
||||
// },
|
||||
// ...
|
||||
// },
|
||||
//}
|
||||
|
||||
// The Pod has been bound to a node, and all of the containers have been created.
|
||||
// At least one container is still running, or is in the process of starting or
|
||||
// restarting.
|
||||
|
@ -389,25 +291,3 @@ func isRuningPod(pod *corev1.Pod) bool {
|
|||
|
||||
return true
|
||||
}
|
||||
|
||||
func kubeletCgroupDriver() error {
|
||||
data, err := os.ReadFile(kubeletDefaultConfPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read %s: %w", kubeletDefaultConfPath, err)
|
||||
}
|
||||
|
||||
var config kubeletconfig.KubeletConfiguration
|
||||
|
||||
if err := yaml.Unmarshal(data, &config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// cgroupfs as default of kubelet
|
||||
// config.CgroupDriver is read from config file, which may be any
|
||||
// string, such as systemdxxx (in this case, kubelet use cgroupfs)
|
||||
if config.CgroupDriver == "systemd" {
|
||||
kubeletPodCgroupDriver = config.CgroupDriver
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -23,8 +23,6 @@ type ContainerType uint32
|
|||
const (
|
||||
ContainerTypeSidecar ContainerType = 1 << iota
|
||||
ContainerTypeDaemonSet
|
||||
ContainerTypeNode
|
||||
ContainerTypeStatic
|
||||
ContainerTypeNormal
|
||||
ContainerTypeUnknown
|
||||
_containerTypeAll
|
||||
|
@ -37,8 +35,6 @@ var containerType2String = map[ContainerType]string{
|
|||
ContainerTypeSidecar: "Sidecar",
|
||||
ContainerTypeDaemonSet: "DaemonSet",
|
||||
ContainerTypeNormal: "Normal",
|
||||
ContainerTypeNode: "Node",
|
||||
ContainerTypeStatic: "Static",
|
||||
ContainerTypeUnknown: "Unknown",
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgrouputil
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"huatuo-bamai/internal/utils/parseutil"
|
||||
)
|
||||
|
||||
// NewCPU new cpu obj with defalut rootfs
|
||||
func NewCPU() *CPU {
|
||||
return &CPU{
|
||||
root: V1CpuPath(),
|
||||
}
|
||||
}
|
||||
|
||||
// CPU wraps read helpers for a cgroup v1 cpu controller hierarchy.
type CPU struct {
	// root is the mount point of the cpu controller hierarchy.
	root string
}
|
||||
|
||||
// Path joins path with the cpu controller rootfs of this helper.
func (c *CPU) Path(path string) string {
	return filepath.Join(c.root, path)
}
|
||||
|
||||
// StatRaw return kv slice in cpu.stat
|
||||
func (c *CPU) StatRaw(path string) (map[string]uint64, error) {
|
||||
return parseutil.ParseRawKV(filepath.Join(c.Path(path), "cpu.stat"))
|
||||
}
|
||||
|
||||
// CPUCount return cgroup v1 cpu num
|
||||
func (c *CPU) CPUNum(path string) (int, error) {
|
||||
period, err := parseutil.ReadInt(filepath.Join(c.Path(path), "cpu.cfs_period_us"))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if period == -1 {
|
||||
return 0, fmt.Errorf("no limited")
|
||||
}
|
||||
|
||||
quota, err := parseutil.ReadUint(filepath.Join(c.Path(path), "cpu.cfs_quota_us"))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return int(quota / uint64(period)), nil
|
||||
}
|
|
@ -0,0 +1,137 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgrouputil
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"huatuo-bamai/internal/utils/parseutil"
|
||||
)
|
||||
|
||||
const nanosecondsInSecond = 1000000000
|
||||
|
||||
var clockTicks = getClockTicks()
|
||||
|
||||
// NewCPUAcct new obj with rootfs
|
||||
func NewCPUAcct(root string) *CPUAcct {
|
||||
return &CPUAcct{
|
||||
root: root,
|
||||
}
|
||||
}
|
||||
|
||||
// NewCPUAcctDefault new obj with default rootfs
|
||||
func NewCPUAcctDefault() *CPUAcct {
|
||||
return &CPUAcct{
|
||||
root: V1CpuPath(),
|
||||
}
|
||||
}
|
||||
|
||||
// CPUAcct wraps read helpers for a cgroup v1 cpuacct controller hierarchy.
type CPUAcct struct {
	// root is the mount point of the controller hierarchy.
	root string
}
|
||||
|
||||
// Path joins path with the controller rootfs of this helper.
func (c *CPUAcct) Path(path string) string {
	return filepath.Join(c.root, path)
}
|
||||
|
||||
// PercpuUsage return values in cpuacct.usage_percpu
|
||||
func (c *CPUAcct) PercpuUsage(path string) ([]uint64, error) {
|
||||
var usage []uint64
|
||||
data, err := os.ReadFile(filepath.Join(c.Path(path), "cpuacct.usage_percpu"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, v := range strings.Fields(string(data)) {
|
||||
u, err := strconv.ParseUint(v, 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
usage = append(usage, u)
|
||||
}
|
||||
return usage, nil
|
||||
}
|
||||
|
||||
// Usage return value in cpuacct.usage
|
||||
func (c *CPUAcct) Usage(path string) (uint64, error) {
|
||||
return parseutil.ReadUint(filepath.Join(c.Path(path), "cpuacct.usage"))
|
||||
}
|
||||
|
||||
// Stat reads cpuacct.stat and returns the cgroup's user and kernel CPU
// time. The raw values are scaled by nanosecondsInSecond/clockTicks before
// returning; it fails if either the "user" or "system" field is missing.
func (c *CPUAcct) Stat(path string) (user, kernel uint64, err error) {
	statPath := filepath.Join(c.Path(path), "cpuacct.stat")
	f, err := os.Open(statPath)
	if err != nil {
		return 0, 0, err
	}
	defer f.Close()

	var (
		raw = make(map[string]uint64)
		sc  = bufio.NewScanner(f)
	)
	// Collect every "key value" line of the file first.
	for sc.Scan() {
		key, v, err := parseutil.ParseKV(sc.Text())
		if err != nil {
			return 0, 0, err
		}
		raw[key] = v
	}
	if err := sc.Err(); err != nil {
		return 0, 0, err
	}
	// Copy the two required fields into the named results via pointers.
	for _, t := range []struct {
		name  string
		value *uint64
	}{
		{
			name:  "user",
			value: &user,
		},
		{
			name:  "system",
			value: &kernel,
		},
	} {
		v, ok := raw[t.name]
		if !ok {
			return 0, 0, fmt.Errorf("expected field %q but not found in %q", t.name, statPath)
		}
		*t.value = v
	}
	return (user * nanosecondsInSecond) / clockTicks, (kernel * nanosecondsInSecond) / clockTicks, nil
}
|
|
@ -0,0 +1,59 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgrouputil
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
"huatuo-bamai/internal/utils/parseutil"
|
||||
)
|
||||
|
||||
// NewMemory new cpu obj with default rootfs
|
||||
func NewMemory() *Memory {
|
||||
return &Memory{
|
||||
root: V1MemoryPath(),
|
||||
}
|
||||
}
|
||||
|
||||
// Memory wraps read helpers for a cgroup v1 memory controller hierarchy.
type Memory struct {
	// root is the mount point of the memory controller hierarchy.
	root string
}
|
||||
|
||||
// Path joins path with the memory controller rootfs of this helper.
func (c *Memory) Path(path string) string {
	return filepath.Join(c.root, path)
}
|
||||
|
||||
// EventsRaw return kv slice in memory.events
|
||||
func (c *Memory) EventsRaw(path string) (map[string]uint64, error) {
|
||||
return parseutil.ParseRawKV(filepath.Join(c.Path(path), "memory.events"))
|
||||
}
|
|
@ -12,41 +12,25 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package pids
|
||||
package cgrouputil
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// 1. /sys/fs/cgroup/$GROUPPATH/cgroup.procs
|
||||
// 2. /sys/fs/cgroup/$GROUPPATH/cgroup.threads
|
||||
func Tasks(path, file string) ([]int32, error) {
|
||||
f, err := os.Open(filepath.Join(path, file))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
var v1RootfsDefaultPath = "/sys/fs/cgroup"
|
||||
|
||||
var (
|
||||
out []int32
|
||||
s = bufio.NewScanner(f)
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if t := s.Text(); t != "" {
|
||||
pid, err := strconv.ParseInt(t, 10, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, int32(pid))
|
||||
}
|
||||
}
|
||||
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
// CgroupRootFsFilePath joins name with the cgroup v1 rootfs mount point,
// e.g. "cpu" -> "/sys/fs/cgroup/cpu".
func CgroupRootFsFilePath(name string) string {
	return filepath.Join(v1RootfsDefaultPath, name)
}
|
||||
|
||||
// V1CpuPath return the cpu dir in cgroup v1
|
||||
func V1CpuPath() string {
|
||||
return v1RootfsDefaultPath + "/cpu"
|
||||
}
|
||||
|
||||
// V1MemoryPath return the memory dir in cgroup v1
|
||||
func V1MemoryPath() string {
|
||||
return v1RootfsDefaultPath + "/memory"
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
// Copyright 2025 The HuaTuo Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package cgrouputil
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
cgroups "github.com/containerd/cgroups/v3/cgroup1"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
// RuntimeCgroup is a cgroup v1 group used to constrain this process's own
// CPU and memory usage.
type RuntimeCgroup struct {
	// cg is the underlying containerd cgroup v1 handle.
	cg cgroups.Cgroup
}
|
||||
|
||||
// runtimeCgroupPeriod is the CFS period (in microseconds) used for all CPU
// quota calculations below.
var runtimeCgroupPeriod uint64 = 100000
|
||||
|
||||
// NewRuntimeCgroup creates a cgroup v1 group at cgPath limited to cpu
// cores (expressed as a CFS quota of cpu*period microseconds) and mem
// bytes of memory, then moves the current process into it. If attaching
// the process fails, the freshly created group is deleted again.
func NewRuntimeCgroup(cgPath string, cpu float64, mem int64) (*RuntimeCgroup, error) {
	// quota in usec per period: fractional cores are supported.
	quota := int64(cpu * float64(runtimeCgroupPeriod))

	cg, err := cgroups.New(cgroups.StaticPath(cgPath), &specs.LinuxResources{
		CPU: &specs.LinuxCPU{
			Period: &runtimeCgroupPeriod,
			Quota:  &quota,
		},
		Memory: &specs.LinuxMemory{
			Limit: &mem,
		},
	})
	if err != nil {
		return nil, err
	}

	// Put ourselves into the new group; roll back on failure.
	if err := cg.Add(cgroups.Process{Pid: os.Getpid()}); err != nil {
		_ = cg.Delete()
		return nil, err
	}

	return &RuntimeCgroup{cg: cg}, nil
}
|
||||
|
||||
// Delete tears down the runtime cgroup. Errors are deliberately ignored:
// this is best-effort cleanup.
func (host *RuntimeCgroup) Delete() {
	// move pids to cgroup rootfs temporarily, make sure we can remove cgroup dir
	rootfs, _ := cgroups.Load(cgroups.RootPath)
	_ = host.cg.MoveTo(rootfs)
	_ = host.cg.Delete()
}
|
||||
|
||||
// UpdateCPU update resource
|
||||
func (host *RuntimeCgroup) UpdateCPU(cpu float64) error {
|
||||
quota := int64(cpu * float64(runtimeCgroupPeriod))
|
||||
return host.cg.Update(&specs.LinuxResources{
|
||||
CPU: &specs.LinuxCPU{
|
||||
Period: &runtimeCgroupPeriod,
|
||||
Quota: "a,
|
||||
},
|
||||
})
|
||||
}
|
|
@ -28,7 +28,7 @@
|
|||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
package cgrouputil
|
||||
|
||||
func getClockTicks() uint64 {
|
||||
// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
|
|
@ -42,7 +42,7 @@ func ReadInt(path string) (int64, error) {
|
|||
return strconv.ParseInt(strings.TrimSpace(string(v)), 10, 64)
|
||||
}
|
||||
|
||||
func parseKV(raw string) (string, uint64, error) {
|
||||
func ParseKV(raw string) (string, uint64, error) {
|
||||
parts := strings.Fields(raw)
|
||||
switch len(parts) {
|
||||
case 2:
|
||||
|
@ -56,8 +56,8 @@ func parseKV(raw string) (string, uint64, error) {
|
|||
}
|
||||
}
|
||||
|
||||
// RawKV parse the kv cgroup file
|
||||
func RawKV(path string) (map[string]uint64, error) {
|
||||
// ParseRawKV parse the kv cgroup file
|
||||
func ParseRawKV(path string) (map[string]uint64, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -70,7 +70,7 @@ func RawKV(path string) (map[string]uint64, error) {
|
|||
)
|
||||
|
||||
for sc.Scan() {
|
||||
key, v, err := parseKV(sc.Text())
|
||||
key, v, err := ParseKV(sc.Text())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -83,19 +83,3 @@ func RawKV(path string) (map[string]uint64, error) {
|
|||
|
||||
return raw, nil
|
||||
}
|
||||
|
||||
// KV reads the first line of the file at path and parses it as a single
// "key value" pair.
// NOTE(review): scanner.Scan() can return false on an empty file without a
// scanner error, in which case parseKV receives an empty string — confirm
// callers only use this on non-empty single-line cgroup files.
func KV(path string) (string, uint64, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", 0, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	scanner.Scan()
	if err := scanner.Err(); err != nil {
		return "", 0, err
	}

	return parseKV(scanner.Text())
}
|
||||
|
|