/*
Copyright (c) [2023] [pcm]

[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:

	http://license.coscl.org.cn/MulanPSL2

THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/

package tracker

import (
	"fmt"
	"strings"
)

// promQLTemplates maps metric names to PromQL expression templates. The
// "$1" token (and "$2" in several pod templates) is a placeholder that the
// make*MetricExpr helpers below replace with concrete label selectors.
var promQLTemplates = map[string]string{
	// cluster
	"cluster_cpu_utilisation": "cluster_cpu_utilisation{$1}",
	"cluster_memory_utilisation": "cluster_memory_utilisation{$1}",
	"cluster_disk_utilisation": "cluster_disk_utilisation{$1}",
	"cluster_cpu_total": "cluster_cpu_total{$1}",
	"cluster_memory_total": "cluster_memory_total{$1}",
	"cluster_disk_total": "cluster_disk_total{$1}",
	"cluster_cpu_avail": "cluster_cpu_avail{$1}",
	"cluster_memory_avail": "cluster_memory_avail{$1}",
	"cluster_disk_avail": "cluster_disk_avail{$1}",
	"cluster_pod_utilisation": "cluster_pod_utilisation{$1}",

	// center
	"center_cpu_utilisation": "(sum by (adapter_id)(cluster_cpu_total{$1})-sum by (adapter_id)(cluster_cpu_avail{$1}))/sum by (adapter_id)(cluster_cpu_total{$1})",
	"center_memory_utilisation": "(sum by (adapter_id)(cluster_memory_total{$1})-sum by (adapter_id)(cluster_memory_avail{$1}))/sum by (adapter_id)(cluster_memory_total{$1})",
	"center_disk_utilisation": "(sum by (adapter_id)(cluster_disk_total{$1})-sum by (adapter_id)(cluster_disk_avail{$1}))/sum by (adapter_id)(cluster_disk_total{$1})",
	"center_top3": "topk(3,((sum by (adapter_id)(cluster_cpu_total)-sum by (adapter_id)(cluster_cpu_avail))/sum by (adapter_id)(cluster_cpu_total) + (sum by (adapter_id)(cluster_memory_total) - sum by (adapter_id)(cluster_memory_avail))/sum by (adapter_id)(cluster_memory_total) + (sum by (adapter_id)(cluster_disk_total)-sum by (adapter_id)(cluster_disk_avail))/sum by (adapter_id)(cluster_disk_total))/3)",

	// namespace
	"namespace_cpu_usage": `round(namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", $1}, 0.001)`,
	"namespace_memory_usage": `namespace:container_memory_usage_bytes:sum{namespace!="", $1}`,
	"namespace_memory_usage_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", $1}`,

	// controller
	"controller_cpu_usage_rate": `sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{}* on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{$1}) by (workload)/sum( kube_pod_container_resource_limits{job="kube-state-metrics", resource="cpu"}* on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{ }) by (workload)`,
	"controller_memory_usage_rate": `sum( container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", container!="", image!=""} * on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{$1}) by (workload)/sum( kube_pod_container_resource_limits{job="kube-state-metrics", resource="memory"}* on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{ }) by (workload)`,

	// pod
	"pod_cpu_usage": `round(sum by (namespace, pod) (irate(container_cpu_usage_seconds_total{job="kubelet", pod!="", image!=""}[5m])) * on (namespace, pod) group_left(owner_kind,owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}, 0.001)`,
	"pod_cpu_usage_rate": `sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{ $1}) by (pod) / sum(kube_pod_container_resource_limits{ $1,unit="core"}) by (pod)`,
	"pod_memory_usage_rate": `sum(container_memory_working_set_bytes{job="kubelet", $1, container!="", image!=""}) by (pod) / sum(kube_pod_container_resource_limits{ $1,unit="byte"}) by (pod)`,
	"pod_memory_usage": `sum by (namespace, pod) (container_memory_usage_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	"pod_memory_usage_wo_cache": `sum by (namespace, pod) (container_memory_working_set_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	"pod_net_bytes_transmitted": `sum by (namespace, pod) (irate(container_network_transmit_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	"pod_net_bytes_received": `sum by (namespace, pod) (irate(container_network_receive_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	"pod_cpu_resource_limits": `sum by (namespace, pod) (kube_pod_container_resource_limits{origin_prometheus=~"",resource="cpu",unit="core"}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	"pod_memory_resource_limits": `sum by (namespace, pod) (kube_pod_container_resource_limits{origin_prometheus=~"",resource="memory",unit="byte"}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,

	// container
	"container_cpu_usage": `round(sum by (namespace, pod, container) (irate(container_cpu_usage_seconds_total{job="kubelet", container!="POD", container!="", image!="", $1}[5m])), 0.001)`,
	"container_memory_usage": `sum by (namespace, pod, container) (container_memory_usage_bytes{job="kubelet", container!="POD", container!="", image!="", $1})`,
	"container_memory_usage_wo_cache": `sum by (namespace, pod, container) (container_memory_working_set_bytes{job="kubelet", container!="POD", container!="", image!="", $1})`,
	"container_processes_usage": `sum by (namespace, pod, container) (container_processes{job="kubelet", container!="POD", container!="", image!="", $1})`,
	"container_threads_usage": `sum by (namespace, pod, container) (container_threads{job="kubelet", container!="POD", container!="", image!="", $1})`,
}
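
// As a concrete illustration of the placeholder convention (a sketch using
// values chosen here for demonstration, not taken from the original
// source): for a cluster-level query with ClusterName "cluster-1", the
// template "cluster_cpu_utilisation{$1}" expands to
//
//	cluster_cpu_utilisation{cluster_name="cluster-1"}
//
// while an empty selector leaves a valid match-all query,
// cluster_cpu_utilisation{}.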

// makeExpr looks up the PromQL template registered for metric and fills in
// its selector placeholders according to the monitoring level in opts.
// Unknown metrics yield an empty string; component-level queries (and
// unknown levels) return the template unchanged.
func makeExpr(metric string, opts QueryOptions) string {
	tmpl := promQLTemplates[metric]
	switch opts.Level {
	case LevelAdapter:
		return makeAdapterMetricExpr(tmpl, opts)
	case LevelCluster:
		return makeClusterMetricExpr(tmpl, opts)
	case LevelNode:
		return makeNodeMetricExpr(tmpl, opts)
	case LevelWorkspace:
		return makeWorkspaceMetricExpr(tmpl, opts)
	case LevelNamespace:
		return makeNamespaceMetricExpr(tmpl, opts)
	case LevelController:
		return makeControllerMetricExpr(tmpl, opts)
	case LevelPod:
		return makePodMetricExpr(tmpl, opts)
	case LevelContainer:
		return makeContainerMetricExpr(tmpl, opts)
	case LevelPVC:
		return makePVCMetricExpr(tmpl, opts)
	case LevelComponent:
		return tmpl
	default:
		return tmpl
	}
}
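
// Example call (an illustrative sketch; the metric and option values are
// chosen here for demonstration, assuming the QueryOptions fields already
// used in this file):
//
//	expr := makeExpr("namespace_cpu_usage", QueryOptions{
//		Level:     LevelNamespace,
//		Namespace: "default",
//	})
//	// expr == `round(namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", namespace="default"}, 0.001)`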

// makeClusterMetricExpr substitutes "$1" with a cluster_name selector; if
// no cluster name is given, the selector stays empty and the query matches
// all clusters.
func makeClusterMetricExpr(tmpl string, o QueryOptions) string {
	var clusterSelector string
	if o.ClusterName != "" {
		clusterSelector = fmt.Sprintf(`cluster_name="%s"`, o.ClusterName)
	}
	return strings.Replace(tmpl, "$1", clusterSelector, -1)
}

// makeAdapterMetricExpr substitutes "$1" with an adapter_id selector.
// NOTE: the ClustersName branch rebuilds the same adapter_id selector; the
// cluster names themselves are never used in the expression.
func makeAdapterMetricExpr(tmpl string, o QueryOptions) string {
	var adapterSelector string
	if o.AdapterId != 0 {
		adapterSelector = fmt.Sprintf(`adapter_id="%d"`, o.AdapterId)
	}
	if len(o.ClustersName) != 0 {
		adapterSelector = fmt.Sprintf(`adapter_id="%d"`, o.AdapterId)
	}
	return strings.Replace(tmpl, "$1", adapterSelector, -1)
}
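
// If the ClustersName branch was meant to narrow the query to specific
// clusters, one possible shape (an illustrative sketch only, assuming
// ClustersName is a []string) would join the names into a regex
// alternation:
//
//	adapterSelector = fmt.Sprintf(`adapter_id="%d",cluster_name=~"%s"`,
//		o.AdapterId, strings.Join(o.ClustersName, "|"))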

// makeNodeMetricExpr substitutes "$1" with either an exact node selector or
// a regex match built from the resource filter.
func makeNodeMetricExpr(tmpl string, o QueryOptions) string {
	var nodeSelector string
	if o.NodeName != "" {
		nodeSelector = fmt.Sprintf(`node="%s"`, o.NodeName)
	} else {
		nodeSelector = fmt.Sprintf(`node=~"%s"`, o.ResourceFilter)
	}
	return strings.Replace(tmpl, "$1", nodeSelector, -1)
}

// makeWorkspaceMetricExpr substitutes "$1" with either an exact workspace
// selector or a non-empty regex match built from the resource filter.
func makeWorkspaceMetricExpr(tmpl string, o QueryOptions) string {
	var workspaceSelector string
	if o.WorkspaceName != "" {
		workspaceSelector = fmt.Sprintf(`workspace="%s"`, o.WorkspaceName)
	} else {
		workspaceSelector = fmt.Sprintf(`workspace=~"%s", workspace!=""`, o.ResourceFilter)
	}
	return strings.Replace(tmpl, "$1", workspaceSelector, -1)
}

// makeNamespaceMetricExpr substitutes "$1" with a namespace selector,
// scoped to a workspace when one is given.
func makeNamespaceMetricExpr(tmpl string, o QueryOptions) string {
	var namespaceSelector string

	// For monitoring namespaces in the specific workspace
	// GET /workspaces/{workspace}/namespaces
	if o.WorkspaceName != "" {
		namespaceSelector = fmt.Sprintf(`workspace="%s", namespace=~"%s"`, o.WorkspaceName, o.ResourceFilter)
		return strings.Replace(tmpl, "$1", namespaceSelector, -1)
	}

	// For monitoring the specific namespaces
	// GET /namespaces/{namespace} or
	// GET /namespaces
	if o.Namespace != "" {
		namespaceSelector = fmt.Sprintf(`namespace="%s"`, o.Namespace)
	} else {
		namespaceSelector = fmt.Sprintf(`namespace=~"%s"`, o.ResourceFilter)
	}
	return strings.Replace(tmpl, "$1", namespaceSelector, -1)
}
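
// Illustrative expansions of the two branches above (values chosen for the
// sketch): with WorkspaceName "ws1" and ResourceFilter "prod.*",
// namespace_memory_usage becomes
//
//	namespace:container_memory_usage_bytes:sum{namespace!="", workspace="ws1", namespace=~"prod.*"}
//
// whereas with only Namespace "default" set it becomes
//
//	namespace:container_memory_usage_bytes:sum{namespace!="", namespace="default"}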

// makeControllerMetricExpr substitutes "$1" with a workload selector.
func makeControllerMetricExpr(tmpl string, o QueryOptions) string {
	workload := fmt.Sprintf(`workload="%s"`, o.WorkloadName)
	return strings.NewReplacer("$1", workload).Replace(tmpl)
}

// makePodMetricExpr substitutes "$1" with a pod selector.
// NOTE: several pod templates also carry a "$2" placeholder (the
// kube_pod_info node selector), which is not substituted here and would be
// left verbatim in the resulting query.
func makePodMetricExpr(tmpl string, o QueryOptions) string {
	podName := fmt.Sprintf(`pod="%s"`, o.PodName)
	return strings.NewReplacer("$1", podName).Replace(tmpl)
}
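
// Illustrative expansion (a sketch with a made-up pod name): for
// pod_memory_usage_rate with PodName "demo", "$1" becomes pod="demo",
// yielding
//
//	sum(container_memory_working_set_bytes{job="kubelet", pod="demo", container!="", image!=""}) by (pod)
//	  / sum(kube_pod_container_resource_limits{ pod="demo",unit="byte"}) by (pod)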

// makeContainerMetricExpr substitutes "$1" with a pod/namespace/container
// selector, matching containers by regex when no exact name is given.
func makeContainerMetricExpr(tmpl string, o QueryOptions) string {
	var containerSelector string
	if o.ContainerName != "" {
		containerSelector = fmt.Sprintf(`pod="%s", namespace="%s", container="%s"`, o.PodName, o.Namespace, o.ContainerName)
	} else {
		containerSelector = fmt.Sprintf(`pod="%s", namespace="%s", container=~"%s"`, o.PodName, o.Namespace, o.ResourceFilter)
	}
	return strings.Replace(tmpl, "$1", containerSelector, -1)
}

// makePVCMetricExpr substitutes "$1" with a persistentvolumeclaim selector;
// without a namespace the selector stays empty and the query matches all
// claims.
func makePVCMetricExpr(tmpl string, o QueryOptions) string {
	var pvcSelector string

	// For monitoring persistentvolumeclaims in the specific namespace
	// GET /namespaces/{namespace}/persistentvolumeclaims/{persistentvolumeclaim} or
	// GET /namespaces/{namespace}/persistentvolumeclaims
	if o.Namespace != "" {
		pvcSelector = fmt.Sprintf(`namespace="%s", persistentvolumeclaim=~"%s"`, o.Namespace, o.ResourceFilter)
	}
	return strings.Replace(tmpl, "$1", pvcSelector, -1)
}

// makeIngressMetricExpr substitutes "$1" (ingress selector), "$2" (job and
// controller_pod selector) and "$3" (range-vector duration) in an ingress
// template.
// NOTE: ingressSelector is never assigned, so "$1" is always replaced with
// an empty selector; this helper is not reachable from makeExpr above, and
// promQLTemplates defines no ingress entries yet.
func makeIngressMetricExpr(tmpl string, o QueryOptions) string {
	var ingressSelector string
	var jobSelector string
	duration := "5m"

	// parse range vector selectors: metric{key=value}[duration]
	if o.Duration != nil {
		duration = o.Duration.String()
	}

	// job is a required filter
	// GET /namespaces/{namespace}/ingress?job=xxx&pod=xxx
	if o.Job != "" {
		jobSelector = fmt.Sprintf(`job="%s"`, o.Job)
		if o.PodName != "" {
			jobSelector = fmt.Sprintf(`%s,controller_pod="%s"`, jobSelector, o.PodName)
		}
	}

	tmpl = strings.Replace(tmpl, "$1", ingressSelector, -1)
	tmpl = strings.Replace(tmpl, "$2", jobSelector, -1)
	return strings.Replace(tmpl, "$3", duration, -1)
}
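
// Illustrative expansion (a sketch with a hypothetical template of the
// form `m{$1}[$3] * on (namespace, pod) x{$2}`): with Job "nginx", PodName
// "web-0" and the default duration, the result would be
//
//	m{}[5m] * on (namespace, pod) x{job="nginx",controller_pod="web-0"}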