add cluster level multiple metrics in dashboard
This commit is contained in:
@@ -46,6 +46,19 @@ func (u MonitorResource) monitorContainer(request *restful.Request, response *re
|
||||
}
|
||||
|
||||
func (u MonitorResource) monitorWorkload(request *restful.Request, response *restful.Response) {
|
||||
wlKind := request.PathParameter("workload_kind")
|
||||
if strings.Trim(wlKind, " ") == "" {
|
||||
// count all workloads figure
|
||||
//metricName := "workload_count"
|
||||
res := metrics.MonitorWorkloadCount(request)
|
||||
response.WriteAsJson(res)
|
||||
} else {
|
||||
res := metrics.MonitorAllMetrics(request)
|
||||
response.WriteAsJson(res)
|
||||
}
|
||||
}
|
||||
|
||||
func (u MonitorResource) monitorWorkspacePodLevelMetrics(request *restful.Request, response *restful.Response) {
|
||||
res := metrics.MonitorAllMetrics(request)
|
||||
response.WriteAsJson(res)
|
||||
}
|
||||
@@ -196,10 +209,18 @@ func Register(ws *restful.WebService, subPath string) {
|
||||
Doc("monitor specific workload level metrics").
|
||||
Param(ws.PathParameter("ns_name", "namespace").DataType("string").Required(true).DefaultValue("kube-system")).
|
||||
Param(ws.QueryParameter("metrics_filter", "metrics name cpu memory...").DataType("string").Required(false)).
|
||||
Param(ws.PathParameter("workload_kind", "workload kind").DataType("string").Required(true).DefaultValue("daemonset")).
|
||||
Param(ws.PathParameter("workload_kind", "workload kind").DataType("string").Required(false).DefaultValue("daemonset")).
|
||||
Param(ws.QueryParameter("workload_name", "workload name").DataType("string").Required(true).DefaultValue("")).
|
||||
Metadata(restfulspec.KeyOpenAPITags, tags)).
|
||||
Consumes(restful.MIME_JSON, restful.MIME_XML).
|
||||
Produces(restful.MIME_JSON)
|
||||
|
||||
ws.Route(ws.GET(subPath+"/namespaces/{ns_name}/workloads").To(u.monitorWorkload).
|
||||
Filter(route.RouteLogging).
|
||||
Doc("monitor all workload level metrics").
|
||||
Param(ws.PathParameter("ns_name", "namespace").DataType("string").Required(true).DefaultValue("kube-system")).
|
||||
Param(ws.QueryParameter("metrics_filter", "metrics name cpu memory...").DataType("string").Required(false)).
|
||||
Metadata(restfulspec.KeyOpenAPITags, tags)).
|
||||
Consumes(restful.MIME_JSON, restful.MIME_XML).
|
||||
Produces(restful.MIME_JSON)
|
||||
}
|
||||
|
||||
@@ -17,6 +17,9 @@ import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/emicklei/go-restful"
|
||||
"github.com/golang/glog"
|
||||
"github.com/pkg/errors"
|
||||
@@ -28,6 +31,8 @@ const (
|
||||
DefaultPrometheusPort = "9090"
|
||||
PrometheusApiPath = "/api/v1/"
|
||||
PrometheusEndpointUrl = DefaultScheme + "://" + DefaultPrometheusService + ":" + DefaultPrometheusPort + PrometheusApiPath
|
||||
DefaultQueryStep = "10m"
|
||||
DefaultQueryTimeout = "30s"
|
||||
)
|
||||
|
||||
var client = &http.Client{}
|
||||
@@ -79,14 +84,18 @@ func ParseRequestHeader(request *restful.Request) (url.Values, bool, error) {
|
||||
end := request.QueryParameter("end")
|
||||
step := request.QueryParameter("step")
|
||||
timeout := request.QueryParameter("timeout")
|
||||
|
||||
if timeout == "" {
|
||||
timeout = "30s"
|
||||
timeout = DefaultQueryTimeout
|
||||
}
|
||||
if step == "" {
|
||||
step = DefaultQueryStep
|
||||
}
|
||||
// Whether query or query_range request
|
||||
u := url.Values{}
|
||||
if start != "" && end != "" && step != "" {
|
||||
u.Set("start", start)
|
||||
u.Set("end", end)
|
||||
if start != "" && end != "" {
|
||||
u.Set("start", convertTimeGranularity(start))
|
||||
u.Set("end", convertTimeGranularity(end))
|
||||
u.Set("step", step)
|
||||
u.Set("timeout", timeout)
|
||||
return u, true, nil
|
||||
@@ -101,6 +110,18 @@ func ParseRequestHeader(request *restful.Request) (url.Values, bool, error) {
|
||||
return u, false, nil
|
||||
}
|
||||
|
||||
glog.Error("Parse request failed", u)
|
||||
return u, false, errors.Errorf("Parse request failed")
|
||||
glog.Errorln("Parse request %s failed", u)
|
||||
return u, false, errors.Errorf("Parse request time range %s failed", u)
|
||||
}
|
||||
|
||||
func convertTimeGranularity(ts string) string {
|
||||
timeFloat, err := strconv.ParseFloat(ts, 64)
|
||||
if err != nil {
|
||||
glog.Errorf("convert second timestamp %s to minute timestamp failed", ts)
|
||||
return strconv.FormatInt(int64(time.Now().Unix()), 10)
|
||||
}
|
||||
timeInt := int64(timeFloat)
|
||||
// convert second timestamp to minute timestamp
|
||||
secondTime := time.Unix(timeInt, 0).Truncate(time.Minute).Unix()
|
||||
return strconv.FormatInt(secondTime, 10)
|
||||
}
|
||||
|
||||
@@ -1,184 +0,0 @@
|
||||
/*
|
||||
Copyright 2018 The KubeSphere Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/emicklei/go-restful"
|
||||
"github.com/golang/glog"
|
||||
|
||||
"kubesphere.io/kubesphere/pkg/client"
|
||||
)
|
||||
|
||||
func getPodNameRegexInWorkload(request *restful.Request) string {
|
||||
promql := MakeWorkloadRule(request)
|
||||
res := client.SendPrometheusRequest(request, promql)
|
||||
data := []byte(res)
|
||||
var dat CommonMetricsResult
|
||||
jsonErr := json.Unmarshal(data, &dat)
|
||||
if jsonErr != nil {
|
||||
glog.Errorln("json parse failed", jsonErr)
|
||||
}
|
||||
var podNames []string
|
||||
for _, x := range dat.Data.Result {
|
||||
podName := x.KubePodMetric.Pod
|
||||
podNames = append(podNames, podName)
|
||||
}
|
||||
podNamesFilter := "^(" + strings.Join(podNames, "|") + ")$"
|
||||
return podNamesFilter
|
||||
}
|
||||
|
||||
func MonitorWorkloadSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
|
||||
nsName := strings.Trim(request.PathParameter("ns_name"), " ")
|
||||
podNamesFilter := getPodNameRegexInWorkload(request)
|
||||
newPromql := MakePodPromQL(request, []string{metricsName, nsName, "", "", podNamesFilter})
|
||||
podMetrics := client.SendPrometheusRequest(request, newPromql)
|
||||
cleanedJson := ReformatJson(podMetrics, metricsName)
|
||||
return cleanedJson
|
||||
}
|
||||
|
||||
func MonitorPodSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
|
||||
nsName := strings.Trim(request.PathParameter("ns_name"), " ")
|
||||
nodeID := strings.Trim(request.PathParameter("node_id"), " ")
|
||||
podName := strings.Trim(request.PathParameter("pod_name"), " ")
|
||||
podFilter := strings.Trim(request.QueryParameter("pods_filter"), " ")
|
||||
params := []string{metricsName, nsName, nodeID, podName, podFilter}
|
||||
promql := MakePodPromQL(request, params)
|
||||
if promql != "" {
|
||||
res := client.SendPrometheusRequest(request, promql)
|
||||
cleanedJson := ReformatJson(res, metricsName)
|
||||
return cleanedJson
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func MonitorNamespaceSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
|
||||
recordingRule := MakeNamespacePromQL(request, metricsName)
|
||||
res := client.SendPrometheusRequest(request, recordingRule)
|
||||
cleanedJson := ReformatJson(res, metricsName)
|
||||
return cleanedJson
|
||||
}
|
||||
|
||||
func ReformatJson(metric string, metricsName string) *FormatedMetric {
|
||||
var formatMetric FormatedMetric
|
||||
err := json.Unmarshal([]byte(metric), &formatMetric)
|
||||
if err != nil {
|
||||
glog.Errorln("Unmarshal metric json failed", err)
|
||||
}
|
||||
if formatMetric.MetricName == "" {
|
||||
formatMetric.MetricName = metricsName
|
||||
}
|
||||
// retrive metrics success
|
||||
if formatMetric.Status == "success" {
|
||||
result := formatMetric.Data.Result
|
||||
for _, res := range result {
|
||||
metric, ok := res["metric"]
|
||||
me := metric.(map[string]interface{})
|
||||
if ok {
|
||||
delete(me, "__name__")
|
||||
}
|
||||
}
|
||||
}
|
||||
return &formatMetric
|
||||
}
|
||||
|
||||
func collectNodeorClusterMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
|
||||
metric := MonitorNodeorClusterSingleMetric(request, metricsName)
|
||||
ch <- metric
|
||||
}
|
||||
|
||||
func collectNamespaceMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
|
||||
metric := MonitorNamespaceSingleMetric(request, metricsName)
|
||||
ch <- metric
|
||||
}
|
||||
|
||||
func collectWorkloadMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
|
||||
metricsName = strings.TrimLeft(metricsName, "workload_")
|
||||
metric := MonitorWorkloadSingleMetric(request, metricsName)
|
||||
ch <- metric
|
||||
}
|
||||
|
||||
func collectPodMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
|
||||
metric := MonitorPodSingleMetric(request, metricsName)
|
||||
ch <- metric
|
||||
}
|
||||
|
||||
func MonitorAllMetrics(request *restful.Request) FormatedLevelMetric {
|
||||
metricsName := strings.Trim(request.QueryParameter("metrics_filter"), " ")
|
||||
if metricsName == "" {
|
||||
metricsName = ".*"
|
||||
}
|
||||
path := request.SelectedRoutePath()
|
||||
sourceType := path[strings.LastIndex(path, "/")+1 : len(path)-1]
|
||||
if strings.Contains(path, "workload") {
|
||||
sourceType = "workload"
|
||||
}
|
||||
var ch = make(chan *FormatedMetric, 10)
|
||||
for _, k := range MetricsNames {
|
||||
bol, err := regexp.MatchString(metricsName, k)
|
||||
if !bol {
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
glog.Errorln("regex match failed", err)
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(k, sourceType) {
|
||||
if sourceType == "node" || sourceType == "cluster" {
|
||||
go collectNodeorClusterMetrics(request, k, ch)
|
||||
} else if sourceType == "namespace" {
|
||||
go collectNamespaceMetrics(request, k, ch)
|
||||
} else if sourceType == "pod" {
|
||||
go collectPodMetrics(request, k, ch)
|
||||
} else if sourceType == "workload" {
|
||||
go collectWorkloadMetrics(request, k, ch)
|
||||
}
|
||||
}
|
||||
}
|
||||
var metricsArray []FormatedMetric
|
||||
var tempJson *FormatedMetric
|
||||
for _, k := range MetricsNames {
|
||||
bol, err := regexp.MatchString(metricsName, k)
|
||||
if !bol {
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
glog.Errorln("regex match failed")
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(k, sourceType) {
|
||||
tempJson = <-ch
|
||||
if tempJson != nil {
|
||||
metricsArray = append(metricsArray, *tempJson)
|
||||
}
|
||||
}
|
||||
}
|
||||
return FormatedLevelMetric{
|
||||
MetricsLevel: sourceType,
|
||||
Results: metricsArray,
|
||||
}
|
||||
}
|
||||
|
||||
func MonitorNodeorClusterSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
|
||||
recordingRule := MakeNodeorClusterRule(request, metricsName)
|
||||
res := client.SendPrometheusRequest(request, recordingRule)
|
||||
cleanedJson := ReformatJson(res, metricsName)
|
||||
return cleanedJson
|
||||
}
|
||||
@@ -1,147 +0,0 @@
|
||||
/*
|
||||
Copyright 2018 The KubeSphere Authors.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package metrics
|
||||
|
||||
type MetricMap map[string]string
|
||||
|
||||
var MetricsNames = []string{
|
||||
"cluster_cpu_utilisation",
|
||||
"cluster_memory_utilisation",
|
||||
"cluster_net_utilisation",
|
||||
"cluster_pod_count",
|
||||
|
||||
"node_cpu_utilisation",
|
||||
"node_memory_utilisation",
|
||||
"node_memory_available",
|
||||
"node_memory_total",
|
||||
"node_net_utilisation",
|
||||
"node_net_bytes_transmitted",
|
||||
"node_net_bytes_received",
|
||||
"node_disk_read_iops",
|
||||
"node_disk_write_iops",
|
||||
"node_disk_read_throughput",
|
||||
"node_disk_write_throughput",
|
||||
"node_disk_capacity",
|
||||
"node_disk_available",
|
||||
"node_disk_utilization",
|
||||
|
||||
"namespace_cpu_utilisation",
|
||||
"namespace_memory_utilisation",
|
||||
"namespace_memory_utilisation_wo_cache",
|
||||
"namespace_net_bytes_transmitted",
|
||||
"namespace_net_bytes_received",
|
||||
"namespace_pod_count",
|
||||
|
||||
"pod_cpu_utilisation",
|
||||
"pod_memory_utilisation",
|
||||
"pod_memory_utilisation_wo_cache",
|
||||
"pod_net_bytes_transmitted",
|
||||
"pod_net_bytes_received",
|
||||
|
||||
"workload_pod_cpu_utilisation",
|
||||
"workload_pod_memory_utilisation",
|
||||
"workload_pod_memory_utilisation_wo_cache",
|
||||
"workload_pod_net_bytes_transmitted",
|
||||
"workload_pod_net_bytes_received",
|
||||
//"container_cpu_utilisation",
|
||||
//"container_memory_utilisation_wo_cache",
|
||||
//"container_memory_utilisation",
|
||||
|
||||
"tenant_cpu_utilisation",
|
||||
"tenant_memory_utilisation",
|
||||
"tenant_memory_utilisation_wo_cache",
|
||||
"tenant_net_bytes_transmitted",
|
||||
"tenant_net_bytes_received",
|
||||
"tenant_pod_count",
|
||||
}
|
||||
|
||||
var RulePromQLTmplMap = MetricMap{
|
||||
//cluster
|
||||
"cluster_cpu_utilisation": ":node_cpu_utilisation:avg1m",
|
||||
"cluster_memory_utilisation": ":node_memory_utilisation:",
|
||||
// Cluster network utilisation (bytes received + bytes transmitted per second)
|
||||
"cluster_net_utilisation": ":node_net_utilisation:sum_irate",
|
||||
"cluster_pod_count": `count(kube_pod_info{job="kube-state-metrics"})`,
|
||||
|
||||
//node
|
||||
"node_cpu_utilisation": "node:node_cpu_utilisation:avg1m",
|
||||
"node_memory_utilisation": "node:node_memory_utilisation:",
|
||||
"node_memory_available": "node:node_memory_bytes_available:sum",
|
||||
"node_memory_total": "node:node_memory_bytes_total:sum",
|
||||
// Node network utilisation (bytes received + bytes transmitted per second)
|
||||
"node_net_utilisation": "node:node_net_utilisation:sum_irate",
|
||||
// Node network bytes transmitted per second
|
||||
"node_net_bytes_transmitted": "node:node_net_bytes_transmitted:sum_irate",
|
||||
// Node network bytes received per second
|
||||
"node_net_bytes_received": "node:node_net_bytes_received:sum_irate",
|
||||
|
||||
// node:data_volume_iops_reads:sum{node=~"i-5xcldxos|i-6soe9zl1"}
|
||||
"node_disk_read_iops": "node:data_volume_iops_reads:sum",
|
||||
// node:data_volume_iops_writes:sum{node=~"i-5xcldxos|i-6soe9zl1"}
|
||||
"node_disk_write_iops": "node:data_volume_iops_writes:sum",
|
||||
// node:data_volume_throughput_bytes_read:sum{node=~"i-5xcldxos|i-6soe9zl1"}
|
||||
"node_disk_read_throughput": "node:data_volume_throughput_bytes_read:sum",
|
||||
// node:data_volume_throughput_bytes_written:sum{node=~"i-5xcldxos|i-6soe9zl1"}
|
||||
"node_disk_write_throughput": "node:data_volume_throughput_bytes_written:sum",
|
||||
|
||||
"node_disk_capacity": `sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
|
||||
"node_disk_available": `sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
|
||||
"node_disk_utilization": `sum by (node) (((node_filesystem_size{mountpoint="/", job="node-exporter"} - node_filesystem_avail{mountpoint="/", job="node-exporter"}) / node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
|
||||
|
||||
//namespace
|
||||
"namespace_cpu_utilisation": `namespace:container_cpu_usage_seconds_total:sum_rate{namespace=~"$1"}`,
|
||||
"namespace_memory_utilisation": `namespace:container_memory_usage_bytes:sum{namespace=~"$1"}`,
|
||||
"namespace_memory_utilisation_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace=~"$1"}`,
|
||||
"namespace_net_bytes_transmitted": `sum by (namespace) (irate(container_network_transmit_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[2m]))`,
|
||||
"namespace_net_bytes_received": `sum by (namespace) (irate(container_network_receive_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[2m]))`,
|
||||
// count(kube_pod_info) by (namespace) namespace=~"monitoring|default|kube-system"
|
||||
"namespace_pod_count": `count(kube_pod_info{job="kube-state-metrics", namespace=~"$1"}) by (namespace)`,
|
||||
|
||||
// pod
|
||||
"pod_cpu_utilisation": `sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name="$2", image!=""}[5m])) by (namespace, pod_name)`,
|
||||
"pod_memory_utilisation": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name="$2", image!=""}) by (namespace, pod_name)`,
|
||||
"pod_memory_utilisation_wo_cache": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name="$2", image!=""} - container_memory_cache{job="kubelet", namespace="$1", pod_name="$2",image!=""}) by (namespace, pod_name)`,
|
||||
"pod_net_bytes_transmitted": `sum by (namespace, pod_name) (irate(container_network_transmit_bytes_total{namespace="$1", pod_name!="", pod_name="$2", interface="eth0", job="kubelet"}[2m]))`,
|
||||
"pod_net_bytes_received": `sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{namespace="$1", pod_name!="", pod_name="$2", interface="eth0", job="kubelet"}[2m]))`,
|
||||
|
||||
"pod_cpu_utilisation_all": `sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}[5m])) by (namespace, pod_name)`,
|
||||
"pod_memory_utilisation_all": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}) by (namespace, pod_name)`,
|
||||
"pod_memory_utilisation_wo_cache_all": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name=~"$2", image!=""} - container_memory_cache{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}) by (namespace, pod_name)`,
|
||||
"pod_net_bytes_transmitted_all": `sum by (namespace, pod_name) (irate(container_network_transmit_bytes_total{namespace="$1", pod_name!="", pod_name=~"$2", interface="eth0", job="kubelet"}[2m]))`,
|
||||
"pod_net_bytes_received_all": `sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{namespace="$1", pod_name!="", pod_name=~"$2", interface="eth0", job="kubelet"}[2m]))`,
|
||||
|
||||
//"pod_cpu_utilisation_node": `sum by (node, pod) (label_join(irate(container_cpu_usage_seconds_total{job="kubelet", image!=""}[5m]), "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
|
||||
"pod_cpu_utilisation_node": `sum by (node, pod) (label_join(irate(container_cpu_usage_seconds_total{job="kubelet",pod_name=~"$2", image!=""}[5m]), "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
|
||||
"pod_memory_utilisation_node": `sum by (node, pod) (label_join(container_memory_usage_bytes{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
|
||||
"pod_memory_utilisation_wo_cache_node": `sum by (node, pod) ((label_join(container_memory_usage_bytes{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name") - label_join(container_memory_cache{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name")) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
|
||||
|
||||
// container
|
||||
"container_cpu_utilisation": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", pod_name="$2", container_name="$3"}[5m])) by (namespace, pod_name, container_name)`,
|
||||
//"container_cpu_utilisation_wo_podname": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", container_name=~"$3"}[5m])) by (namespace, pod_name, container_name)`,
|
||||
"container_cpu_utilisation_all": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}[5m])) by (namespace, pod_name, container_name)`,
|
||||
//"container_cpu_utilisation_all_wo_podname": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", container_name!="POD"}[5m])) by (namespace, pod_name, container_name)`,
|
||||
|
||||
"container_memory_utilisation_wo_cache": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name="$3"} - ignoring(id, image, endpoint, instance, job, name, service) container_memory_cache{namespace="$1", pod_name="$2", container_name="$3"}`,
|
||||
"container_memory_utilisation_wo_cache_all": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"} - ignoring(id, image, endpoint, instance, job, name, service) container_memory_cache{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}`,
|
||||
"container_memory_utilisation": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name="$3"}`,
|
||||
"container_memory_utilisation_all": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}`,
|
||||
|
||||
// tenant
|
||||
"tenant_cpu_utilisation": `sum(namespace:container_cpu_usage_seconds_total:sum_rate{namespace =~"$1"})`,
|
||||
"tenant_memory_utilisation": `sum(namespace:container_memory_usage_bytes:sum{namespace =~"$1"})`,
|
||||
"tenant_memory_utilisation_wo_cache": `sum(namespace:container_memory_usage_bytes_wo_cache:sum{namespace =~"$1"})`,
|
||||
"tenant_net_bytes_transmitted": `sum(sum by (namespace) (irate(container_network_transmit_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[2m])))`,
|
||||
"tenant_net_bytes_received": `sum(sum by (namespace) (irate(container_network_receive_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[2m])))`,
|
||||
"tenant_pod_count": `sum(count(kube_pod_info{job="kube-state-metrics", namespace=~"$1"}) by (namespace))`,
|
||||
}
|
||||
422
pkg/models/metrics/metricscollector.go
Normal file
422
pkg/models/metrics/metricscollector.go
Normal file
@@ -0,0 +1,422 @@
|
||||
/*
|
||||
Copyright 2018 The KubeSphere Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/emicklei/go-restful"
|
||||
"github.com/golang/glog"
|
||||
|
||||
"time"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
metaV1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
"kubesphere.io/kubesphere/pkg/client"
|
||||
"kubesphere.io/kubesphere/pkg/models"
|
||||
)
|
||||
|
||||
func getPodNameRegexInWorkload(request *restful.Request) string {
|
||||
promql := MakeWorkloadRule(request)
|
||||
res := client.SendPrometheusRequest(request, promql)
|
||||
data := []byte(res)
|
||||
var dat CommonMetricsResult
|
||||
jsonErr := json.Unmarshal(data, &dat)
|
||||
if jsonErr != nil {
|
||||
glog.Errorln("json parse failed", jsonErr)
|
||||
}
|
||||
var podNames []string
|
||||
for _, x := range dat.Data.Result {
|
||||
podName := x.KubePodMetric.Pod
|
||||
podNames = append(podNames, podName)
|
||||
}
|
||||
podNamesFilter := "^(" + strings.Join(podNames, "|") + ")$"
|
||||
return podNamesFilter
|
||||
}
|
||||
|
||||
func MonitorWorkloadSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
|
||||
nsName := strings.Trim(request.PathParameter("ns_name"), " ")
|
||||
podNamesFilter := getPodNameRegexInWorkload(request)
|
||||
newPromql := MakePodPromQL(request, []string{metricsName, nsName, "", "", podNamesFilter})
|
||||
podMetrics := client.SendPrometheusRequest(request, newPromql)
|
||||
cleanedJson := ReformatJson(podMetrics, metricsName)
|
||||
return cleanedJson
|
||||
}
|
||||
|
||||
func MonitorPodSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
|
||||
nsName := strings.Trim(request.PathParameter("ns_name"), " ")
|
||||
nodeID := strings.Trim(request.PathParameter("node_id"), " ")
|
||||
podName := strings.Trim(request.PathParameter("pod_name"), " ")
|
||||
podFilter := strings.Trim(request.QueryParameter("pods_filter"), " ")
|
||||
params := []string{metricsName, nsName, nodeID, podName, podFilter}
|
||||
promql := MakePodPromQL(request, params)
|
||||
if promql != "" {
|
||||
res := client.SendPrometheusRequest(request, promql)
|
||||
cleanedJson := ReformatJson(res, metricsName)
|
||||
return cleanedJson
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func MonitorNamespaceSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
|
||||
recordingRule := MakeNamespacePromQL(request, metricsName)
|
||||
res := client.SendPrometheusRequest(request, recordingRule)
|
||||
cleanedJson := ReformatJson(res, metricsName)
|
||||
return cleanedJson
|
||||
}
|
||||
|
||||
// maybe this function is time consuming
|
||||
func ReformatJson(metric string, metricsName string) *FormatedMetric {
|
||||
var formatMetric FormatedMetric
|
||||
err := json.Unmarshal([]byte(metric), &formatMetric)
|
||||
if err != nil {
|
||||
glog.Errorln("Unmarshal metric json failed", err)
|
||||
}
|
||||
if formatMetric.MetricName == "" {
|
||||
formatMetric.MetricName = metricsName
|
||||
}
|
||||
// retrive metrics success
|
||||
if formatMetric.Status == MetricStatusSuccess {
|
||||
result := formatMetric.Data.Result
|
||||
for _, res := range result {
|
||||
metric, ok := res[ResultItemMetric]
|
||||
me := metric.(map[string]interface{})
|
||||
if ok {
|
||||
delete(me, "__name__")
|
||||
}
|
||||
}
|
||||
}
|
||||
return &formatMetric
|
||||
}
|
||||
|
||||
func collectNodeorClusterMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
|
||||
metric := MonitorNodeorClusterSingleMetric(request, metricsName)
|
||||
ch <- metric
|
||||
}
|
||||
|
||||
func collectNamespaceMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
|
||||
metric := MonitorNamespaceSingleMetric(request, metricsName)
|
||||
ch <- metric
|
||||
}
|
||||
|
||||
func collectWorkloadMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
|
||||
metricsName = strings.TrimLeft(metricsName, "workload_")
|
||||
metric := MonitorWorkloadSingleMetric(request, metricsName)
|
||||
ch <- metric
|
||||
}
|
||||
|
||||
func collectWorkspaceMetrics(request *restful.Request, metricsName string, namespaceList []string, ch chan<- *FormatedMetric) {
|
||||
mertic := monitorWorkspaceSingleMertic(request, metricsName, namespaceList)
|
||||
ch <- mertic
|
||||
}
|
||||
|
||||
func collectPodMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
|
||||
metric := MonitorPodSingleMetric(request, metricsName)
|
||||
ch <- metric
|
||||
}
|
||||
|
||||
func monitorWorkspaceSingleMertic(request *restful.Request, metricsName string, namespaceList []string) *FormatedMetric {
|
||||
namespaceRe2 := "^(" + strings.Join(namespaceList, "|") + ")$"
|
||||
newpromql := MakeWorkspacePromQL(metricsName, namespaceRe2)
|
||||
podMetrics := client.SendPrometheusRequest(request, newpromql)
|
||||
cleanedJson := ReformatJson(podMetrics, metricsName)
|
||||
return cleanedJson
|
||||
}
|
||||
|
||||
func filterNamespace(request *restful.Request, namespaceList []string) []string {
|
||||
var newNSlist []string
|
||||
nsFilter := strings.Trim(request.QueryParameter("namespaces_filter"), " ")
|
||||
if nsFilter == "" {
|
||||
nsFilter = ".*"
|
||||
}
|
||||
for _, ns := range namespaceList {
|
||||
bol, _ := regexp.MatchString(nsFilter, ns)
|
||||
if bol {
|
||||
newNSlist = append(newNSlist, ns)
|
||||
}
|
||||
}
|
||||
return newNSlist
|
||||
}
|
||||
|
||||
func MonitorAllMetrics(request *restful.Request) FormatedLevelMetric {
|
||||
metricsName := strings.Trim(request.QueryParameter("metrics_filter"), " ")
|
||||
if metricsName == "" {
|
||||
metricsName = ".*"
|
||||
}
|
||||
path := request.SelectedRoutePath()
|
||||
sourceType := path[strings.LastIndex(path, "/")+1 : len(path)-1]
|
||||
if strings.Contains(path, MetricLevelWorkload) {
|
||||
sourceType = MetricLevelWorkload
|
||||
} else if strings.Contains(path, MetricLevelWorkspace) {
|
||||
sourceType = MetricLevelWorkspace
|
||||
}
|
||||
var ch = make(chan *FormatedMetric, 10)
|
||||
for _, metricName := range MetricsNames {
|
||||
bol, err := regexp.MatchString(metricsName, metricName)
|
||||
if !bol {
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
glog.Errorln("regex match failed", err)
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(metricName, sourceType) {
|
||||
if sourceType == MetricLevelCluster || sourceType == MetricLevelNode {
|
||||
go collectNodeorClusterMetrics(request, metricName, ch)
|
||||
} else if sourceType == MetricLevelNamespace {
|
||||
go collectNamespaceMetrics(request, metricName, ch)
|
||||
} else if sourceType == MetricLevelPod {
|
||||
go collectPodMetrics(request, metricName, ch)
|
||||
} else if sourceType == MetricLevelWorkload {
|
||||
go collectWorkloadMetrics(request, metricName, ch)
|
||||
}
|
||||
}
|
||||
}
|
||||
var metricsArray []FormatedMetric
|
||||
var tempJson *FormatedMetric
|
||||
for _, k := range MetricsNames {
|
||||
bol, err := regexp.MatchString(metricsName, k)
|
||||
if !bol {
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
glog.Errorln("regex match failed")
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(k, sourceType) {
|
||||
tempJson = <-ch
|
||||
if tempJson != nil {
|
||||
metricsArray = append(metricsArray, *tempJson)
|
||||
}
|
||||
}
|
||||
}
|
||||
return FormatedLevelMetric{
|
||||
MetricsLevel: sourceType,
|
||||
Results: metricsArray,
|
||||
}
|
||||
}
|
||||
|
||||
func getWorkspacePodsCountMetrics(request *restful.Request, namespaces []string) *FormatedMetric {
|
||||
metricName := MetricNameNamespacePodCount
|
||||
var recordingRule = RulePromQLTmplMap[metricName]
|
||||
nsFilter := "^(" + strings.Join(namespaces, "|") + ")$"
|
||||
recordingRule = strings.Replace(recordingRule, "$1", nsFilter, -1)
|
||||
res := client.SendPrometheusRequest(request, recordingRule)
|
||||
cleanedJson := ReformatJson(res, metricName)
|
||||
return cleanedJson
|
||||
}
|
||||
|
||||
func getWorkspaceWorkloadCountMetrics(namespaces []string) FormatedMetric {
|
||||
var wlQuotaMetrics models.ResourceQuota
|
||||
wlQuotaMetrics.NameSpace = strings.Join(namespaces, "|")
|
||||
wlQuotaMetrics.Data.Used = make(v1.ResourceList, 1)
|
||||
wlQuotaMetrics.Data.Hard = make(v1.ResourceList, 1)
|
||||
for _, ns := range namespaces {
|
||||
quotaMetric, err := models.GetNamespaceQuota(ns)
|
||||
if err != nil {
|
||||
glog.Errorln(err)
|
||||
continue
|
||||
}
|
||||
// sum all resources used along namespaces
|
||||
quotaUsed := quotaMetric.Data.Used
|
||||
for resourceName, quantity := range quotaUsed {
|
||||
if _, ok := wlQuotaMetrics.Data.Used[resourceName]; ok {
|
||||
tmpQuantity := wlQuotaMetrics.Data.Used[v1.ResourceName(resourceName)]
|
||||
tmpQuantity.Add(quantity)
|
||||
wlQuotaMetrics.Data.Used[v1.ResourceName(resourceName)] = tmpQuantity
|
||||
} else {
|
||||
wlQuotaMetrics.Data.Used[v1.ResourceName(resourceName)] = quantity.DeepCopy()
|
||||
}
|
||||
}
|
||||
|
||||
// sum all resources hard along namespaces
|
||||
quotaHard := quotaMetric.Data.Hard
|
||||
for resourceName, quantity := range quotaHard {
|
||||
if _, ok := wlQuotaMetrics.Data.Hard[resourceName]; ok {
|
||||
tmpQuantity := wlQuotaMetrics.Data.Hard[v1.ResourceName(resourceName)]
|
||||
tmpQuantity.Add(quantity)
|
||||
wlQuotaMetrics.Data.Hard[v1.ResourceName(resourceName)] = tmpQuantity
|
||||
} else {
|
||||
wlQuotaMetrics.Data.Hard[v1.ResourceName(resourceName)] = quantity.DeepCopy()
|
||||
}
|
||||
}
|
||||
}
|
||||
wlMetrics := convertQuota2MetricStruct(&wlQuotaMetrics)
|
||||
return wlMetrics
|
||||
}
|
||||
|
||||
func getSpecificMetricItem(timestamp int64, metricName string, kind string, count int, err error) FormatedMetric {
|
||||
var nsMetrics FormatedMetric
|
||||
nsMetrics.MetricName = metricName
|
||||
nsMetrics.Data.ResultType = ResultTypeVector
|
||||
resultItem := make(map[string]interface{})
|
||||
tmp := make(map[string]string)
|
||||
tmp[ResultItemMetricResource] = kind
|
||||
if err == nil {
|
||||
nsMetrics.Status = MetricStatusSuccess
|
||||
} else {
|
||||
nsMetrics.Status = MetricStatusError
|
||||
resultItem["errorinfo"] = err.Error()
|
||||
}
|
||||
|
||||
resultItem[ResultItemMetric] = tmp
|
||||
resultItem[ResultItemValue] = []interface{}{timestamp, count}
|
||||
nsMetrics.Data.Result = make([]map[string]interface{}, 1)
|
||||
nsMetrics.Data.Result[0] = resultItem
|
||||
return nsMetrics
|
||||
}
|
||||
|
||||
func MonitorNodeorClusterSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
|
||||
// support cluster node statistic, include healthy nodes and unhealthy nodes
|
||||
var res string
|
||||
var fMetric FormatedMetric
|
||||
timestamp := int64(time.Now().Unix())
|
||||
|
||||
if metricsName == MetricNameClusterHealthyNodeCount {
|
||||
onlineNodes, _ := getNodeHealthyConditionMetric()
|
||||
fMetric = getSpecificMetricItem(timestamp, MetricNameClusterHealthyNodeCount, "node_count", len(onlineNodes), nil)
|
||||
} else if metricsName == MetricNameClusterUnhealthyNodeCount {
|
||||
_, offlineNodes := getNodeHealthyConditionMetric()
|
||||
fMetric = getSpecificMetricItem(timestamp, MetricNameClusterUnhealthyNodeCount, "node_count", len(offlineNodes), nil)
|
||||
} else if metricsName == MetricNameClusterNodeCount {
|
||||
onlineNodes, offlineNodes := getNodeHealthyConditionMetric()
|
||||
fMetric = getSpecificMetricItem(timestamp, MetricNameClusterNodeCount, "node_count", len(onlineNodes)+len(offlineNodes), nil)
|
||||
} else {
|
||||
recordingRule := MakeNodeorClusterRule(request, metricsName)
|
||||
res = client.SendPrometheusRequest(request, recordingRule)
|
||||
fMetric = *ReformatJson(res, metricsName)
|
||||
}
|
||||
return &fMetric
|
||||
}
|
||||
|
||||
func getNodeHealthyConditionMetric() ([]string, []string) {
|
||||
nodeList, err := client.NewK8sClient().CoreV1().Nodes().List(metaV1.ListOptions{})
|
||||
if err != nil {
|
||||
glog.Errorln(err)
|
||||
return nil, nil
|
||||
}
|
||||
var onlineNodes []string
|
||||
var offlineNodes []string
|
||||
for _, node := range nodeList.Items {
|
||||
nodeName := node.Labels["kubernetes.io/hostname"]
|
||||
nodeRole := node.Labels["role"]
|
||||
bol := true
|
||||
for _, cond := range node.Status.Conditions {
|
||||
if cond.Type == "Ready" && cond.Status == "Unknown" {
|
||||
bol = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if nodeRole != "log" {
|
||||
if bol {
|
||||
// reachable node
|
||||
onlineNodes = append(onlineNodes, nodeName)
|
||||
} else {
|
||||
// unreachable node
|
||||
offlineNodes = append(offlineNodes, nodeName)
|
||||
}
|
||||
}
|
||||
}
|
||||
return onlineNodes, offlineNodes
|
||||
}
|
||||
|
||||
func getExistingNamespace(namespaces []string) ([]string, []string) {
|
||||
namespaceMap, err := getAllNamespace()
|
||||
var existedNs []string
|
||||
var noneExistedNs []string
|
||||
if err != nil {
|
||||
return namespaces, nil
|
||||
}
|
||||
for _, ns := range namespaces {
|
||||
if _, ok := namespaceMap[ns]; ok {
|
||||
existedNs = append(existedNs, ns)
|
||||
} else {
|
||||
noneExistedNs = append(noneExistedNs, ns)
|
||||
}
|
||||
}
|
||||
return existedNs, noneExistedNs
|
||||
}
|
||||
|
||||
func getAllNamespace() (map[string]int, error) {
|
||||
k8sClient := client.NewK8sClient()
|
||||
nsList, err := k8sClient.CoreV1().Namespaces().List(metaV1.ListOptions{})
|
||||
if err != nil {
|
||||
glog.Errorln(err)
|
||||
return nil, err
|
||||
}
|
||||
namespaceMap := make(map[string]int)
|
||||
for _, item := range nsList.Items {
|
||||
namespaceMap[item.Name] = 0
|
||||
}
|
||||
return namespaceMap, nil
|
||||
}
|
||||
|
||||
func MonitorWorkloadCount(request *restful.Request) FormatedMetric {
|
||||
namespace := strings.Trim(request.PathParameter("ns_name"), " ")
|
||||
|
||||
quotaMetric, err := models.GetNamespaceQuota(namespace)
|
||||
fMetric := convertQuota2MetricStruct(quotaMetric)
|
||||
|
||||
// whether the namespace in request parameters exists?
|
||||
namespaceMap, e := getAllNamespace()
|
||||
_, ok := namespaceMap[namespace]
|
||||
if e != nil {
|
||||
ok = true
|
||||
}
|
||||
if !ok || err != nil {
|
||||
fMetric.Status = MetricStatusError
|
||||
fMetric.Data.ResultType = ""
|
||||
errInfo := make(map[string]interface{})
|
||||
if err != nil {
|
||||
errInfo["errormsg"] = err.Error()
|
||||
} else {
|
||||
errInfo["errormsg"] = "namespace " + namespace + " does not exist"
|
||||
}
|
||||
fMetric.Data.Result = []map[string]interface{}{errInfo}
|
||||
}
|
||||
|
||||
return fMetric
|
||||
}
|
||||
|
||||
func convertQuota2MetricStruct(quotaMetric *models.ResourceQuota) FormatedMetric {
|
||||
var fMetric FormatedMetric
|
||||
fMetric.MetricName = MetricNameWorkloadCount
|
||||
fMetric.Status = MetricStatusSuccess
|
||||
fMetric.Data.ResultType = ResultTypeVector
|
||||
timestamp := int64(time.Now().Unix())
|
||||
var resultItems []map[string]interface{}
|
||||
|
||||
hardMap := make(map[string]string)
|
||||
for resourceName, v := range quotaMetric.Data.Hard {
|
||||
hardMap[resourceName.String()] = v.String()
|
||||
}
|
||||
|
||||
for resourceName, v := range quotaMetric.Data.Used {
|
||||
resultItem := make(map[string]interface{})
|
||||
tmp := make(map[string]string)
|
||||
tmp[ResultItemMetricResource] = resourceName.String()
|
||||
resultItem[ResultItemMetric] = tmp
|
||||
resultItem[ResultItemValue] = []interface{}{timestamp, hardMap[resourceName.String()], v.String()}
|
||||
resultItems = append(resultItems, resultItem)
|
||||
}
|
||||
|
||||
fMetric.Data.Result = resultItems
|
||||
return fMetric
|
||||
}
|
||||
236
pkg/models/metrics/metricsconst.go
Normal file
236
pkg/models/metrics/metricsconst.go
Normal file
@@ -0,0 +1,236 @@
|
||||
/*
|
||||
Copyright 2018 The KubeSphere Authors.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package metrics
|
||||
|
||||
// Keys and values used when assembling Prometheus-style JSON responses.
const (
	ResultTypeVector         = "vector"   // instantaneous samples
	ResultTypeMatrix         = "matrix"   // range (time-series) samples
	MetricStatusError        = "error"    // metric could not be resolved
	MetricStatusSuccess      = "success"  // metric resolved normally
	ResultItemMetric         = "metric"   // label set of one result item
	ResultItemMetricResource = "resource" // resource label inside the label set
	ResultItemValue          = "value"    // [timestamp, value, ...] sample payload
)
|
||||
|
||||
// Names of metrics that are computed in-process (from the Kubernetes API or
// resource quotas) rather than queried from Prometheus recording rules.
const (
	MetricNameWorkloadCount     = "workload_count"
	MetricNameNamespacePodCount = "namespace_pod_count"

	// Platform-wide totals across all workspaces.
	MetricNameWorkspaceAllOrganizationCount = "workspace_all_organization_count"
	MetricNameWorkspaceAllAccountCount      = "workspace_all_account_count"
	MetricNameWorkspaceAllProjectCount      = "workspace_all_project_count"
	MetricNameWorkspaceAllDevopsCount       = "workspace_all_devops_project_count"

	// Per-workspace counts.
	MetricNameWorkspaceNamespaceCount = "workspace_namespace_count"
	MetricNameWorkspaceDevopsCount    = "workspace_devops_project_count"
	MetricNameWorkspaceMemberCount    = "workspace_member_count"
	MetricNameWorkspaceRoleCount      = "workspace_role_count"

	// Cluster node statistics derived from node Ready conditions.
	MetricNameClusterHealthyNodeCount   = "cluster_node_online"
	MetricNameClusterUnhealthyNodeCount = "cluster_node_offline"
	MetricNameClusterNodeCount          = "cluster_node_total"
)
|
||||
|
||||
// Resource kinds that can be counted at the workspace level.
const (
	WorkspaceResourceKindOrganization = "organization"
	WorkspaceResourceKindAccount      = "account"
	WorkspaceResourceKindNamespace    = "namespace"
	WorkspaceResourceKindDevops       = "devops"
	WorkspaceResourceKindMember       = "member"
	WorkspaceResourceKindRole         = "role"
)
|
||||
|
||||
// Monitoring levels. Metric names are prefixed with one of these values
// (e.g. "cluster_cpu_usage"), which is how metric requests are dispatched.
const (
	MetricLevelCluster   = "cluster"
	MetricLevelNode      = "node"
	MetricLevelWorkspace = "workspace"
	MetricLevelNamespace = "namespace"
	MetricLevelPod       = "pod"
	MetricLevelContainer = "container"
	MetricLevelWorkload  = "workload"
)
|
||||
|
||||
// MetricMap maps a metric name to its Prometheus query (PromQL) template.
type MetricMap map[string]string
|
||||
|
||||
// MetricsNames enumerates every supported metric. The prefix of each name is
// its monitoring level (see the MetricLevel* constants); dispatching code
// matches requested metrics against this list with strings.HasPrefix.
var MetricsNames = []string{
	// cluster level
	"cluster_cpu_utilisation",
	"cluster_cpu_usage",
	"cluster_cpu_total",
	"cluster_memory_utilisation",
	"cluster_pod_count",
	"cluster_memory_bytes_available",
	"cluster_memory_bytes_total",
	"cluster_memory_bytes_usage",
	"cluster_net_utilisation",
	"cluster_net_bytes_transmitted",
	"cluster_net_bytes_received",
	"cluster_disk_read_iops",
	"cluster_disk_write_iops",
	"cluster_disk_read_throughput",
	"cluster_disk_write_throughput",
	"cluster_disk_size_usage",
	"cluster_disk_size_utilisation",
	"cluster_disk_size_capacity",
	"cluster_disk_size_available",
	"cluster_node_online",
	"cluster_node_offline",
	"cluster_node_total",

	// node level
	"node_cpu_utilisation",
	"node_cpu_total",
	"node_cpu_usage",
	"node_memory_utilisation",
	"node_memory_bytes_usage",
	"node_memory_bytes_available",
	"node_memory_bytes_total",
	"node_net_utilisation",
	"node_net_bytes_transmitted",
	"node_net_bytes_received",
	"node_disk_read_iops",
	"node_disk_write_iops",
	"node_disk_read_throughput",
	"node_disk_write_throughput",
	"node_disk_size_capacity",
	"node_disk_size_available",
	"node_disk_size_usage",
	"node_disk_size_utilisation",
	"node_pod_count",
	"node_pod_quota",

	// namespace level
	"namespace_cpu_usage",
	"namespace_memory_usage",
	"namespace_memory_usage_wo_cache",
	"namespace_net_bytes_transmitted",
	"namespace_net_bytes_received",
	"namespace_pod_count",

	// pod level
	"pod_cpu_usage",
	"pod_memory_usage",
	"pod_memory_usage_wo_cache",
	"pod_net_bytes_transmitted",
	"pod_net_bytes_received",

	// workload level (aggregated over the workload's pods)
	"workload_pod_cpu_usage",
	"workload_pod_memory_usage",
	"workload_pod_memory_usage_wo_cache",
	"workload_pod_net_bytes_transmitted",
	"workload_pod_net_bytes_received",

	// container level — currently disabled.
	//"container_cpu_usage",
	//"container_memory_usage_wo_cache",
	//"container_memory_usage",

	// workspace level
	"workspace_cpu_usage",
	"workspace_memory_usage",
	"workspace_memory_usage_wo_cache",
	"workspace_net_bytes_transmitted",
	"workspace_net_bytes_received",
	"workspace_pod_count",
}
|
||||
|
||||
// RulePromQLTmplMap maps each metric name to the PromQL expression (often a
// Prometheus recording rule) that computes it. Templates may contain
// positional placeholders ($1, $2, $3) that the Make*PromQL/Make*Rule helpers
// substitute with namespace, pod/container and node filters respectively.
var RulePromQLTmplMap = MetricMap{
	//cluster
	"cluster_cpu_utilisation":       ":node_cpu_utilisation:avg1m",
	"cluster_cpu_usage":             `sum (irate(container_cpu_usage_seconds_total{job="kubelet", image!=""}[5m]))`,
	"cluster_cpu_total":             "sum(node:node_num_cpu:sum)",
	"cluster_memory_utilisation":    ":node_memory_utilisation:",
	// Completed pods are excluded, as are pods scheduled onto log nodes.
	"cluster_pod_count":             `count(kube_pod_info unless on(pod) kube_pod_completion_time unless on(node) kube_node_labels{label_role="log"})`,
	"cluster_memory_bytes_available": "sum(node:node_memory_bytes_available:sum)",
	"cluster_memory_bytes_total":     "sum(node:node_memory_bytes_total:sum)",
	"cluster_memory_bytes_usage":     "sum(node:node_memory_bytes_total:sum) - sum(node:node_memory_bytes_available:sum)",
	"cluster_net_utilisation":        "sum(node:node_net_utilisation:sum_irate)",
	"cluster_net_bytes_transmitted":  "sum(node:node_net_bytes_transmitted:sum_irate)",
	"cluster_net_bytes_received":     "sum(node:node_net_bytes_received:sum_irate)",
	"cluster_disk_read_iops":         "sum(node:data_volume_iops_reads:sum)",
	"cluster_disk_write_iops":        "sum(node:data_volume_iops_writes:sum)",
	"cluster_disk_read_throughput":   "sum(node:data_volume_throughput_bytes_read:sum)",
	"cluster_disk_write_throughput":  "sum(node:data_volume_throughput_bytes_written:sum)",
	"cluster_disk_size_usage":        `sum(sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:)) - sum(sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:))`,
	"cluster_disk_size_utilisation":  `(sum(sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:)) - sum(sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:))) / sum(sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:))`,
	"cluster_disk_size_capacity":     `sum(sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:))`,
	"cluster_disk_size_available":    `sum(sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:))`,

	//node — "$1" is a node filter, e.g. {node=~"i-5xcldxos|i-6soe9zl1"}
	"node_cpu_utilisation":        "node:node_cpu_utilisation:avg1m",
	"node_cpu_total":              "node:node_num_cpu:sum",
	"node_memory_utilisation":     "node:node_memory_utilisation:",
	"node_memory_bytes_available": "node:node_memory_bytes_available:sum",
	"node_memory_bytes_total":     "node:node_memory_bytes_total:sum",
	// Node network utilisation (bytes received + bytes transmitted per second)
	"node_net_utilisation": "node:node_net_utilisation:sum_irate",
	// Node network bytes transmitted per second
	"node_net_bytes_transmitted": "node:node_net_bytes_transmitted:sum_irate",
	// Node network bytes received per second
	"node_net_bytes_received": "node:node_net_bytes_received:sum_irate",

	// node:data_volume_iops_reads:sum{node=~"i-5xcldxos|i-6soe9zl1"}
	"node_disk_read_iops": "node:data_volume_iops_reads:sum",
	// node:data_volume_iops_writes:sum{node=~"i-5xcldxos|i-6soe9zl1"}
	"node_disk_write_iops": "node:data_volume_iops_writes:sum",
	// node:data_volume_throughput_bytes_read:sum{node=~"i-5xcldxos|i-6soe9zl1"}
	"node_disk_read_throughput": "node:data_volume_throughput_bytes_read:sum",
	// node:data_volume_throughput_bytes_written:sum{node=~"i-5xcldxos|i-6soe9zl1"}
	"node_disk_write_throughput": "node:data_volume_throughput_bytes_written:sum",

	"node_disk_size_capacity":    `sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
	"node_disk_size_available":   `sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
	"node_disk_size_usage":       `sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1) -sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
	"node_disk_size_utilisation": `sum by (node) (((node_filesystem_size{mountpoint="/", job="node-exporter"} - node_filesystem_avail{mountpoint="/", job="node-exporter"}) / node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
	"node_pod_count":             `count(kube_pod_info$1 unless on(pod) kube_pod_completion_time) by (node)`,
	// without log node: unless on(node) kube_node_labels{label_role="log"}
	"node_pod_quota":         `sum(kube_node_status_capacity_pods$1) by (node)`,
	"node_cpu_usage":         `sum by (node) (label_join(irate(container_cpu_usage_seconds_total{job="kubelet", image!=""}[5m]), "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
	"node_memory_bytes_usage": "node:node_memory_bytes_total:sum$1 - node:node_memory_bytes_available:sum$1",

	//namespace — "$1" is a namespace regex
	"namespace_cpu_usage":             `namespace:container_cpu_usage_seconds_total:sum_rate{namespace=~"$1"}`,
	"namespace_memory_usage":          `namespace:container_memory_usage_bytes:sum{namespace=~"$1"}`,
	"namespace_memory_usage_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace=~"$1"}`,
	"namespace_net_bytes_transmitted": `sum by (namespace) (irate(container_network_transmit_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[5m]))`,
	"namespace_net_bytes_received":    `sum by (namespace) (irate(container_network_receive_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[5m]))`,
	"namespace_pod_count":             `count(kube_pod_info{namespace=~"$1"} unless on(pod) kube_pod_completion_time) by (namespace)`,

	// pod — "$1" is the namespace, "$2" the pod name (exact match)
	"pod_cpu_usage":             `sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name="$2", image!=""}[5m])) by (namespace, pod_name)`,
	"pod_memory_usage":          `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name="$2", image!=""}) by (namespace, pod_name)`,
	"pod_memory_usage_wo_cache": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name="$2", image!=""} - container_memory_cache{job="kubelet", namespace="$1", pod_name="$2",image!=""}) by (namespace, pod_name)`,
	"pod_net_bytes_transmitted": `sum by (namespace, pod_name) (irate(container_network_transmit_bytes_total{namespace="$1", pod_name!="", pod_name="$2", interface="eth0", job="kubelet"}[5m]))`,
	"pod_net_bytes_received":    `sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{namespace="$1", pod_name!="", pod_name="$2", interface="eth0", job="kubelet"}[5m]))`,

	// pod (_all variants) — "$2" is a pod-name regex instead of an exact name
	"pod_cpu_usage_all":             `sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}[5m])) by (namespace, pod_name)`,
	"pod_memory_usage_all":          `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}) by (namespace, pod_name)`,
	"pod_memory_usage_wo_cache_all": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name=~"$2", image!=""} - container_memory_cache{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}) by (namespace, pod_name)`,
	"pod_net_bytes_transmitted_all": `sum by (namespace, pod_name) (irate(container_network_transmit_bytes_total{namespace="$1", pod_name!="", pod_name=~"$2", interface="eth0", job="kubelet"}[5m]))`,
	"pod_net_bytes_received_all":    `sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{namespace="$1", pod_name!="", pod_name=~"$2", interface="eth0", job="kubelet"}[5m]))`,

	// pod (_node variants) — "$3" restricts results to a node regex
	"pod_cpu_usage_node":             `sum by (node, pod) (label_join(irate(container_cpu_usage_seconds_total{job="kubelet",pod_name=~"$2", image!=""}[5m]), "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
	"pod_memory_usage_node":          `sum by (node, pod) (label_join(container_memory_usage_bytes{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
	"pod_memory_usage_wo_cache_node": `sum by (node, pod) ((label_join(container_memory_usage_bytes{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name") - label_join(container_memory_cache{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name")) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,

	// container — "$3" is the container name ("_all" variants take a regex)
	"container_cpu_usage":     `sum(irate(container_cpu_usage_seconds_total{namespace="$1", pod_name="$2", container_name="$3"}[5m])) by (namespace, pod_name, container_name)`,
	"container_cpu_usage_all": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}[5m])) by (namespace, pod_name, container_name)`,

	"container_memory_usage_wo_cache":     `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name="$3"} - ignoring(id, image, endpoint, instance, job, name, service) container_memory_cache{namespace="$1", pod_name="$2", container_name="$3"}`,
	"container_memory_usage_wo_cache_all": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"} - ignoring(id, image, endpoint, instance, job, name, service) container_memory_cache{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}`,
	"container_memory_usage":              `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name="$3"}`,
	"container_memory_usage_all":          `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}`,

	// enterprise (workspace) — "$1" is a regex over the workspace's namespaces
	"workspace_cpu_usage":             `sum(namespace:container_cpu_usage_seconds_total:sum_rate{namespace =~"$1"})`,
	"workspace_memory_usage":          `sum(namespace:container_memory_usage_bytes:sum{namespace =~"$1"})`,
	"workspace_memory_usage_wo_cache": `sum(namespace:container_memory_usage_bytes_wo_cache:sum{namespace =~"$1"})`,
	"workspace_net_bytes_transmitted": `sum(sum by (namespace) (irate(container_network_transmit_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[5m])))`,
	"workspace_net_bytes_received":    `sum(sum by (namespace) (irate(container_network_receive_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[5m])))`,
	"workspace_pod_count":             `sum(count(kube_pod_info{namespace=~"$1"} unless on(pod) kube_pod_completion_time) by (namespace))`,
}
|
||||
@@ -67,6 +67,12 @@ func MakeWorkloadRule(request *restful.Request) string {
|
||||
return rule
|
||||
}
|
||||
|
||||
func MakeWorkspacePromQL(metricsName string, namespaceRe2 string) string {
|
||||
promql := RulePromQLTmplMap[metricsName]
|
||||
promql = strings.Replace(promql, "$1", namespaceRe2, -1)
|
||||
return promql
|
||||
}
|
||||
|
||||
func MakeContainerPromQL(request *restful.Request) string {
|
||||
nsName := strings.Trim(request.PathParameter("ns_name"), " ")
|
||||
poName := strings.Trim(request.PathParameter("pod_name"), " ")
|
||||
@@ -169,9 +175,8 @@ func MakeNodeorClusterRule(request *restful.Request, metricsName string) string
|
||||
if nodesFilter == "" {
|
||||
nodesFilter = ".*"
|
||||
}
|
||||
if strings.Contains(metricsName, "disk") && (!(strings.Contains(metricsName, "read") || strings.Contains(metricsName, "write"))) {
|
||||
if strings.Contains(metricsName, "disk_size") || strings.Contains(metricsName, "pod") || strings.Contains(metricsName, "usage") {
|
||||
// disk size promql
|
||||
nodesFilter := ""
|
||||
if nodeID != "" {
|
||||
nodesFilter = "{" + "node" + "=" + "\"" + nodeID + "\"" + "}"
|
||||
} else {
|
||||
Reference in New Issue
Block a user