Add multiple cluster-level metrics to the dashboard

This commit is contained in:
Carman Zhang
2018-10-12 18:35:44 +08:00
parent 5a51bb68af
commit c65ecddbef
8 changed files with 714 additions and 340 deletions

View File

@@ -46,6 +46,19 @@ func (u MonitorResource) monitorContainer(request *restful.Request, response *re
}
// monitorWorkload handles workload-level monitoring requests. When the
// "workload_kind" path parameter is empty (after trimming spaces) it returns
// the per-namespace workload count; otherwise it returns all matching
// workload metrics.
func (u MonitorResource) monitorWorkload(request *restful.Request, response *restful.Response) {
	wlKind := request.PathParameter("workload_kind")
	if strings.Trim(wlKind, " ") == "" {
		// No specific workload kind requested: report workload counts only.
		res := metrics.MonitorWorkloadCount(request)
		response.WriteAsJson(res)
	} else {
		res := metrics.MonitorAllMetrics(request)
		response.WriteAsJson(res)
	}
}

// monitorWorkspacePodLevelMetrics returns all pod-level metrics scoped to a
// workspace.
func (u MonitorResource) monitorWorkspacePodLevelMetrics(request *restful.Request, response *restful.Response) {
	res := metrics.MonitorAllMetrics(request)
	response.WriteAsJson(res)
}
@@ -196,10 +209,18 @@ func Register(ws *restful.WebService, subPath string) {
Doc("monitor specific workload level metrics").
Param(ws.PathParameter("ns_name", "namespace").DataType("string").Required(true).DefaultValue("kube-system")).
Param(ws.QueryParameter("metrics_filter", "metrics name cpu memory...").DataType("string").Required(false)).
Param(ws.PathParameter("workload_kind", "workload kind").DataType("string").Required(true).DefaultValue("daemonset")).
Param(ws.PathParameter("workload_kind", "workload kind").DataType("string").Required(false).DefaultValue("daemonset")).
Param(ws.QueryParameter("workload_name", "workload name").DataType("string").Required(true).DefaultValue("")).
Metadata(restfulspec.KeyOpenAPITags, tags)).
Consumes(restful.MIME_JSON, restful.MIME_XML).
Produces(restful.MIME_JSON)
ws.Route(ws.GET(subPath+"/namespaces/{ns_name}/workloads").To(u.monitorWorkload).
Filter(route.RouteLogging).
Doc("monitor all workload level metrics").
Param(ws.PathParameter("ns_name", "namespace").DataType("string").Required(true).DefaultValue("kube-system")).
Param(ws.QueryParameter("metrics_filter", "metrics name cpu memory...").DataType("string").Required(false)).
Metadata(restfulspec.KeyOpenAPITags, tags)).
Consumes(restful.MIME_JSON, restful.MIME_XML).
Produces(restful.MIME_JSON)
}

View File

@@ -17,6 +17,9 @@ import (
"net/http"
"net/url"
"strconv"
"time"
"github.com/emicklei/go-restful"
"github.com/golang/glog"
"github.com/pkg/errors"
@@ -28,6 +31,8 @@ const (
DefaultPrometheusPort = "9090"
PrometheusApiPath = "/api/v1/"
PrometheusEndpointUrl = DefaultScheme + "://" + DefaultPrometheusService + ":" + DefaultPrometheusPort + PrometheusApiPath
DefaultQueryStep = "10m"
DefaultQueryTimeout = "30s"
)
var client = &http.Client{}
@@ -79,14 +84,18 @@ func ParseRequestHeader(request *restful.Request) (url.Values, bool, error) {
end := request.QueryParameter("end")
step := request.QueryParameter("step")
timeout := request.QueryParameter("timeout")
if timeout == "" {
timeout = "30s"
timeout = DefaultQueryTimeout
}
if step == "" {
step = DefaultQueryStep
}
// Whether query or query_range request
u := url.Values{}
if start != "" && end != "" && step != "" {
u.Set("start", start)
u.Set("end", end)
if start != "" && end != "" {
u.Set("start", convertTimeGranularity(start))
u.Set("end", convertTimeGranularity(end))
u.Set("step", step)
u.Set("timeout", timeout)
return u, true, nil
@@ -101,6 +110,18 @@ func ParseRequestHeader(request *restful.Request) (url.Values, bool, error) {
return u, false, nil
}
glog.Error("Parse request failed", u)
return u, false, errors.Errorf("Parse request failed")
glog.Errorln("Parse request %s failed", u)
return u, false, errors.Errorf("Parse request time range %s failed", u)
}
// convertTimeGranularity normalizes a second-resolution Unix timestamp string
// to minute resolution so Prometheus range queries align on minute
// boundaries. On a parse failure it logs the error and falls back to the
// current time — also truncated to the minute, for consistency with the
// success path (previously the fallback was returned untruncated).
func convertTimeGranularity(ts string) string {
	timeFloat, err := strconv.ParseFloat(ts, 64)
	if err != nil {
		glog.Errorf("convert second timestamp %s to minute timestamp failed: %v", ts, err)
		timeFloat = float64(time.Now().Unix())
	}
	// Truncate the second-resolution timestamp down to the whole minute.
	minuteTime := time.Unix(int64(timeFloat), 0).Truncate(time.Minute).Unix()
	return strconv.FormatInt(minuteTime, 10)
}

View File

@@ -1,184 +0,0 @@
/*
Copyright 2018 The KubeSphere Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"encoding/json"
"regexp"
"strings"
"github.com/emicklei/go-restful"
"github.com/golang/glog"
"kubesphere.io/kubesphere/pkg/client"
)
// getPodNameRegexInWorkload queries Prometheus for the pods that belong to
// the workload named in the request and builds an anchored regex that
// matches exactly those pod names.
func getPodNameRegexInWorkload(request *restful.Request) string {
	promql := MakeWorkloadRule(request)
	raw := client.SendPrometheusRequest(request, promql)

	var parsed CommonMetricsResult
	if err := json.Unmarshal([]byte(raw), &parsed); err != nil {
		glog.Errorln("json parse failed", err)
	}

	var names []string
	for _, item := range parsed.Data.Result {
		names = append(names, item.KubePodMetric.Pod)
	}
	return "^(" + strings.Join(names, "|") + ")$"
}
// MonitorWorkloadSingleMetric collects one metric across every pod that
// belongs to the workload named in the request.
func MonitorWorkloadSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
	namespace := strings.Trim(request.PathParameter("ns_name"), " ")
	podRegex := getPodNameRegexInWorkload(request)
	promql := MakePodPromQL(request, []string{metricsName, namespace, "", "", podRegex})
	raw := client.SendPrometheusRequest(request, promql)
	return ReformatJson(raw, metricsName)
}

// MonitorPodSingleMetric collects one pod-level metric; it returns nil when
// no PromQL query can be built from the request parameters.
func MonitorPodSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
	params := []string{
		metricsName,
		strings.Trim(request.PathParameter("ns_name"), " "),
		strings.Trim(request.PathParameter("node_id"), " "),
		strings.Trim(request.PathParameter("pod_name"), " "),
		strings.Trim(request.QueryParameter("pods_filter"), " "),
	}
	promql := MakePodPromQL(request, params)
	if promql == "" {
		return nil
	}
	raw := client.SendPrometheusRequest(request, promql)
	return ReformatJson(raw, metricsName)
}

// MonitorNamespaceSingleMetric collects one namespace-level metric via its
// recording rule.
func MonitorNamespaceSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
	rule := MakeNamespacePromQL(request, metricsName)
	raw := client.SendPrometheusRequest(request, rule)
	return ReformatJson(raw, metricsName)
}
// ReformatJson unmarshals a raw Prometheus response, stamps the metric name
// when it is missing, and strips the internal "__name__" label from each
// result item.
func ReformatJson(metric string, metricsName string) *FormatedMetric {
	var formatMetric FormatedMetric
	err := json.Unmarshal([]byte(metric), &formatMetric)
	if err != nil {
		glog.Errorln("Unmarshal metric json failed", err)
	}
	if formatMetric.MetricName == "" {
		formatMetric.MetricName = metricsName
	}
	// retrieve metrics success
	if formatMetric.Status == "success" {
		for _, res := range formatMetric.Data.Result {
			// Comma-ok type assertion: the previous unchecked assertion
			// (`metric.(map[string]interface{})` before inspecting ok)
			// panicked when the "metric" key was absent or not a map.
			if me, ok := res["metric"].(map[string]interface{}); ok {
				delete(me, "__name__")
			}
		}
	}
	return &formatMetric
}
// collectNodeorClusterMetrics fetches one node/cluster metric and sends it on ch.
func collectNodeorClusterMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
	ch <- MonitorNodeorClusterSingleMetric(request, metricsName)
}

// collectNamespaceMetrics fetches one namespace metric and sends it on ch.
func collectNamespaceMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
	ch <- MonitorNamespaceSingleMetric(request, metricsName)
}

// collectWorkloadMetrics strips the "workload_" prefix from the metric name
// and fetches the underlying pod-level metric for the workload.
func collectWorkloadMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
	// TrimPrefix removes exactly the "workload_" prefix. The previous
	// TrimLeft treated "workload_" as a character SET and could also strip
	// leading letters of the remaining metric name (classic cutset bug).
	metricsName = strings.TrimPrefix(metricsName, "workload_")
	ch <- MonitorWorkloadSingleMetric(request, metricsName)
}

// collectPodMetrics fetches one pod metric and sends it on ch.
func collectPodMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
	ch <- MonitorPodSingleMetric(request, metricsName)
}
// MonitorAllMetrics fans out one goroutine per metric name matching the
// "metrics_filter" regex for the source type implied by the route path, then
// gathers exactly one result per launched goroutine.
func MonitorAllMetrics(request *restful.Request) FormatedLevelMetric {
	metricsFilter := strings.Trim(request.QueryParameter("metrics_filter"), " ")
	if metricsFilter == "" {
		metricsFilter = ".*"
	}

	// Derive the metric level from the trailing path segment (minus its
	// plural "s"); workload routes carry extra path parts, so detect them
	// explicitly.
	path := request.SelectedRoutePath()
	sourceType := path[strings.LastIndex(path, "/")+1 : len(path)-1]
	if strings.Contains(path, "workload") {
		sourceType = "workload"
	}

	// matched reports whether a metric name passes the user filter. The
	// error is checked BEFORE the match result: previously `!bol` was
	// tested first, so regex errors were silently dropped.
	matched := func(name string) bool {
		ok, err := regexp.MatchString(metricsFilter, name)
		if err != nil {
			glog.Errorln("regex match failed", err)
			return false
		}
		return ok
	}

	ch := make(chan *FormatedMetric, 10)
	launched := 0
	for _, name := range MetricsNames {
		if !matched(name) || !strings.HasPrefix(name, sourceType) {
			continue
		}
		switch sourceType {
		case "node", "cluster":
			go collectNodeorClusterMetrics(request, name, ch)
		case "namespace":
			go collectNamespaceMetrics(request, name, ch)
		case "pod":
			go collectPodMetrics(request, name, ch)
		case "workload":
			go collectWorkloadMetrics(request, name, ch)
		default:
			// No dispatcher for this source type — do not count it.
			continue
		}
		launched++
	}

	// Receive exactly one result per launched goroutine. Counting launches
	// (instead of re-running the match) avoids blocking forever when a
	// matching name had no dispatcher.
	var metricsArray []FormatedMetric
	for i := 0; i < launched; i++ {
		if m := <-ch; m != nil {
			metricsArray = append(metricsArray, *m)
		}
	}

	return FormatedLevelMetric{
		MetricsLevel: sourceType,
		Results:      metricsArray,
	}
}
// MonitorNodeorClusterSingleMetric evaluates the recording rule for a single
// node- or cluster-level metric and returns the reformatted result.
func MonitorNodeorClusterSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
	rule := MakeNodeorClusterRule(request, metricsName)
	raw := client.SendPrometheusRequest(request, rule)
	return ReformatJson(raw, metricsName)
}

View File

@@ -1,147 +0,0 @@
/*
Copyright 2018 The KubeSphere Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
// MetricMap maps a metric name to its PromQL query or recording-rule template.
type MetricMap map[string]string

// MetricsNames enumerates every metric name served by the monitoring API.
// The level prefix of each name (cluster/node/namespace/pod/workload/tenant)
// is matched against the request's source type when dispatching queries.
var MetricsNames = []string{
	// cluster level
	"cluster_cpu_utilisation",
	"cluster_memory_utilisation",
	"cluster_net_utilisation",
	"cluster_pod_count",
	// node level
	"node_cpu_utilisation",
	"node_memory_utilisation",
	"node_memory_available",
	"node_memory_total",
	"node_net_utilisation",
	"node_net_bytes_transmitted",
	"node_net_bytes_received",
	"node_disk_read_iops",
	"node_disk_write_iops",
	"node_disk_read_throughput",
	"node_disk_write_throughput",
	"node_disk_capacity",
	"node_disk_available",
	"node_disk_utilization",
	// namespace level
	"namespace_cpu_utilisation",
	"namespace_memory_utilisation",
	"namespace_memory_utilisation_wo_cache",
	"namespace_net_bytes_transmitted",
	"namespace_net_bytes_received",
	"namespace_pod_count",
	// pod level
	"pod_cpu_utilisation",
	"pod_memory_utilisation",
	"pod_memory_utilisation_wo_cache",
	"pod_net_bytes_transmitted",
	"pod_net_bytes_received",
	// workload level (prefix is stripped before the pod query is built)
	"workload_pod_cpu_utilisation",
	"workload_pod_memory_utilisation",
	"workload_pod_memory_utilisation_wo_cache",
	"workload_pod_net_bytes_transmitted",
	"workload_pod_net_bytes_received",
	//"container_cpu_utilisation",
	//"container_memory_utilisation_wo_cache",
	//"container_memory_utilisation",
	// tenant level
	"tenant_cpu_utilisation",
	"tenant_memory_utilisation",
	"tenant_memory_utilisation_wo_cache",
	"tenant_net_bytes_transmitted",
	"tenant_net_bytes_received",
	"tenant_pod_count",
}
// RulePromQLTmplMap maps each metric name to the PromQL expression or
// recording rule used to evaluate it. "$1" is substituted with a namespace
// filter (see getWorkspacePodsCountMetrics); "$2"/"$3" are presumably filled
// in by the Make*PromQL helpers — confirm against those builders.
var RulePromQLTmplMap = MetricMap{
	//cluster
	"cluster_cpu_utilisation":    ":node_cpu_utilisation:avg1m",
	"cluster_memory_utilisation": ":node_memory_utilisation:",
	// Cluster network utilisation (bytes received + bytes transmitted per second)
	"cluster_net_utilisation": ":node_net_utilisation:sum_irate",
	"cluster_pod_count":       `count(kube_pod_info{job="kube-state-metrics"})`,
	//node
	"node_cpu_utilisation":    "node:node_cpu_utilisation:avg1m",
	"node_memory_utilisation": "node:node_memory_utilisation:",
	"node_memory_available":   "node:node_memory_bytes_available:sum",
	"node_memory_total":       "node:node_memory_bytes_total:sum",
	// Node network utilisation (bytes received + bytes transmitted per second)
	"node_net_utilisation": "node:node_net_utilisation:sum_irate",
	// Node network bytes transmitted per second
	"node_net_bytes_transmitted": "node:node_net_bytes_transmitted:sum_irate",
	// Node network bytes received per second
	"node_net_bytes_received": "node:node_net_bytes_received:sum_irate",
	// node:data_volume_iops_reads:sum{node=~"i-5xcldxos|i-6soe9zl1"}
	"node_disk_read_iops": "node:data_volume_iops_reads:sum",
	// node:data_volume_iops_writes:sum{node=~"i-5xcldxos|i-6soe9zl1"}
	"node_disk_write_iops": "node:data_volume_iops_writes:sum",
	// node:data_volume_throughput_bytes_read:sum{node=~"i-5xcldxos|i-6soe9zl1"}
	"node_disk_read_throughput": "node:data_volume_throughput_bytes_read:sum",
	// node:data_volume_throughput_bytes_written:sum{node=~"i-5xcldxos|i-6soe9zl1"}
	"node_disk_write_throughput": "node:data_volume_throughput_bytes_written:sum",
	"node_disk_capacity":         `sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
	"node_disk_available":        `sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
	"node_disk_utilization":      `sum by (node) (((node_filesystem_size{mountpoint="/", job="node-exporter"} - node_filesystem_avail{mountpoint="/", job="node-exporter"}) / node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
	//namespace
	"namespace_cpu_utilisation":             `namespace:container_cpu_usage_seconds_total:sum_rate{namespace=~"$1"}`,
	"namespace_memory_utilisation":          `namespace:container_memory_usage_bytes:sum{namespace=~"$1"}`,
	"namespace_memory_utilisation_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace=~"$1"}`,
	"namespace_net_bytes_transmitted":       `sum by (namespace) (irate(container_network_transmit_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[2m]))`,
	"namespace_net_bytes_received":          `sum by (namespace) (irate(container_network_receive_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[2m]))`,
	// count(kube_pod_info) by (namespace) namespace=~"monitoring|default|kube-system"
	"namespace_pod_count": `count(kube_pod_info{job="kube-state-metrics", namespace=~"$1"}) by (namespace)`,
	// pod
	"pod_cpu_utilisation":             `sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name="$2", image!=""}[5m])) by (namespace, pod_name)`,
	"pod_memory_utilisation":          `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name="$2", image!=""}) by (namespace, pod_name)`,
	"pod_memory_utilisation_wo_cache": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name="$2", image!=""} - container_memory_cache{job="kubelet", namespace="$1", pod_name="$2",image!=""}) by (namespace, pod_name)`,
	"pod_net_bytes_transmitted":       `sum by (namespace, pod_name) (irate(container_network_transmit_bytes_total{namespace="$1", pod_name!="", pod_name="$2", interface="eth0", job="kubelet"}[2m]))`,
	"pod_net_bytes_received":          `sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{namespace="$1", pod_name!="", pod_name="$2", interface="eth0", job="kubelet"}[2m]))`,
	// "_all" variants match pod_name with a regex instead of an exact name
	"pod_cpu_utilisation_all":             `sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}[5m])) by (namespace, pod_name)`,
	"pod_memory_utilisation_all":          `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}) by (namespace, pod_name)`,
	"pod_memory_utilisation_wo_cache_all": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name=~"$2", image!=""} - container_memory_cache{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}) by (namespace, pod_name)`,
	"pod_net_bytes_transmitted_all":       `sum by (namespace, pod_name) (irate(container_network_transmit_bytes_total{namespace="$1", pod_name!="", pod_name=~"$2", interface="eth0", job="kubelet"}[2m]))`,
	"pod_net_bytes_received_all":          `sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{namespace="$1", pod_name!="", pod_name=~"$2", interface="eth0", job="kubelet"}[2m]))`,
	//"pod_cpu_utilisation_node": `sum by (node, pod) (label_join(irate(container_cpu_usage_seconds_total{job="kubelet", image!=""}[5m]), "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
	"pod_cpu_utilisation_node":             `sum by (node, pod) (label_join(irate(container_cpu_usage_seconds_total{job="kubelet",pod_name=~"$2", image!=""}[5m]), "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
	"pod_memory_utilisation_node":          `sum by (node, pod) (label_join(container_memory_usage_bytes{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
	"pod_memory_utilisation_wo_cache_node": `sum by (node, pod) ((label_join(container_memory_usage_bytes{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name") - label_join(container_memory_cache{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name")) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
	// container
	"container_cpu_utilisation": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", pod_name="$2", container_name="$3"}[5m])) by (namespace, pod_name, container_name)`,
	//"container_cpu_utilisation_wo_podname": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", container_name=~"$3"}[5m])) by (namespace, pod_name, container_name)`,
	"container_cpu_utilisation_all": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}[5m])) by (namespace, pod_name, container_name)`,
	//"container_cpu_utilisation_all_wo_podname": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", container_name!="POD"}[5m])) by (namespace, pod_name, container_name)`,
	"container_memory_utilisation_wo_cache":     `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name="$3"} - ignoring(id, image, endpoint, instance, job, name, service) container_memory_cache{namespace="$1", pod_name="$2", container_name="$3"}`,
	"container_memory_utilisation_wo_cache_all": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"} - ignoring(id, image, endpoint, instance, job, name, service) container_memory_cache{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}`,
	"container_memory_utilisation":              `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name="$3"}`,
	"container_memory_utilisation_all":          `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}`,
	// tenant
	"tenant_cpu_utilisation":             `sum(namespace:container_cpu_usage_seconds_total:sum_rate{namespace =~"$1"})`,
	"tenant_memory_utilisation":          `sum(namespace:container_memory_usage_bytes:sum{namespace =~"$1"})`,
	"tenant_memory_utilisation_wo_cache": `sum(namespace:container_memory_usage_bytes_wo_cache:sum{namespace =~"$1"})`,
	"tenant_net_bytes_transmitted":       `sum(sum by (namespace) (irate(container_network_transmit_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[2m])))`,
	"tenant_net_bytes_received":          `sum(sum by (namespace) (irate(container_network_receive_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[2m])))`,
	"tenant_pod_count":                   `sum(count(kube_pod_info{job="kube-state-metrics", namespace=~"$1"}) by (namespace))`,
}

View File

@@ -0,0 +1,422 @@
/*
Copyright 2018 The KubeSphere Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"encoding/json"
"regexp"
"strings"
"github.com/emicklei/go-restful"
"github.com/golang/glog"
"time"
"k8s.io/api/core/v1"
metaV1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"kubesphere.io/kubesphere/pkg/client"
"kubesphere.io/kubesphere/pkg/models"
)
// getPodNameRegexInWorkload asks Prometheus which pods belong to the
// requested workload and returns an anchored regex matching their names.
func getPodNameRegexInWorkload(request *restful.Request) string {
	workloadRule := MakeWorkloadRule(request)
	response := client.SendPrometheusRequest(request, workloadRule)

	var result CommonMetricsResult
	if err := json.Unmarshal([]byte(response), &result); err != nil {
		glog.Errorln("json parse failed", err)
	}

	var pods []string
	for _, entry := range result.Data.Result {
		pods = append(pods, entry.KubePodMetric.Pod)
	}
	return "^(" + strings.Join(pods, "|") + ")$"
}
// MonitorWorkloadSingleMetric collects one metric across every pod belonging
// to the workload named in the request.
func MonitorWorkloadSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
	ns := strings.Trim(request.PathParameter("ns_name"), " ")
	podRegex := getPodNameRegexInWorkload(request)
	query := MakePodPromQL(request, []string{metricsName, ns, "", "", podRegex})
	response := client.SendPrometheusRequest(request, query)
	return ReformatJson(response, metricsName)
}

// MonitorPodSingleMetric collects one pod-level metric; it returns nil when
// no PromQL query can be built from the request parameters.
func MonitorPodSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
	params := []string{
		metricsName,
		strings.Trim(request.PathParameter("ns_name"), " "),
		strings.Trim(request.PathParameter("node_id"), " "),
		strings.Trim(request.PathParameter("pod_name"), " "),
		strings.Trim(request.QueryParameter("pods_filter"), " "),
	}
	query := MakePodPromQL(request, params)
	if query == "" {
		return nil
	}
	response := client.SendPrometheusRequest(request, query)
	return ReformatJson(response, metricsName)
}

// MonitorNamespaceSingleMetric collects one namespace-level metric via its
// recording rule.
func MonitorNamespaceSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
	rule := MakeNamespacePromQL(request, metricsName)
	response := client.SendPrometheusRequest(request, rule)
	return ReformatJson(response, metricsName)
}
// ReformatJson unmarshals a raw Prometheus response, stamps the metric name
// when it is missing, and strips the internal "__name__" label from each
// result item. NOTE: unmarshalling every response may be time consuming.
func ReformatJson(metric string, metricsName string) *FormatedMetric {
	var formatMetric FormatedMetric
	err := json.Unmarshal([]byte(metric), &formatMetric)
	if err != nil {
		glog.Errorln("Unmarshal metric json failed", err)
	}
	if formatMetric.MetricName == "" {
		formatMetric.MetricName = metricsName
	}
	// retrieve metrics success
	if formatMetric.Status == MetricStatusSuccess {
		for _, res := range formatMetric.Data.Result {
			// Comma-ok type assertion: the previous unchecked assertion
			// (`metric.(map[string]interface{})` before inspecting ok)
			// panicked when the metric key was absent or not a map.
			if me, ok := res[ResultItemMetric].(map[string]interface{}); ok {
				delete(me, "__name__")
			}
		}
	}
	return &formatMetric
}
// collectNodeorClusterMetrics fetches one node/cluster metric and sends it on ch.
func collectNodeorClusterMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
	ch <- MonitorNodeorClusterSingleMetric(request, metricsName)
}

// collectNamespaceMetrics fetches one namespace metric and sends it on ch.
func collectNamespaceMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
	ch <- MonitorNamespaceSingleMetric(request, metricsName)
}

// collectWorkloadMetrics strips the "workload_" prefix from the metric name
// and fetches the underlying pod-level metric for the workload.
func collectWorkloadMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
	// TrimPrefix removes exactly the "workload_" prefix. The previous
	// TrimLeft treated "workload_" as a character SET and could also strip
	// leading letters of the remaining metric name (classic cutset bug).
	metricsName = strings.TrimPrefix(metricsName, "workload_")
	ch <- MonitorWorkloadSingleMetric(request, metricsName)
}

// collectWorkspaceMetrics fetches one workspace-level metric, restricted to
// the given namespaces, and sends it on ch.
func collectWorkspaceMetrics(request *restful.Request, metricsName string, namespaceList []string, ch chan<- *FormatedMetric) {
	ch <- monitorWorkspaceSingleMertic(request, metricsName, namespaceList)
}

// collectPodMetrics fetches one pod metric and sends it on ch.
func collectPodMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) {
	ch <- MonitorPodSingleMetric(request, metricsName)
}

// monitorWorkspaceSingleMertic queries one workspace-level metric across the
// given namespaces using an anchored namespace regex.
func monitorWorkspaceSingleMertic(request *restful.Request, metricsName string, namespaceList []string) *FormatedMetric {
	nsRegex := "^(" + strings.Join(namespaceList, "|") + ")$"
	promql := MakeWorkspacePromQL(metricsName, nsRegex)
	response := client.SendPrometheusRequest(request, promql)
	return ReformatJson(response, metricsName)
}
// filterNamespace returns the subset of namespaceList matching the
// "namespaces_filter" query parameter (a regex; empty means match all).
func filterNamespace(request *restful.Request, namespaceList []string) []string {
	nsFilter := strings.Trim(request.QueryParameter("namespaces_filter"), " ")
	if nsFilter == "" {
		nsFilter = ".*"
	}
	// Compile once instead of re-matching the pattern per namespace, and log
	// an invalid user-supplied pattern instead of silently ignoring it
	// (previously the MatchString error was discarded with `_`). An invalid
	// pattern still yields an empty result, as before.
	re, err := regexp.Compile(nsFilter)
	if err != nil {
		glog.Errorln("invalid namespaces_filter regex", nsFilter, err)
		return nil
	}
	var filtered []string
	for _, ns := range namespaceList {
		if re.MatchString(ns) {
			filtered = append(filtered, ns)
		}
	}
	return filtered
}
// MonitorAllMetrics fans out one goroutine per metric name matching the
// "metrics_filter" regex for the metric level implied by the route path,
// then gathers exactly one result per launched goroutine.
func MonitorAllMetrics(request *restful.Request) FormatedLevelMetric {
	metricsFilter := strings.Trim(request.QueryParameter("metrics_filter"), " ")
	if metricsFilter == "" {
		metricsFilter = ".*"
	}

	// Derive the metric level from the trailing path segment (minus its
	// plural "s"); workload and workspace routes carry extra path parts,
	// so detect them explicitly.
	path := request.SelectedRoutePath()
	sourceType := path[strings.LastIndex(path, "/")+1 : len(path)-1]
	if strings.Contains(path, MetricLevelWorkload) {
		sourceType = MetricLevelWorkload
	} else if strings.Contains(path, MetricLevelWorkspace) {
		sourceType = MetricLevelWorkspace
	}

	// matched reports whether a metric name passes the user filter. The
	// error is checked BEFORE the match result: previously `!bol` was
	// tested first, so regex errors were silently dropped.
	matched := func(name string) bool {
		ok, err := regexp.MatchString(metricsFilter, name)
		if err != nil {
			glog.Errorln("regex match failed", err)
			return false
		}
		return ok
	}

	ch := make(chan *FormatedMetric, 10)
	launched := 0
	for _, metricName := range MetricsNames {
		if !matched(metricName) || !strings.HasPrefix(metricName, sourceType) {
			continue
		}
		switch sourceType {
		case MetricLevelCluster, MetricLevelNode:
			go collectNodeorClusterMetrics(request, metricName, ch)
		case MetricLevelNamespace:
			go collectNamespaceMetrics(request, metricName, ch)
		case MetricLevelPod:
			go collectPodMetrics(request, metricName, ch)
		case MetricLevelWorkload:
			go collectWorkloadMetrics(request, metricName, ch)
		default:
			// No dispatcher for this source type — do not count it.
			continue
		}
		launched++
	}

	// Receive exactly one result per launched goroutine. Counting launches
	// (instead of re-running the match) avoids blocking forever when a
	// matching name had no dispatcher (e.g. workspace-level names).
	var metricsArray []FormatedMetric
	for i := 0; i < launched; i++ {
		if m := <-ch; m != nil {
			metricsArray = append(metricsArray, *m)
		}
	}

	return FormatedLevelMetric{
		MetricsLevel: sourceType,
		Results:      metricsArray,
	}
}
// getWorkspacePodsCountMetrics counts pods across all namespaces of a
// workspace by instantiating the namespace pod-count rule with an anchored
// namespace regex.
func getWorkspacePodsCountMetrics(request *restful.Request, namespaces []string) *FormatedMetric {
	metricName := MetricNameNamespacePodCount
	nsFilter := "^(" + strings.Join(namespaces, "|") + ")$"
	rule := strings.Replace(RulePromQLTmplMap[metricName], "$1", nsFilter, -1)
	response := client.SendPrometheusRequest(request, rule)
	return ReformatJson(response, metricName)
}
// getWorkspaceWorkloadCountMetrics aggregates the resource quotas of all the
// given namespaces into a single ResourceQuota (summing both Used and Hard
// per resource name) and converts the total into a FormatedMetric.
// Namespaces whose quota cannot be fetched are logged and skipped
// (best effort).
func getWorkspaceWorkloadCountMetrics(namespaces []string) FormatedMetric {
	var wlQuotaMetrics models.ResourceQuota
	// The aggregate is labeled with the pipe-joined namespace list.
	wlQuotaMetrics.NameSpace = strings.Join(namespaces, "|")
	wlQuotaMetrics.Data.Used = make(v1.ResourceList, 1)
	wlQuotaMetrics.Data.Hard = make(v1.ResourceList, 1)
	for _, ns := range namespaces {
		quotaMetric, err := models.GetNamespaceQuota(ns)
		if err != nil {
			glog.Errorln(err)
			continue
		}
		// sum all resources used along namespaces
		quotaUsed := quotaMetric.Data.Used
		for resourceName, quantity := range quotaUsed {
			if _, ok := wlQuotaMetrics.Data.Used[resourceName]; ok {
				// Quantity.Add mutates its receiver, so accumulate on a
				// copy and write it back into the map.
				tmpQuantity := wlQuotaMetrics.Data.Used[v1.ResourceName(resourceName)]
				tmpQuantity.Add(quantity)
				wlQuotaMetrics.Data.Used[v1.ResourceName(resourceName)] = tmpQuantity
			} else {
				// First occurrence: DeepCopy so later Adds cannot alias the
				// source quota's Quantity.
				wlQuotaMetrics.Data.Used[v1.ResourceName(resourceName)] = quantity.DeepCopy()
			}
		}
		// sum all resources hard along namespaces
		quotaHard := quotaMetric.Data.Hard
		for resourceName, quantity := range quotaHard {
			if _, ok := wlQuotaMetrics.Data.Hard[resourceName]; ok {
				tmpQuantity := wlQuotaMetrics.Data.Hard[v1.ResourceName(resourceName)]
				tmpQuantity.Add(quantity)
				wlQuotaMetrics.Data.Hard[v1.ResourceName(resourceName)] = tmpQuantity
			} else {
				wlQuotaMetrics.Data.Hard[v1.ResourceName(resourceName)] = quantity.DeepCopy()
			}
		}
	}
	wlMetrics := convertQuota2MetricStruct(&wlQuotaMetrics)
	return wlMetrics
}
// getSpecificMetricItem builds a single-sample FormatedMetric carrying a
// count value (e.g. node counts). When err is non-nil the status is marked
// as an error and the error text is attached under "errorinfo".
func getSpecificMetricItem(timestamp int64, metricName string, kind string, count int, err error) FormatedMetric {
	var metric FormatedMetric
	metric.MetricName = metricName
	metric.Data.ResultType = ResultTypeVector

	labels := map[string]string{ResultItemMetricResource: kind}
	item := make(map[string]interface{})
	if err != nil {
		metric.Status = MetricStatusError
		item["errorinfo"] = err.Error()
	} else {
		metric.Status = MetricStatusSuccess
	}
	item[ResultItemMetric] = labels
	item[ResultItemValue] = []interface{}{timestamp, count}

	metric.Data.Result = []map[string]interface{}{item}
	return metric
}
// MonitorNodeorClusterSingleMetric returns one node- or cluster-level
// metric. The node count metrics (total/healthy/unhealthy) are computed from
// the Kubernetes API; everything else is evaluated through a Prometheus
// recording rule.
func MonitorNodeorClusterSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
	timestamp := time.Now().Unix() // Unix() already returns int64; old cast was redundant
	var fMetric FormatedMetric
	// switch replaces the previous if/else-if chain over the same variable.
	switch metricsName {
	case MetricNameClusterHealthyNodeCount:
		onlineNodes, _ := getNodeHealthyConditionMetric()
		fMetric = getSpecificMetricItem(timestamp, MetricNameClusterHealthyNodeCount, "node_count", len(onlineNodes), nil)
	case MetricNameClusterUnhealthyNodeCount:
		_, offlineNodes := getNodeHealthyConditionMetric()
		fMetric = getSpecificMetricItem(timestamp, MetricNameClusterUnhealthyNodeCount, "node_count", len(offlineNodes), nil)
	case MetricNameClusterNodeCount:
		onlineNodes, offlineNodes := getNodeHealthyConditionMetric()
		fMetric = getSpecificMetricItem(timestamp, MetricNameClusterNodeCount, "node_count", len(onlineNodes)+len(offlineNodes), nil)
	default:
		recordingRule := MakeNodeorClusterRule(request, metricsName)
		res := client.SendPrometheusRequest(request, recordingRule)
		fMetric = *ReformatJson(res, metricsName)
	}
	return &fMetric
}
// getNodeHealthyConditionMetric lists the cluster's nodes (excluding those
// labeled role=log) and partitions them into reachable and unreachable sets;
// a node is considered unreachable when its Ready condition reports
// status "Unknown". Returns (nil, nil) when the node list cannot be fetched.
func getNodeHealthyConditionMetric() ([]string, []string) {
	nodeList, err := client.NewK8sClient().CoreV1().Nodes().List(metaV1.ListOptions{})
	if err != nil {
		glog.Errorln(err)
		return nil, nil
	}
	var online, offline []string
	for _, node := range nodeList.Items {
		if node.Labels["role"] == "log" {
			continue // log nodes are excluded from health accounting
		}
		name := node.Labels["kubernetes.io/hostname"]
		reachable := true
		for _, cond := range node.Status.Conditions {
			if cond.Type == "Ready" && cond.Status == "Unknown" {
				reachable = false
				break
			}
		}
		if reachable {
			online = append(online, name)
		} else {
			offline = append(offline, name)
		}
	}
	return online, offline
}
// getExistingNamespace splits the given names into namespaces that exist in
// the cluster and those that do not. When the cluster namespace list cannot
// be fetched, the whole input is returned as "existing" (best effort).
func getExistingNamespace(namespaces []string) ([]string, []string) {
	known, err := getAllNamespace()
	if err != nil {
		return namespaces, nil
	}
	var existing, missing []string
	for _, ns := range namespaces {
		if _, found := known[ns]; found {
			existing = append(existing, ns)
		} else {
			missing = append(missing, ns)
		}
	}
	return existing, missing
}

// getAllNamespace fetches every namespace in the cluster as a set keyed by
// name (the int values are unused placeholders).
func getAllNamespace() (map[string]int, error) {
	k8sClient := client.NewK8sClient()
	nsList, err := k8sClient.CoreV1().Namespaces().List(metaV1.ListOptions{})
	if err != nil {
		glog.Errorln(err)
		return nil, err
	}
	set := make(map[string]int)
	for _, item := range nsList.Items {
		set[item.Name] = 0
	}
	return set, nil
}
// MonitorWorkloadCount returns the workload counts for the namespace in the
// "ns_name" path parameter, derived from the namespace's resource quota.
// If the namespace does not exist or the quota lookup fails, the metric is
// returned with an error status and an "errormsg" result entry.
func MonitorWorkloadCount(request *restful.Request) FormatedMetric {
	namespace := strings.Trim(request.PathParameter("ns_name"), " ")
	quotaMetric, err := models.GetNamespaceQuota(namespace)
	fMetric := convertQuota2MetricStruct(quotaMetric)
	// whether the namespace in request parameters exists?
	namespaceMap, e := getAllNamespace()
	_, ok := namespaceMap[namespace]
	if e != nil {
		// Could not list namespaces — assume the namespace exists rather
		// than reporting a spurious "does not exist" error (best effort).
		ok = true
	}
	if !ok || err != nil {
		fMetric.Status = MetricStatusError
		fMetric.Data.ResultType = ""
		errInfo := make(map[string]interface{})
		if err != nil {
			// Quota lookup failure takes precedence in the reported message.
			errInfo["errormsg"] = err.Error()
		} else {
			errInfo["errormsg"] = "namespace " + namespace + " does not exist"
		}
		fMetric.Data.Result = []map[string]interface{}{errInfo}
	}
	return fMetric
}
// convertQuota2MetricStruct maps a namespace ResourceQuota into a
// FormatedMetric vector: one result item per used resource, whose value is
// [timestamp, hard-limit, used]. A nil quota yields an error-status metric
// instead of panicking.
func convertQuota2MetricStruct(quotaMetric *models.ResourceQuota) FormatedMetric {
	var fMetric FormatedMetric
	fMetric.MetricName = MetricNameWorkloadCount

	// BUG FIX: guard against a nil quota (e.g. when the quota lookup
	// failed upstream); the original dereferenced it unconditionally.
	if quotaMetric == nil {
		fMetric.Status = MetricStatusError
		return fMetric
	}

	fMetric.Status = MetricStatusSuccess
	fMetric.Data.ResultType = ResultTypeVector
	timestamp := time.Now().Unix() // Unix() already returns int64

	// hard limits keyed by resource name, for pairing with usage below
	hardMap := make(map[string]string, len(quotaMetric.Data.Hard))
	for resourceName, v := range quotaMetric.Data.Hard {
		hardMap[resourceName.String()] = v.String()
	}

	var resultItems []map[string]interface{}
	for resourceName, v := range quotaMetric.Data.Used {
		resultItem := make(map[string]interface{})
		tmp := make(map[string]string)
		tmp[ResultItemMetricResource] = resourceName.String()
		resultItem[ResultItemMetric] = tmp
		resultItem[ResultItemValue] = []interface{}{timestamp, hardMap[resourceName.String()], v.String()}
		resultItems = append(resultItems, resultItem)
	}
	fMetric.Data.Result = resultItems
	return fMetric
}

View File

@@ -0,0 +1,236 @@
/*
Copyright 2018 The KubeSphere Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
// Keys and status values used when assembling Prometheus-style query
// responses into FormatedMetric structures.
const (
	ResultTypeVector = "vector" // instant-vector result type
	ResultTypeMatrix = "matrix" // range-vector (over-time) result type

	MetricStatusError   = "error"   // metric could not be computed
	MetricStatusSuccess = "success" // metric computed normally

	ResultItemMetric         = "metric"   // key for a result item's label set
	ResultItemMetricResource = "resource" // label key naming the quota resource
	ResultItemValue          = "value"    // key for a result item's [ts, ...] value
)
// Names of the synthetic (non-PromQL) metrics this package produces itself,
// e.g. counts derived from the Kubernetes API rather than Prometheus.
const (
	MetricNameWorkloadCount     = "workload_count"
	MetricNameNamespacePodCount = "namespace_pod_count"

	MetricNameWorkspaceAllOrganizationCount = "workspace_all_organization_count"
	MetricNameWorkspaceAllAccountCount      = "workspace_all_account_count"
	MetricNameWorkspaceAllProjectCount      = "workspace_all_project_count"
	MetricNameWorkspaceAllDevopsCount       = "workspace_all_devops_project_count"
	MetricNameWorkspaceNamespaceCount       = "workspace_namespace_count"
	MetricNameWorkspaceDevopsCount          = "workspace_devops_project_count"
	MetricNameWorkspaceMemberCount          = "workspace_member_count"
	MetricNameWorkspaceRoleCount            = "workspace_role_count"

	// node health counts derived from getNodeHealthyConditionMetric
	MetricNameClusterHealthyNodeCount   = "cluster_node_online"
	MetricNameClusterUnhealthyNodeCount = "cluster_node_offline"
	MetricNameClusterNodeCount          = "cluster_node_total"
)
// Kinds of resources that can be counted at workspace level.
const (
	WorkspaceResourceKindOrganization = "organization"
	WorkspaceResourceKindAccount      = "account"
	WorkspaceResourceKindNamespace    = "namespace"
	WorkspaceResourceKindDevops       = "devops"
	WorkspaceResourceKindMember       = "member"
	WorkspaceResourceKindRole         = "role"
)
// Aggregation levels at which metrics are queried, from cluster-wide
// down to individual containers.
const (
	MetricLevelCluster   = "cluster"
	MetricLevelNode      = "node"
	MetricLevelWorkspace = "workspace"
	MetricLevelNamespace = "namespace"
	MetricLevelPod       = "pod"
	MetricLevelContainer = "container"
	MetricLevelWorkload  = "workload"
)
type MetricMap map[string]string
// MetricsNames lists every metric name the monitoring API can serve, grouped
// by level (cluster, node, namespace, pod, workload, workspace). Each entry
// is expected to have a corresponding template in RulePromQLTmplMap or a
// synthetic implementation elsewhere in this package.
var MetricsNames = []string{
	// cluster level
	"cluster_cpu_utilisation",
	"cluster_cpu_usage",
	"cluster_cpu_total",
	"cluster_memory_utilisation",
	"cluster_pod_count",
	"cluster_memory_bytes_available",
	"cluster_memory_bytes_total",
	"cluster_memory_bytes_usage",
	"cluster_net_utilisation",
	"cluster_net_bytes_transmitted",
	"cluster_net_bytes_received",
	"cluster_disk_read_iops",
	"cluster_disk_write_iops",
	"cluster_disk_read_throughput",
	"cluster_disk_write_throughput",
	"cluster_disk_size_usage",
	"cluster_disk_size_utilisation",
	"cluster_disk_size_capacity",
	"cluster_disk_size_available",
	"cluster_node_online",
	"cluster_node_offline",
	"cluster_node_total",
	// node level
	"node_cpu_utilisation",
	"node_cpu_total",
	"node_cpu_usage",
	"node_memory_utilisation",
	"node_memory_bytes_usage",
	"node_memory_bytes_available",
	"node_memory_bytes_total",
	"node_net_utilisation",
	"node_net_bytes_transmitted",
	"node_net_bytes_received",
	"node_disk_read_iops",
	"node_disk_write_iops",
	"node_disk_read_throughput",
	"node_disk_write_throughput",
	"node_disk_size_capacity",
	"node_disk_size_available",
	"node_disk_size_usage",
	"node_disk_size_utilisation",
	"node_pod_count",
	"node_pod_quota",
	// namespace level
	"namespace_cpu_usage",
	"namespace_memory_usage",
	"namespace_memory_usage_wo_cache",
	"namespace_net_bytes_transmitted",
	"namespace_net_bytes_received",
	"namespace_pod_count",
	// pod level
	"pod_cpu_usage",
	"pod_memory_usage",
	"pod_memory_usage_wo_cache",
	"pod_net_bytes_transmitted",
	"pod_net_bytes_received",
	// workload level
	"workload_pod_cpu_usage",
	"workload_pod_memory_usage",
	"workload_pod_memory_usage_wo_cache",
	"workload_pod_net_bytes_transmitted",
	"workload_pod_net_bytes_received",
	// container metrics are currently disabled in the "all metrics" listing
	//"container_cpu_usage",
	//"container_memory_usage_wo_cache",
	//"container_memory_usage",
	// workspace level
	"workspace_cpu_usage",
	"workspace_memory_usage",
	"workspace_memory_usage_wo_cache",
	"workspace_net_bytes_transmitted",
	"workspace_net_bytes_received",
	"workspace_pod_count",
}
// RulePromQLTmplMap maps each metric name to its PromQL expression or
// expression template. Templates use positional placeholders that the rule
// builders substitute before querying Prometheus: $1 is typically a node
// selector or namespace regex, $2 a pod-name pattern, $3 a container-name
// pattern or node regex — see the Make*PromQL / Make*Rule functions.
// Expressions ending in ":..." refer to Prometheus recording rules.
var RulePromQLTmplMap = MetricMap{
	//cluster
	"cluster_cpu_utilisation":        ":node_cpu_utilisation:avg1m",
	"cluster_cpu_usage":              `sum (irate(container_cpu_usage_seconds_total{job="kubelet", image!=""}[5m]))`,
	"cluster_cpu_total":              "sum(node:node_num_cpu:sum)",
	"cluster_memory_utilisation":     ":node_memory_utilisation:",
	"cluster_pod_count":              `count(kube_pod_info unless on(pod) kube_pod_completion_time unless on(node) kube_node_labels{label_role="log"})`,
	"cluster_memory_bytes_available": "sum(node:node_memory_bytes_available:sum)",
	"cluster_memory_bytes_total":     "sum(node:node_memory_bytes_total:sum)",
	"cluster_memory_bytes_usage":     "sum(node:node_memory_bytes_total:sum) - sum(node:node_memory_bytes_available:sum)",
	"cluster_net_utilisation":        "sum(node:node_net_utilisation:sum_irate)",
	"cluster_net_bytes_transmitted":  "sum(node:node_net_bytes_transmitted:sum_irate)",
	"cluster_net_bytes_received":     "sum(node:node_net_bytes_received:sum_irate)",
	"cluster_disk_read_iops":         "sum(node:data_volume_iops_reads:sum)",
	"cluster_disk_write_iops":        "sum(node:data_volume_iops_writes:sum)",
	"cluster_disk_read_throughput":   "sum(node:data_volume_throughput_bytes_read:sum)",
	"cluster_disk_write_throughput":  "sum(node:data_volume_throughput_bytes_written:sum)",
	"cluster_disk_size_usage":        `sum(sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:)) - sum(sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:))`,
	"cluster_disk_size_utilisation":  `(sum(sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:)) - sum(sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:))) / sum(sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:))`,
	"cluster_disk_size_capacity":     `sum(sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:))`,
	"cluster_disk_size_available":    `sum(sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:))`,
	//node
	"node_cpu_utilisation":        "node:node_cpu_utilisation:avg1m",
	"node_cpu_total":              "node:node_num_cpu:sum",
	"node_memory_utilisation":     "node:node_memory_utilisation:",
	"node_memory_bytes_available": "node:node_memory_bytes_available:sum",
	"node_memory_bytes_total":     "node:node_memory_bytes_total:sum",
	// Node network utilisation (bytes received + bytes transmitted per second)
	"node_net_utilisation": "node:node_net_utilisation:sum_irate",
	// Node network bytes transmitted per second
	"node_net_bytes_transmitted": "node:node_net_bytes_transmitted:sum_irate",
	// Node network bytes received per second
	"node_net_bytes_received": "node:node_net_bytes_received:sum_irate",
	// node:data_volume_iops_reads:sum{node=~"i-5xcldxos|i-6soe9zl1"}
	"node_disk_read_iops": "node:data_volume_iops_reads:sum",
	// node:data_volume_iops_writes:sum{node=~"i-5xcldxos|i-6soe9zl1"}
	"node_disk_write_iops": "node:data_volume_iops_writes:sum",
	// node:data_volume_throughput_bytes_read:sum{node=~"i-5xcldxos|i-6soe9zl1"}
	"node_disk_read_throughput": "node:data_volume_throughput_bytes_read:sum",
	// node:data_volume_throughput_bytes_written:sum{node=~"i-5xcldxos|i-6soe9zl1"}
	"node_disk_write_throughput": "node:data_volume_throughput_bytes_written:sum",
	"node_disk_size_capacity":    `sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
	"node_disk_size_available":   `sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
	"node_disk_size_usage":       `sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1) -sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
	"node_disk_size_utilisation": `sum by (node) (((node_filesystem_size{mountpoint="/", job="node-exporter"} - node_filesystem_avail{mountpoint="/", job="node-exporter"}) / node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
	"node_pod_count":             `count(kube_pod_info$1 unless on(pod) kube_pod_completion_time) by (node)`,
	// without log node: unless on(node) kube_node_labels{label_role="log"}
	"node_pod_quota":         `sum(kube_node_status_capacity_pods$1) by (node)`,
	"node_cpu_usage":         `sum by (node) (label_join(irate(container_cpu_usage_seconds_total{job="kubelet", image!=""}[5m]), "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
	"node_memory_bytes_usage": "node:node_memory_bytes_total:sum$1 - node:node_memory_bytes_available:sum$1",
	//namespace
	"namespace_cpu_usage":             `namespace:container_cpu_usage_seconds_total:sum_rate{namespace=~"$1"}`,
	"namespace_memory_usage":          `namespace:container_memory_usage_bytes:sum{namespace=~"$1"}`,
	"namespace_memory_usage_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace=~"$1"}`,
	"namespace_net_bytes_transmitted": `sum by (namespace) (irate(container_network_transmit_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[5m]))`,
	"namespace_net_bytes_received":    `sum by (namespace) (irate(container_network_receive_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[5m]))`,
	"namespace_pod_count":             `count(kube_pod_info{namespace=~"$1"} unless on(pod) kube_pod_completion_time) by (namespace)`,
	// pod ($1 = namespace, $2 = pod name / pattern, $3 = node regex)
	"pod_cpu_usage":                  `sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name="$2", image!=""}[5m])) by (namespace, pod_name)`,
	"pod_memory_usage":               `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name="$2", image!=""}) by (namespace, pod_name)`,
	"pod_memory_usage_wo_cache":      `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name="$2", image!=""} - container_memory_cache{job="kubelet", namespace="$1", pod_name="$2",image!=""}) by (namespace, pod_name)`,
	"pod_net_bytes_transmitted":      `sum by (namespace, pod_name) (irate(container_network_transmit_bytes_total{namespace="$1", pod_name!="", pod_name="$2", interface="eth0", job="kubelet"}[5m]))`,
	"pod_net_bytes_received":         `sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{namespace="$1", pod_name!="", pod_name="$2", interface="eth0", job="kubelet"}[5m]))`,
	"pod_cpu_usage_all":              `sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}[5m])) by (namespace, pod_name)`,
	"pod_memory_usage_all":           `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}) by (namespace, pod_name)`,
	"pod_memory_usage_wo_cache_all":  `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name=~"$2", image!=""} - container_memory_cache{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}) by (namespace, pod_name)`,
	"pod_net_bytes_transmitted_all":  `sum by (namespace, pod_name) (irate(container_network_transmit_bytes_total{namespace="$1", pod_name!="", pod_name=~"$2", interface="eth0", job="kubelet"}[5m]))`,
	"pod_net_bytes_received_all":     `sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{namespace="$1", pod_name!="", pod_name=~"$2", interface="eth0", job="kubelet"}[5m]))`,
	"pod_cpu_usage_node":             `sum by (node, pod) (label_join(irate(container_cpu_usage_seconds_total{job="kubelet",pod_name=~"$2", image!=""}[5m]), "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
	"pod_memory_usage_node":          `sum by (node, pod) (label_join(container_memory_usage_bytes{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
	"pod_memory_usage_wo_cache_node": `sum by (node, pod) ((label_join(container_memory_usage_bytes{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name") - label_join(container_memory_cache{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name")) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
	// container ($1 = namespace, $2 = pod name, $3 = container name / pattern)
	"container_cpu_usage":                 `sum(irate(container_cpu_usage_seconds_total{namespace="$1", pod_name="$2", container_name="$3"}[5m])) by (namespace, pod_name, container_name)`,
	"container_cpu_usage_all":             `sum(irate(container_cpu_usage_seconds_total{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}[5m])) by (namespace, pod_name, container_name)`,
	"container_memory_usage_wo_cache":     `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name="$3"} - ignoring(id, image, endpoint, instance, job, name, service) container_memory_cache{namespace="$1", pod_name="$2", container_name="$3"}`,
	"container_memory_usage_wo_cache_all": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"} - ignoring(id, image, endpoint, instance, job, name, service) container_memory_cache{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}`,
	"container_memory_usage":              `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name="$3"}`,
	"container_memory_usage_all":          `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}`,
	// enterprise / workspace ($1 = namespace regex covering the workspace)
	"workspace_cpu_usage":             `sum(namespace:container_cpu_usage_seconds_total:sum_rate{namespace =~"$1"})`,
	"workspace_memory_usage":          `sum(namespace:container_memory_usage_bytes:sum{namespace =~"$1"})`,
	"workspace_memory_usage_wo_cache": `sum(namespace:container_memory_usage_bytes_wo_cache:sum{namespace =~"$1"})`,
	"workspace_net_bytes_transmitted": `sum(sum by (namespace) (irate(container_network_transmit_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[5m])))`,
	"workspace_net_bytes_received":    `sum(sum by (namespace) (irate(container_network_receive_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[5m])))`,
	"workspace_pod_count":             `sum(count(kube_pod_info{namespace=~"$1"} unless on(pod) kube_pod_completion_time) by (namespace))`,
}

View File

@@ -67,6 +67,12 @@ func MakeWorkloadRule(request *restful.Request) string {
return rule
}
// MakeWorkspacePromQL builds the PromQL query for a workspace-level metric
// by substituting the workspace's namespace regex for every "$1" placeholder
// in the metric's rule template.
func MakeWorkspacePromQL(metricsName string, namespaceRe2 string) string {
	tmpl := RulePromQLTmplMap[metricsName]
	return strings.Replace(tmpl, "$1", namespaceRe2, -1)
}
func MakeContainerPromQL(request *restful.Request) string {
nsName := strings.Trim(request.PathParameter("ns_name"), " ")
poName := strings.Trim(request.PathParameter("pod_name"), " ")
@@ -169,9 +175,8 @@ func MakeNodeorClusterRule(request *restful.Request, metricsName string) string
if nodesFilter == "" {
nodesFilter = ".*"
}
if strings.Contains(metricsName, "disk") && (!(strings.Contains(metricsName, "read") || strings.Contains(metricsName, "write"))) {
if strings.Contains(metricsName, "disk_size") || strings.Contains(metricsName, "pod") || strings.Contains(metricsName, "usage") {
// disk size promql
nodesFilter := ""
if nodeID != "" {
nodesFilter = "{" + "node" + "=" + "\"" + nodeID + "\"" + "}"
} else {