diff --git a/pkg/apis/v1alpha/monitoring/monitor_handler.go b/pkg/apis/v1alpha/monitoring/monitor_handler.go index 6c08836e4..dc39c12b7 100644 --- a/pkg/apis/v1alpha/monitoring/monitor_handler.go +++ b/pkg/apis/v1alpha/monitoring/monitor_handler.go @@ -46,6 +46,19 @@ func (u MonitorResource) monitorContainer(request *restful.Request, response *re } func (u MonitorResource) monitorWorkload(request *restful.Request, response *restful.Response) { + wlKind := request.PathParameter("workload_kind") + if strings.Trim(wlKind, " ") == "" { + // count all workloads figure + //metricName := "workload_count" + res := metrics.MonitorWorkloadCount(request) + response.WriteAsJson(res) + } else { + res := metrics.MonitorAllMetrics(request) + response.WriteAsJson(res) + } +} + +func (u MonitorResource) monitorWorkspacePodLevelMetrics(request *restful.Request, response *restful.Response) { res := metrics.MonitorAllMetrics(request) response.WriteAsJson(res) } @@ -196,10 +209,18 @@ func Register(ws *restful.WebService, subPath string) { Doc("monitor specific workload level metrics"). Param(ws.PathParameter("ns_name", "namespace").DataType("string").Required(true).DefaultValue("kube-system")). Param(ws.QueryParameter("metrics_filter", "metrics name cpu memory...").DataType("string").Required(false)). - Param(ws.PathParameter("workload_kind", "workload kind").DataType("string").Required(true).DefaultValue("daemonset")). + Param(ws.PathParameter("workload_kind", "workload kind").DataType("string").Required(false).DefaultValue("daemonset")). Param(ws.QueryParameter("workload_name", "workload name").DataType("string").Required(true).DefaultValue("")). Metadata(restfulspec.KeyOpenAPITags, tags)). Consumes(restful.MIME_JSON, restful.MIME_XML). Produces(restful.MIME_JSON) + ws.Route(ws.GET(subPath+"/namespaces/{ns_name}/workloads").To(u.monitorWorkload). + Filter(route.RouteLogging). + Doc("monitor all workload level metrics"). 
+ Param(ws.PathParameter("ns_name", "namespace").DataType("string").Required(true).DefaultValue("kube-system")). + Param(ws.QueryParameter("metrics_filter", "metrics name cpu memory...").DataType("string").Required(false)). + Metadata(restfulspec.KeyOpenAPITags, tags)). + Consumes(restful.MIME_JSON, restful.MIME_XML). + Produces(restful.MIME_JSON) } diff --git a/pkg/client/prometheusclient.go b/pkg/client/prometheusclient.go index 9bfc1b056..95992936f 100644 --- a/pkg/client/prometheusclient.go +++ b/pkg/client/prometheusclient.go @@ -17,6 +17,9 @@ import ( "net/http" "net/url" + "strconv" + "time" + "github.com/emicklei/go-restful" "github.com/golang/glog" "github.com/pkg/errors" @@ -28,6 +31,8 @@ const ( DefaultPrometheusPort = "9090" PrometheusApiPath = "/api/v1/" PrometheusEndpointUrl = DefaultScheme + "://" + DefaultPrometheusService + ":" + DefaultPrometheusPort + PrometheusApiPath + DefaultQueryStep = "10m" + DefaultQueryTimeout = "30s" ) var client = &http.Client{} @@ -79,14 +84,18 @@ func ParseRequestHeader(request *restful.Request) (url.Values, bool, error) { end := request.QueryParameter("end") step := request.QueryParameter("step") timeout := request.QueryParameter("timeout") + if timeout == "" { - timeout = "30s" + timeout = DefaultQueryTimeout + } + if step == "" { + step = DefaultQueryStep } // Whether query or query_range request u := url.Values{} - if start != "" && end != "" && step != "" { - u.Set("start", start) - u.Set("end", end) + if start != "" && end != "" { + u.Set("start", convertTimeGranularity(start)) + u.Set("end", convertTimeGranularity(end)) u.Set("step", step) u.Set("timeout", timeout) return u, true, nil @@ -101,6 +110,18 @@ func ParseRequestHeader(request *restful.Request) (url.Values, bool, error) { return u, false, nil } - glog.Error("Parse request failed", u) - return u, false, errors.Errorf("Parse request failed") + glog.Errorf("Parse request %s failed", u) + return u, false, errors.Errorf("Parse request time range %s 
failed", u) +} + +func convertTimeGranularity(ts string) string { + timeFloat, err := strconv.ParseFloat(ts, 64) + if err != nil { + glog.Errorf("convert second timestamp %s to minute timestamp failed", ts) + return strconv.FormatInt(int64(time.Now().Unix()), 10) + } + timeInt := int64(timeFloat) + // convert second timestamp to minute timestamp + secondTime := time.Unix(timeInt, 0).Truncate(time.Minute).Unix() + return strconv.FormatInt(secondTime, 10) } diff --git a/pkg/models/metrics/metrics_collector.go b/pkg/models/metrics/metrics_collector.go deleted file mode 100644 index 8f5762cdc..000000000 --- a/pkg/models/metrics/metrics_collector.go +++ /dev/null @@ -1,184 +0,0 @@ -/* -Copyright 2018 The KubeSphere Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package metrics - -import ( - "encoding/json" - "regexp" - "strings" - - "github.com/emicklei/go-restful" - "github.com/golang/glog" - - "kubesphere.io/kubesphere/pkg/client" -) - -func getPodNameRegexInWorkload(request *restful.Request) string { - promql := MakeWorkloadRule(request) - res := client.SendPrometheusRequest(request, promql) - data := []byte(res) - var dat CommonMetricsResult - jsonErr := json.Unmarshal(data, &dat) - if jsonErr != nil { - glog.Errorln("json parse failed", jsonErr) - } - var podNames []string - for _, x := range dat.Data.Result { - podName := x.KubePodMetric.Pod - podNames = append(podNames, podName) - } - podNamesFilter := "^(" + strings.Join(podNames, "|") + ")$" - return podNamesFilter -} - -func MonitorWorkloadSingleMetric(request *restful.Request, metricsName string) *FormatedMetric { - nsName := strings.Trim(request.PathParameter("ns_name"), " ") - podNamesFilter := getPodNameRegexInWorkload(request) - newPromql := MakePodPromQL(request, []string{metricsName, nsName, "", "", podNamesFilter}) - podMetrics := client.SendPrometheusRequest(request, newPromql) - cleanedJson := ReformatJson(podMetrics, metricsName) - return cleanedJson -} - -func MonitorPodSingleMetric(request *restful.Request, metricsName string) *FormatedMetric { - nsName := strings.Trim(request.PathParameter("ns_name"), " ") - nodeID := strings.Trim(request.PathParameter("node_id"), " ") - podName := strings.Trim(request.PathParameter("pod_name"), " ") - podFilter := strings.Trim(request.QueryParameter("pods_filter"), " ") - params := []string{metricsName, nsName, nodeID, podName, podFilter} - promql := MakePodPromQL(request, params) - if promql != "" { - res := client.SendPrometheusRequest(request, promql) - cleanedJson := ReformatJson(res, metricsName) - return cleanedJson - } - return nil -} - -func MonitorNamespaceSingleMetric(request *restful.Request, metricsName string) *FormatedMetric { - recordingRule := MakeNamespacePromQL(request, metricsName) - res 
:= client.SendPrometheusRequest(request, recordingRule) - cleanedJson := ReformatJson(res, metricsName) - return cleanedJson -} - -func ReformatJson(metric string, metricsName string) *FormatedMetric { - var formatMetric FormatedMetric - err := json.Unmarshal([]byte(metric), &formatMetric) - if err != nil { - glog.Errorln("Unmarshal metric json failed", err) - } - if formatMetric.MetricName == "" { - formatMetric.MetricName = metricsName - } - // retrive metrics success - if formatMetric.Status == "success" { - result := formatMetric.Data.Result - for _, res := range result { - metric, ok := res["metric"] - me := metric.(map[string]interface{}) - if ok { - delete(me, "__name__") - } - } - } - return &formatMetric -} - -func collectNodeorClusterMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) { - metric := MonitorNodeorClusterSingleMetric(request, metricsName) - ch <- metric -} - -func collectNamespaceMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) { - metric := MonitorNamespaceSingleMetric(request, metricsName) - ch <- metric -} - -func collectWorkloadMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) { - metricsName = strings.TrimLeft(metricsName, "workload_") - metric := MonitorWorkloadSingleMetric(request, metricsName) - ch <- metric -} - -func collectPodMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) { - metric := MonitorPodSingleMetric(request, metricsName) - ch <- metric -} - -func MonitorAllMetrics(request *restful.Request) FormatedLevelMetric { - metricsName := strings.Trim(request.QueryParameter("metrics_filter"), " ") - if metricsName == "" { - metricsName = ".*" - } - path := request.SelectedRoutePath() - sourceType := path[strings.LastIndex(path, "/")+1 : len(path)-1] - if strings.Contains(path, "workload") { - sourceType = "workload" - } - var ch = make(chan *FormatedMetric, 10) - for _, k := range MetricsNames { - bol, err := 
regexp.MatchString(metricsName, k) - if !bol { - continue - } - if err != nil { - glog.Errorln("regex match failed", err) - continue - } - if strings.HasPrefix(k, sourceType) { - if sourceType == "node" || sourceType == "cluster" { - go collectNodeorClusterMetrics(request, k, ch) - } else if sourceType == "namespace" { - go collectNamespaceMetrics(request, k, ch) - } else if sourceType == "pod" { - go collectPodMetrics(request, k, ch) - } else if sourceType == "workload" { - go collectWorkloadMetrics(request, k, ch) - } - } - } - var metricsArray []FormatedMetric - var tempJson *FormatedMetric - for _, k := range MetricsNames { - bol, err := regexp.MatchString(metricsName, k) - if !bol { - continue - } - if err != nil { - glog.Errorln("regex match failed") - continue - } - if strings.HasPrefix(k, sourceType) { - tempJson = <-ch - if tempJson != nil { - metricsArray = append(metricsArray, *tempJson) - } - } - } - return FormatedLevelMetric{ - MetricsLevel: sourceType, - Results: metricsArray, - } -} - -func MonitorNodeorClusterSingleMetric(request *restful.Request, metricsName string) *FormatedMetric { - recordingRule := MakeNodeorClusterRule(request, metricsName) - res := client.SendPrometheusRequest(request, recordingRule) - cleanedJson := ReformatJson(res, metricsName) - return cleanedJson -} diff --git a/pkg/models/metrics/metrics_rule_tmpl.go b/pkg/models/metrics/metrics_rule_tmpl.go deleted file mode 100644 index eae10835b..000000000 --- a/pkg/models/metrics/metrics_rule_tmpl.go +++ /dev/null @@ -1,147 +0,0 @@ -/* -Copyright 2018 The KubeSphere Authors. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package metrics - -type MetricMap map[string]string - -var MetricsNames = []string{ - "cluster_cpu_utilisation", - "cluster_memory_utilisation", - "cluster_net_utilisation", - "cluster_pod_count", - - "node_cpu_utilisation", - "node_memory_utilisation", - "node_memory_available", - "node_memory_total", - "node_net_utilisation", - "node_net_bytes_transmitted", - "node_net_bytes_received", - "node_disk_read_iops", - "node_disk_write_iops", - "node_disk_read_throughput", - "node_disk_write_throughput", - "node_disk_capacity", - "node_disk_available", - "node_disk_utilization", - - "namespace_cpu_utilisation", - "namespace_memory_utilisation", - "namespace_memory_utilisation_wo_cache", - "namespace_net_bytes_transmitted", - "namespace_net_bytes_received", - "namespace_pod_count", - - "pod_cpu_utilisation", - "pod_memory_utilisation", - "pod_memory_utilisation_wo_cache", - "pod_net_bytes_transmitted", - "pod_net_bytes_received", - - "workload_pod_cpu_utilisation", - "workload_pod_memory_utilisation", - "workload_pod_memory_utilisation_wo_cache", - "workload_pod_net_bytes_transmitted", - "workload_pod_net_bytes_received", - //"container_cpu_utilisation", - //"container_memory_utilisation_wo_cache", - //"container_memory_utilisation", - - "tenant_cpu_utilisation", - "tenant_memory_utilisation", - "tenant_memory_utilisation_wo_cache", - "tenant_net_bytes_transmitted", - "tenant_net_bytes_received", - "tenant_pod_count", -} - -var RulePromQLTmplMap = MetricMap{ - //cluster - "cluster_cpu_utilisation": ":node_cpu_utilisation:avg1m", - "cluster_memory_utilisation": 
":node_memory_utilisation:", - // Cluster network utilisation (bytes received + bytes transmitted per second) - "cluster_net_utilisation": ":node_net_utilisation:sum_irate", - "cluster_pod_count": `count(kube_pod_info{job="kube-state-metrics"})`, - - //node - "node_cpu_utilisation": "node:node_cpu_utilisation:avg1m", - "node_memory_utilisation": "node:node_memory_utilisation:", - "node_memory_available": "node:node_memory_bytes_available:sum", - "node_memory_total": "node:node_memory_bytes_total:sum", - // Node network utilisation (bytes received + bytes transmitted per second) - "node_net_utilisation": "node:node_net_utilisation:sum_irate", - // Node network bytes transmitted per second - "node_net_bytes_transmitted": "node:node_net_bytes_transmitted:sum_irate", - // Node network bytes received per second - "node_net_bytes_received": "node:node_net_bytes_received:sum_irate", - - // node:data_volume_iops_reads:sum{node=~"i-5xcldxos|i-6soe9zl1"} - "node_disk_read_iops": "node:data_volume_iops_reads:sum", - // node:data_volume_iops_writes:sum{node=~"i-5xcldxos|i-6soe9zl1"} - "node_disk_write_iops": "node:data_volume_iops_writes:sum", - // node:data_volume_throughput_bytes_read:sum{node=~"i-5xcldxos|i-6soe9zl1"} - "node_disk_read_throughput": "node:data_volume_throughput_bytes_read:sum", - // node:data_volume_throughput_bytes_written:sum{node=~"i-5xcldxos|i-6soe9zl1"} - "node_disk_write_throughput": "node:data_volume_throughput_bytes_written:sum", - - "node_disk_capacity": `sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`, - "node_disk_available": `sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`, - "node_disk_utilization": `sum by (node) (((node_filesystem_size{mountpoint="/", job="node-exporter"} - node_filesystem_avail{mountpoint="/", job="node-exporter"}) / 
node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`, - - //namespace - "namespace_cpu_utilisation": `namespace:container_cpu_usage_seconds_total:sum_rate{namespace=~"$1"}`, - "namespace_memory_utilisation": `namespace:container_memory_usage_bytes:sum{namespace=~"$1"}`, - "namespace_memory_utilisation_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace=~"$1"}`, - "namespace_net_bytes_transmitted": `sum by (namespace) (irate(container_network_transmit_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[2m]))`, - "namespace_net_bytes_received": `sum by (namespace) (irate(container_network_receive_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[2m]))`, - // count(kube_pod_info) by (namespace) namespace=~"monitoring|default|kube-system" - "namespace_pod_count": `count(kube_pod_info{job="kube-state-metrics", namespace=~"$1"}) by (namespace)`, - - // pod - "pod_cpu_utilisation": `sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name="$2", image!=""}[5m])) by (namespace, pod_name)`, - "pod_memory_utilisation": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name="$2", image!=""}) by (namespace, pod_name)`, - "pod_memory_utilisation_wo_cache": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name="$2", image!=""} - container_memory_cache{job="kubelet", namespace="$1", pod_name="$2",image!=""}) by (namespace, pod_name)`, - "pod_net_bytes_transmitted": `sum by (namespace, pod_name) (irate(container_network_transmit_bytes_total{namespace="$1", pod_name!="", pod_name="$2", interface="eth0", job="kubelet"}[2m]))`, - "pod_net_bytes_received": `sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{namespace="$1", pod_name!="", pod_name="$2", interface="eth0", job="kubelet"}[2m]))`, - - "pod_cpu_utilisation_all": 
`sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}[5m])) by (namespace, pod_name)`, - "pod_memory_utilisation_all": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}) by (namespace, pod_name)`, - "pod_memory_utilisation_wo_cache_all": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name=~"$2", image!=""} - container_memory_cache{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}) by (namespace, pod_name)`, - "pod_net_bytes_transmitted_all": `sum by (namespace, pod_name) (irate(container_network_transmit_bytes_total{namespace="$1", pod_name!="", pod_name=~"$2", interface="eth0", job="kubelet"}[2m]))`, - "pod_net_bytes_received_all": `sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{namespace="$1", pod_name!="", pod_name=~"$2", interface="eth0", job="kubelet"}[2m]))`, - - //"pod_cpu_utilisation_node": `sum by (node, pod) (label_join(irate(container_cpu_usage_seconds_total{job="kubelet", image!=""}[5m]), "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`, - "pod_cpu_utilisation_node": `sum by (node, pod) (label_join(irate(container_cpu_usage_seconds_total{job="kubelet",pod_name=~"$2", image!=""}[5m]), "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`, - "pod_memory_utilisation_node": `sum by (node, pod) (label_join(container_memory_usage_bytes{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`, - "pod_memory_utilisation_wo_cache_node": `sum by (node, pod) ((label_join(container_memory_usage_bytes{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name") - label_join(container_memory_cache{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name")) * on (namespace, pod) group_left(node) 
node_namespace_pod:kube_pod_info:{node=~"$3"})`, - - // container - "container_cpu_utilisation": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", pod_name="$2", container_name="$3"}[5m])) by (namespace, pod_name, container_name)`, - //"container_cpu_utilisation_wo_podname": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", container_name=~"$3"}[5m])) by (namespace, pod_name, container_name)`, - "container_cpu_utilisation_all": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}[5m])) by (namespace, pod_name, container_name)`, - //"container_cpu_utilisation_all_wo_podname": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", container_name!="POD"}[5m])) by (namespace, pod_name, container_name)`, - - "container_memory_utilisation_wo_cache": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name="$3"} - ignoring(id, image, endpoint, instance, job, name, service) container_memory_cache{namespace="$1", pod_name="$2", container_name="$3"}`, - "container_memory_utilisation_wo_cache_all": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"} - ignoring(id, image, endpoint, instance, job, name, service) container_memory_cache{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}`, - "container_memory_utilisation": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name="$3"}`, - "container_memory_utilisation_all": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}`, - - // tenant - "tenant_cpu_utilisation": `sum(namespace:container_cpu_usage_seconds_total:sum_rate{namespace =~"$1"})`, - "tenant_memory_utilisation": `sum(namespace:container_memory_usage_bytes:sum{namespace =~"$1"})`, - "tenant_memory_utilisation_wo_cache": `sum(namespace:container_memory_usage_bytes_wo_cache:sum{namespace =~"$1"})`, - 
"tenant_net_bytes_transmitted": `sum(sum by (namespace) (irate(container_network_transmit_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[2m])))`, - "tenant_net_bytes_received": `sum(sum by (namespace) (irate(container_network_receive_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[2m])))`, - "tenant_pod_count": `sum(count(kube_pod_info{job="kube-state-metrics", namespace=~"$1"}) by (namespace))`, -} diff --git a/pkg/models/metrics/metricscollector.go b/pkg/models/metrics/metricscollector.go new file mode 100644 index 000000000..06b1e369f --- /dev/null +++ b/pkg/models/metrics/metricscollector.go @@ -0,0 +1,422 @@ +/* +Copyright 2018 The KubeSphere Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metrics + +import ( + "encoding/json" + "regexp" + "strings" + + "github.com/emicklei/go-restful" + "github.com/golang/glog" + + "time" + + "k8s.io/api/core/v1" + metaV1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "kubesphere.io/kubesphere/pkg/client" + "kubesphere.io/kubesphere/pkg/models" +) + +func getPodNameRegexInWorkload(request *restful.Request) string { + promql := MakeWorkloadRule(request) + res := client.SendPrometheusRequest(request, promql) + data := []byte(res) + var dat CommonMetricsResult + jsonErr := json.Unmarshal(data, &dat) + if jsonErr != nil { + glog.Errorln("json parse failed", jsonErr) + } + var podNames []string + for _, x := range dat.Data.Result { + podName := x.KubePodMetric.Pod + podNames = append(podNames, podName) + } + podNamesFilter := "^(" + strings.Join(podNames, "|") + ")$" + return podNamesFilter +} + +func MonitorWorkloadSingleMetric(request *restful.Request, metricsName string) *FormatedMetric { + nsName := strings.Trim(request.PathParameter("ns_name"), " ") + podNamesFilter := getPodNameRegexInWorkload(request) + newPromql := MakePodPromQL(request, []string{metricsName, nsName, "", "", podNamesFilter}) + podMetrics := client.SendPrometheusRequest(request, newPromql) + cleanedJson := ReformatJson(podMetrics, metricsName) + return cleanedJson +} + +func MonitorPodSingleMetric(request *restful.Request, metricsName string) *FormatedMetric { + nsName := strings.Trim(request.PathParameter("ns_name"), " ") + nodeID := strings.Trim(request.PathParameter("node_id"), " ") + podName := strings.Trim(request.PathParameter("pod_name"), " ") + podFilter := strings.Trim(request.QueryParameter("pods_filter"), " ") + params := []string{metricsName, nsName, nodeID, podName, podFilter} + promql := MakePodPromQL(request, params) + if promql != "" { + res := client.SendPrometheusRequest(request, promql) + cleanedJson := ReformatJson(res, metricsName) + return cleanedJson + } + return nil +} + +func MonitorNamespaceSingleMetric(request 
*restful.Request, metricsName string) *FormatedMetric { + recordingRule := MakeNamespacePromQL(request, metricsName) + res := client.SendPrometheusRequest(request, recordingRule) + cleanedJson := ReformatJson(res, metricsName) + return cleanedJson +} + +// ReformatJson decodes a Prometheus response, fills in the metric name when it is empty and drops the __name__ label from each result item; it may be time consuming. +func ReformatJson(metric string, metricsName string) *FormatedMetric { + var formatMetric FormatedMetric + err := json.Unmarshal([]byte(metric), &formatMetric) + if err != nil { + glog.Errorln("Unmarshal metric json failed", err) + } + if formatMetric.MetricName == "" { + formatMetric.MetricName = metricsName + } + // metrics were retrieved successfully + if formatMetric.Status == MetricStatusSuccess { + result := formatMetric.Data.Result + for _, res := range result { + metric := res[ResultItemMetric] + me, ok := metric.(map[string]interface{}) + if ok { + delete(me, "__name__") + } + } + } + return &formatMetric +} + +func collectNodeorClusterMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) { + metric := MonitorNodeorClusterSingleMetric(request, metricsName) + ch <- metric +} + +func collectNamespaceMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) { + metric := MonitorNamespaceSingleMetric(request, metricsName) + ch <- metric +} + +func collectWorkloadMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) { + metricsName = strings.TrimPrefix(metricsName, "workload_") + metric := MonitorWorkloadSingleMetric(request, metricsName) + ch <- metric +} + +func collectWorkspaceMetrics(request *restful.Request, metricsName string, namespaceList []string, ch chan<- *FormatedMetric) { + metric := monitorWorkspaceSingleMertic(request, metricsName, namespaceList) + ch <- metric +} + +func collectPodMetrics(request *restful.Request, metricsName string, ch chan<- *FormatedMetric) { + metric := MonitorPodSingleMetric(request, metricsName) + ch <- metric +} + +func monitorWorkspaceSingleMertic(request 
*restful.Request, metricsName string, namespaceList []string) *FormatedMetric { + namespaceRe2 := "^(" + strings.Join(namespaceList, "|") + ")$" + newpromql := MakeWorkspacePromQL(metricsName, namespaceRe2) + podMetrics := client.SendPrometheusRequest(request, newpromql) + cleanedJson := ReformatJson(podMetrics, metricsName) + return cleanedJson +} + +func filterNamespace(request *restful.Request, namespaceList []string) []string { + var newNSlist []string + nsFilter := strings.Trim(request.QueryParameter("namespaces_filter"), " ") + if nsFilter == "" { + nsFilter = ".*" + } + for _, ns := range namespaceList { + bol, _ := regexp.MatchString(nsFilter, ns) + if bol { + newNSlist = append(newNSlist, ns) + } + } + return newNSlist +} + +func MonitorAllMetrics(request *restful.Request) FormatedLevelMetric { + metricsName := strings.Trim(request.QueryParameter("metrics_filter"), " ") + if metricsName == "" { + metricsName = ".*" + } + path := request.SelectedRoutePath() + sourceType := path[strings.LastIndex(path, "/")+1 : len(path)-1] + if strings.Contains(path, MetricLevelWorkload) { + sourceType = MetricLevelWorkload + } else if strings.Contains(path, MetricLevelWorkspace) { + sourceType = MetricLevelWorkspace + } + var ch = make(chan *FormatedMetric, 10) + for _, metricName := range MetricsNames { + bol, err := regexp.MatchString(metricsName, metricName) + if err != nil { + glog.Errorln("regex match failed", err) + continue + } + if !bol { + continue + } + if strings.HasPrefix(metricName, sourceType) { + if sourceType == MetricLevelCluster || sourceType == MetricLevelNode { + go collectNodeorClusterMetrics(request, metricName, ch) + } else if sourceType == MetricLevelNamespace { + go collectNamespaceMetrics(request, metricName, ch) + } else if sourceType == MetricLevelPod { + go collectPodMetrics(request, metricName, ch) + } else if sourceType == MetricLevelWorkload { + go collectWorkloadMetrics(request, metricName, ch) + } + } + } + var metricsArray 
[]FormatedMetric
+	var tempJson *FormatedMetric
+	for _, k := range MetricsNames {
+		bol, err := regexp.MatchString(metricsName, k)
+		if !bol {
+			continue
+		}
+		if err != nil {
+			glog.Errorln("regex match failed")
+			continue
+		}
+		if strings.HasPrefix(k, sourceType) {
+			tempJson = <-ch
+			if tempJson != nil {
+				metricsArray = append(metricsArray, *tempJson)
+			}
+		}
+	}
+	return FormatedLevelMetric{
+		MetricsLevel: sourceType,
+		Results:      metricsArray,
+	}
+}
+
+// getWorkspacePodsCountMetrics runs the namespace_pod_count PromQL template
+// against Prometheus, restricted to the given namespaces, and returns the
+// reformatted response.
+func getWorkspacePodsCountMetrics(request *restful.Request, namespaces []string) *FormatedMetric {
+	metricName := MetricNameNamespacePodCount
+	// Anchor the alternation so that only the listed namespaces match.
+	nsFilter := "^(" + strings.Join(namespaces, "|") + ")$"
+	promQL := strings.Replace(RulePromQLTmplMap[metricName], "$1", nsFilter, -1)
+	res := client.SendPrometheusRequest(request, promQL)
+	return ReformatJson(res, metricName)
+}
+
+// accumulateResourceList adds every quantity in src onto the entry of the same
+// name in dst; names dst does not hold yet are inserted as a deep copy.
+func accumulateResourceList(dst, src v1.ResourceList) {
+	for name, quantity := range src {
+		if total, ok := dst[name]; ok {
+			// Map values are not addressable: add on a local copy, then store it back.
+			total.Add(quantity)
+			dst[name] = total
+			continue
+		}
+		dst[name] = quantity.DeepCopy()
+	}
+}
+
+// getWorkspaceWorkloadCountMetrics sums the used and hard resource quotas of
+// all given namespaces and converts the total into a workload_count metric.
+// Namespaces whose quota cannot be fetched are logged and left out of the sum.
+func getWorkspaceWorkloadCountMetrics(namespaces []string) FormatedMetric {
+	var wlQuotaMetrics models.ResourceQuota
+	wlQuotaMetrics.NameSpace = strings.Join(namespaces, "|")
+	wlQuotaMetrics.Data.Used = make(v1.ResourceList, 1)
+	wlQuotaMetrics.Data.Hard = make(v1.ResourceList, 1)
+	for _, ns := range namespaces {
+		quotaMetric, err := models.GetNamespaceQuota(ns)
+		if err != nil {
+			glog.Errorln(err)
+			continue
+		}
+		if quotaMetric == nil {
+			// Nothing to add; avoids dereferencing a missing quota.
+			continue
+		}
+		// sum all resources used along namespaces
+		accumulateResourceList(wlQuotaMetrics.Data.Used, quotaMetric.Data.Used)
+		// sum all resources hard along namespaces
+		accumulateResourceList(wlQuotaMetrics.Data.Hard, quotaMetric.Data.Hard)
+	}
+	return convertQuota2MetricStruct(&wlQuotaMetrics)
+}
+
+// getSpecificMetricItem builds a vector-type metric holding a single sample
+// [timestamp, count] labelled with the resource kind. A non-nil err marks the
+// metric as failed and stores the message under the "errorinfo" key.
+func getSpecificMetricItem(timestamp int64, metricName string, kind string, count int, err error) FormatedMetric {
+	var nsMetrics FormatedMetric
+	nsMetrics.MetricName = metricName
+	nsMetrics.Data.ResultType = ResultTypeVector
+
+	resultItem := map[string]interface{}{
+		ResultItemMetric: map[string]string{ResultItemMetricResource: kind},
+		ResultItemValue:  []interface{}{timestamp, count},
+	}
+	if err == nil {
+		nsMetrics.Status = MetricStatusSuccess
+	} else {
+		nsMetrics.Status = MetricStatusError
+		resultItem["errorinfo"] = err.Error()
+	}
+
+	nsMetrics.Data.Result = []map[string]interface{}{resultItem}
+	return nsMetrics
+}
+
+// MonitorNodeorClusterSingleMetric resolves one node- or cluster-level metric.
+// The three node-count metrics (online, offline, total) are computed from the
+// Kubernetes node list; every other metric is fetched from Prometheus.
+func MonitorNodeorClusterSingleMetric(request *restful.Request, metricsName string) *FormatedMetric {
+	var fMetric FormatedMetric
+	timestamp := time.Now().Unix()
+
+	switch metricsName {
+	case MetricNameClusterHealthyNodeCount:
+		onlineNodes, _ := getNodeHealthyConditionMetric()
+		fMetric = getSpecificMetricItem(timestamp, MetricNameClusterHealthyNodeCount, "node_count", len(onlineNodes), nil)
+	case MetricNameClusterUnhealthyNodeCount:
+		_, offlineNodes := getNodeHealthyConditionMetric()
+		fMetric = getSpecificMetricItem(timestamp, MetricNameClusterUnhealthyNodeCount, "node_count", len(offlineNodes), nil)
+	case MetricNameClusterNodeCount:
+		onlineNodes, offlineNodes := getNodeHealthyConditionMetric()
+		fMetric = getSpecificMetricItem(timestamp, MetricNameClusterNodeCount, "node_count", len(onlineNodes)+len(offlineNodes), nil)
+	default:
+		recordingRule := MakeNodeorClusterRule(request, metricsName)
+		res := client.SendPrometheusRequest(request, recordingRule)
+		if reformatted := ReformatJson(res, metricsName); reformatted != nil {
+			fMetric = *reformatted
+		} else {
+			// Report a failed metric instead of dereferencing a nil result.
+			fMetric.MetricName = metricsName
+			fMetric.Status = MetricStatusError
+		}
+	}
+	return &fMetric
+}
+
+// getNodeHealthyConditionMetric lists the cluster nodes and returns the host
+// names of the online and the offline ones, in that order. A node is offline
+// when its Ready condition has status Unknown; nodes labelled role=log are
+// left out of both lists. Both results are nil when the node list cannot be
+// fetched (the error is logged).
+// NOTE(review): a node whose Ready condition is False is counted as online —
+// confirm this is intended.
+func getNodeHealthyConditionMetric() ([]string, []string) {
+	nodeList, err := client.NewK8sClient().CoreV1().Nodes().List(metaV1.ListOptions{})
+	if err != nil {
+		glog.Errorln(err)
+		return nil, nil
+	}
+	var onlineNodes []string
+	var offlineNodes []string
+	for _, node := range nodeList.Items {
+		if node.Labels["role"] == "log" {
+			continue
+		}
+		nodeName := node.Labels["kubernetes.io/hostname"]
+		reachable := true
+		for _, cond := range node.Status.Conditions {
+			if cond.Type == v1.NodeReady && cond.Status == v1.ConditionUnknown {
+				reachable = false
+				break
+			}
+		}
+		if reachable {
+			onlineNodes = append(onlineNodes, nodeName)
+		} else {
+			offlineNodes = append(offlineNodes, nodeName)
+		}
+	}
+	return onlineNodes, offlineNodes
+}
+
+// getExistingNamespace splits namespaces into those present in the cluster and
+// those that are not. When the cluster namespaces cannot be listed, every
+// requested namespace is reported as existing.
+func getExistingNamespace(namespaces []string) ([]string, []string) {
+	namespaceMap, err := getAllNamespace()
+	if err != nil {
+		return namespaces, nil
+	}
+	var existedNs []string
+	var noneExistedNs []string
+	for _, ns := range namespaces {
+		if _, ok := namespaceMap[ns]; ok {
+			existedNs = append(existedNs, ns)
+		} else {
+			noneExistedNs = append(noneExistedNs, ns)
+		}
+	}
+	return existedNs, noneExistedNs
+}
+
+// getAllNamespace returns the names of all namespaces in the cluster as the
+// keys of a map (every value is 0), or the list error after logging it.
+func getAllNamespace() (map[string]int, error) {
+	k8sClient := client.NewK8sClient()
+	nsList, err := k8sClient.CoreV1().Namespaces().List(metaV1.ListOptions{})
+	if err != nil {
+		glog.Errorln(err)
+		return nil, err
+	}
+	namespaceMap := make(map[string]int, len(nsList.Items))
+	for _, item := range nsList.Items {
+		namespaceMap[item.Name] = 0
+	}
+	return namespaceMap, nil
+}
+
+// MonitorWorkloadCount returns the workload_count metric (resource quota usage)
+// of the namespace named by the "ns_name" path parameter. The metric carries
+// an error status and an "errormsg" entry when the quota lookup fails or the
+// namespace does not exist.
+func MonitorWorkloadCount(request *restful.Request) FormatedMetric {
+	namespace := strings.Trim(request.PathParameter("ns_name"), " ")
+
+	// The quota may be nil when the lookup failed (TODO confirm against
+	// models.GetNamespaceQuota); convertQuota2MetricStruct tolerates that.
+	quotaMetric, err := models.GetNamespaceQuota(namespace)
+	fMetric := convertQuota2MetricStruct(quotaMetric)
+
+	// whether the namespace in request parameters exists?
+	// If the namespaces cannot be listed the check is skipped and the
+	// namespace is assumed to exist.
+	exists := true
+	if namespaceMap, e := getAllNamespace(); e == nil {
+		_, exists = namespaceMap[namespace]
+	}
+	if exists && err == nil {
+		return fMetric
+	}
+
+	fMetric.Status = MetricStatusError
+	fMetric.Data.ResultType = ""
+	errInfo := make(map[string]interface{})
+	if err != nil {
+		errInfo["errormsg"] = err.Error()
+	} else {
+		errInfo["errormsg"] = "namespace " + namespace + " does not exist"
+	}
+	fMetric.Data.Result = []map[string]interface{}{errInfo}
+	return fMetric
+}
+
+// convertQuota2MetricStruct turns a resource quota into a vector-type
+// workload_count metric with one result item per used resource; each value is
+// [timestamp, hard limit, used amount], the latter two as strings. A nil quota
+// yields a metric without result items.
+func convertQuota2MetricStruct(quotaMetric *models.ResourceQuota) FormatedMetric {
+	var fMetric FormatedMetric
+	fMetric.MetricName = MetricNameWorkloadCount
+	fMetric.Status = MetricStatusSuccess
+	fMetric.Data.ResultType = ResultTypeVector
+	if quotaMetric == nil {
+		return fMetric
+	}
+	timestamp := time.Now().Unix()
+
+	hardMap := make(map[string]string, len(quotaMetric.Data.Hard))
+	for resourceName, v := range quotaMetric.Data.Hard {
+		hardMap[resourceName.String()] = v.String()
+	}
+
+	// Left nil when nothing is used, as before.
+	var resultItems []map[string]interface{}
+	for resourceName, v := range quotaMetric.Data.Used {
+		name := resourceName.String()
+		resultItems = append(resultItems, map[string]interface{}{
+			ResultItemMetric: map[string]string{ResultItemMetricResource: name},
+			// A resource without a hard limit reports "" as its limit.
+			ResultItemValue: []interface{}{timestamp, hardMap[name], v.String()},
+		})
+	}
+
+	fMetric.Data.Result = resultItems
+	return fMetric
+}
diff --git a/pkg/models/metrics/metricsconst.go b/pkg/models/metrics/metricsconst.go
new file mode 100644
index 000000000..a44d302a5
--- /dev/null
+++ b/pkg/models/metrics/metricsconst.go
@@ -0,0 +1,236 @@
+/*
+Copyright 2018 The KubeSphere Authors.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package metrics
+
+// Result types, status values and result-item keys used when building
+// formatted metric responses.
+const (
+	ResultTypeVector         = "vector"
+	ResultTypeMatrix         = "matrix"
+	MetricStatusError        = "error"
+	MetricStatusSuccess      = "success"
+	ResultItemMetric         = "metric"
+	ResultItemMetricResource = "resource"
+	ResultItemValue          = "value"
+)
+
+// Names of the count-style metrics.
+const (
+	MetricNameWorkloadCount     = "workload_count"
+	MetricNameNamespacePodCount = "namespace_pod_count"
+
+	MetricNameWorkspaceAllOrganizationCount = "workspace_all_organization_count"
+	MetricNameWorkspaceAllAccountCount      = "workspace_all_account_count"
+	MetricNameWorkspaceAllProjectCount      = "workspace_all_project_count"
+	MetricNameWorkspaceAllDevopsCount       = "workspace_all_devops_project_count"
+
+	MetricNameWorkspaceNamespaceCount = "workspace_namespace_count"
+	MetricNameWorkspaceDevopsCount    = "workspace_devops_project_count"
+	MetricNameWorkspaceMemberCount    = "workspace_member_count"
+	MetricNameWorkspaceRoleCount      = "workspace_role_count"
+
+	MetricNameClusterHealthyNodeCount   = "cluster_node_online"
+	MetricNameClusterUnhealthyNodeCount = "cluster_node_offline"
+	MetricNameClusterNodeCount          = "cluster_node_total"
+)
+
+// Resource kinds counted at workspace level.
+const (
+	WorkspaceResourceKindOrganization = "organization"
+	WorkspaceResourceKindAccount      = "account"
+	WorkspaceResourceKindNamespace    = "namespace"
+	WorkspaceResourceKindDevops       = "devops"
+	WorkspaceResourceKindMember       = "member"
+	WorkspaceResourceKindRole         = "role"
+)
+
+// Metric levels; each value is also the prefix of the metric names that
+// belong to that level in MetricsNames.
+const (
+	MetricLevelCluster   = "cluster"
+	MetricLevelNode      = "node"
+	MetricLevelWorkspace = "workspace"
+	MetricLevelNamespace = "namespace"
+	MetricLevelPod       = "pod"
+	MetricLevelContainer = "container"
+	MetricLevelWorkload  = "workload"
+)
+
+// MetricMap maps a metric name to a string value; RulePromQLTmplMap uses it to
+// hold PromQL templates.
+type MetricMap map[string]string
+
+// MetricsNames lists the supported metric names, grouped by level prefix.
+var MetricsNames = []string{
+	"cluster_cpu_utilisation",
+	"cluster_cpu_usage",
+	"cluster_cpu_total",
+	"cluster_memory_utilisation",
+	"cluster_pod_count",
+	"cluster_memory_bytes_available",
+	"cluster_memory_bytes_total",
+	"cluster_memory_bytes_usage",
+	"cluster_net_utilisation",
+	"cluster_net_bytes_transmitted",
+	"cluster_net_bytes_received",
+	"cluster_disk_read_iops",
+	"cluster_disk_write_iops",
+	"cluster_disk_read_throughput",
+	"cluster_disk_write_throughput",
+	"cluster_disk_size_usage",
+	"cluster_disk_size_utilisation",
+	"cluster_disk_size_capacity",
+	"cluster_disk_size_available",
+	"cluster_node_online",
+	"cluster_node_offline",
+	"cluster_node_total",
+
+	"node_cpu_utilisation",
+	"node_cpu_total",
+	"node_cpu_usage",
+	"node_memory_utilisation",
+	"node_memory_bytes_usage",
+	"node_memory_bytes_available",
+	"node_memory_bytes_total",
+	"node_net_utilisation",
+	"node_net_bytes_transmitted",
+	"node_net_bytes_received",
+	"node_disk_read_iops",
+	"node_disk_write_iops",
+	"node_disk_read_throughput",
+	"node_disk_write_throughput",
+	"node_disk_size_capacity",
+	"node_disk_size_available",
+	"node_disk_size_usage",
+	"node_disk_size_utilisation",
+	"node_pod_count",
+	"node_pod_quota",
+
+	"namespace_cpu_usage",
+	"namespace_memory_usage",
+	"namespace_memory_usage_wo_cache",
+	"namespace_net_bytes_transmitted",
+	"namespace_net_bytes_received",
+	"namespace_pod_count",
+
+	"pod_cpu_usage",
+	"pod_memory_usage",
+	"pod_memory_usage_wo_cache",
+	"pod_net_bytes_transmitted",
+	"pod_net_bytes_received",
+
+	"workload_pod_cpu_usage",
+	"workload_pod_memory_usage",
+	"workload_pod_memory_usage_wo_cache",
+	"workload_pod_net_bytes_transmitted",
+	"workload_pod_net_bytes_received",
+	//"container_cpu_usage",
+	//"container_memory_usage_wo_cache",
+	//"container_memory_usage",
+
+	"workspace_cpu_usage",
+	"workspace_memory_usage",
+	"workspace_memory_usage_wo_cache",
+	"workspace_net_bytes_transmitted",
+	"workspace_net_bytes_received",
+	"workspace_pod_count",
+}
+
+// RulePromQLTmplMap maps a metric name to its PromQL expression or template.
+// Templates carry positional placeholders ($1, $2, $3) that are replaced
+// before the query is sent; MakeWorkspacePromQL, for instance, replaces $1
+// with a namespace regular expression. The meaning of a placeholder differs
+// between entries — presumably the other Make*PromQL/Make*Rule helpers fill
+// them in; verify against those helpers.
+var RulePromQLTmplMap = MetricMap{
+	//cluster
+	"cluster_cpu_utilisation": ":node_cpu_utilisation:avg1m",
+	"cluster_cpu_usage": `sum (irate(container_cpu_usage_seconds_total{job="kubelet", image!=""}[5m]))`,
+	"cluster_cpu_total": "sum(node:node_num_cpu:sum)",
+	"cluster_memory_utilisation": ":node_memory_utilisation:",
+	"cluster_pod_count": `count(kube_pod_info unless on(pod) kube_pod_completion_time unless on(node) kube_node_labels{label_role="log"})`,
+	"cluster_memory_bytes_available": "sum(node:node_memory_bytes_available:sum)",
+	"cluster_memory_bytes_total": "sum(node:node_memory_bytes_total:sum)",
+	"cluster_memory_bytes_usage": "sum(node:node_memory_bytes_total:sum) - sum(node:node_memory_bytes_available:sum)",
+	"cluster_net_utilisation": "sum(node:node_net_utilisation:sum_irate)",
+	"cluster_net_bytes_transmitted": "sum(node:node_net_bytes_transmitted:sum_irate)",
+	"cluster_net_bytes_received": "sum(node:node_net_bytes_received:sum_irate)",
+	"cluster_disk_read_iops": "sum(node:data_volume_iops_reads:sum)",
+	"cluster_disk_write_iops": "sum(node:data_volume_iops_writes:sum)",
+	"cluster_disk_read_throughput": "sum(node:data_volume_throughput_bytes_read:sum)",
+	"cluster_disk_write_throughput": "sum(node:data_volume_throughput_bytes_written:sum)",
+	"cluster_disk_size_usage": `sum(sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:)) - sum(sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:))`,
+	"cluster_disk_size_utilisation": `(sum(sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:)) - sum(sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:))) / sum(sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:))`,
+	"cluster_disk_size_capacity": `sum(sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:))`,
+	"cluster_disk_size_available": `sum(sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:))`,
+
+	//node
+	"node_cpu_utilisation": "node:node_cpu_utilisation:avg1m",
+	"node_cpu_total": "node:node_num_cpu:sum",
+	"node_memory_utilisation": "node:node_memory_utilisation:",
+	"node_memory_bytes_available": "node:node_memory_bytes_available:sum",
+	"node_memory_bytes_total": "node:node_memory_bytes_total:sum",
+	// Node network utilisation (bytes received + bytes transmitted per second)
+	"node_net_utilisation": "node:node_net_utilisation:sum_irate",
+	// Node network bytes transmitted per second
+	"node_net_bytes_transmitted": "node:node_net_bytes_transmitted:sum_irate",
+	// Node network bytes received per second
+	"node_net_bytes_received": "node:node_net_bytes_received:sum_irate",
+
+	// node:data_volume_iops_reads:sum{node=~"i-5xcldxos|i-6soe9zl1"}
+	"node_disk_read_iops": "node:data_volume_iops_reads:sum",
+	// node:data_volume_iops_writes:sum{node=~"i-5xcldxos|i-6soe9zl1"}
+	"node_disk_write_iops": "node:data_volume_iops_writes:sum",
+	// node:data_volume_throughput_bytes_read:sum{node=~"i-5xcldxos|i-6soe9zl1"}
+	"node_disk_read_throughput": "node:data_volume_throughput_bytes_read:sum",
+	// node:data_volume_throughput_bytes_written:sum{node=~"i-5xcldxos|i-6soe9zl1"}
+	"node_disk_write_throughput": "node:data_volume_throughput_bytes_written:sum",
+
+	"node_disk_size_capacity": `sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
+	"node_disk_size_available": `sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
+	"node_disk_size_usage": `sum by (node) ((node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1) -sum by (node) ((node_filesystem_avail{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
+	"node_disk_size_utilisation": `sum by (node) (((node_filesystem_size{mountpoint="/", job="node-exporter"} - node_filesystem_avail{mountpoint="/", job="node-exporter"}) / node_filesystem_size{mountpoint="/", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
+	"node_pod_count": `count(kube_pod_info$1 unless on(pod) kube_pod_completion_time) by (node)`,
+	// without log node: unless on(node) kube_node_labels{label_role="log"}
+	"node_pod_quota": `sum(kube_node_status_capacity_pods$1) by (node)`,
+	"node_cpu_usage": `sum by (node) (label_join(irate(container_cpu_usage_seconds_total{job="kubelet", image!=""}[5m]), "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`,
+	"node_memory_bytes_usage": "node:node_memory_bytes_total:sum$1 - node:node_memory_bytes_available:sum$1",
+
+	//namespace
+	"namespace_cpu_usage": `namespace:container_cpu_usage_seconds_total:sum_rate{namespace=~"$1"}`,
+	"namespace_memory_usage": `namespace:container_memory_usage_bytes:sum{namespace=~"$1"}`,
+	"namespace_memory_usage_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace=~"$1"}`,
+	"namespace_net_bytes_transmitted": `sum by (namespace) (irate(container_network_transmit_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[5m]))`,
+	"namespace_net_bytes_received": `sum by (namespace) (irate(container_network_receive_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[5m]))`,
+	"namespace_pod_count": `count(kube_pod_info{namespace=~"$1"} unless on(pod) kube_pod_completion_time) by (namespace)`,
+
+	// pod
+	"pod_cpu_usage": `sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name="$2", image!=""}[5m])) by (namespace, pod_name)`,
+	"pod_memory_usage": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name="$2", image!=""}) by (namespace, pod_name)`,
+	"pod_memory_usage_wo_cache": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name="$2", image!=""} - container_memory_cache{job="kubelet", namespace="$1", pod_name="$2",image!=""}) by (namespace, pod_name)`,
+	"pod_net_bytes_transmitted": `sum by (namespace, pod_name) (irate(container_network_transmit_bytes_total{namespace="$1", pod_name!="", pod_name="$2", interface="eth0", job="kubelet"}[5m]))`,
+	"pod_net_bytes_received": `sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{namespace="$1", pod_name!="", pod_name="$2", interface="eth0", job="kubelet"}[5m]))`,
+
+	"pod_cpu_usage_all": `sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}[5m])) by (namespace, pod_name)`,
+	"pod_memory_usage_all": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}) by (namespace, pod_name)`,
+	"pod_memory_usage_wo_cache_all": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name=~"$2", image!=""} - container_memory_cache{job="kubelet", namespace="$1", pod_name=~"$2", image!=""}) by (namespace, pod_name)`,
+	"pod_net_bytes_transmitted_all": `sum by (namespace, pod_name) (irate(container_network_transmit_bytes_total{namespace="$1", pod_name!="", pod_name=~"$2", interface="eth0", job="kubelet"}[5m]))`,
+	"pod_net_bytes_received_all": `sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{namespace="$1", pod_name!="", pod_name=~"$2", interface="eth0", job="kubelet"}[5m]))`,
+
+	"pod_cpu_usage_node": `sum by (node, pod) (label_join(irate(container_cpu_usage_seconds_total{job="kubelet",pod_name=~"$2", image!=""}[5m]), "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
+	"pod_memory_usage_node": `sum by (node, pod) (label_join(container_memory_usage_bytes{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name") * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
+	"pod_memory_usage_wo_cache_node": `sum by (node, pod) ((label_join(container_memory_usage_bytes{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name") - label_join(container_memory_cache{job="kubelet",pod_name=~"$2", image!=""}, "pod", " ", "pod_name")) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{node=~"$3"})`,
+
+	// container
+	"container_cpu_usage": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", pod_name="$2", container_name="$3"}[5m])) by (namespace, pod_name, container_name)`,
+	"container_cpu_usage_all": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}[5m])) by (namespace, pod_name, container_name)`,
+
+	"container_memory_usage_wo_cache": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name="$3"} - ignoring(id, image, endpoint, instance, job, name, service) container_memory_cache{namespace="$1", pod_name="$2", container_name="$3"}`,
+	"container_memory_usage_wo_cache_all": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"} - ignoring(id, image, endpoint, instance, job, name, service) container_memory_cache{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}`,
+	"container_memory_usage": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name="$3"}`,
+	"container_memory_usage_all": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name=~"$3", container_name!="POD"}`,
+
+	// enterprise
+	"workspace_cpu_usage": `sum(namespace:container_cpu_usage_seconds_total:sum_rate{namespace =~"$1"})`,
+	"workspace_memory_usage": `sum(namespace:container_memory_usage_bytes:sum{namespace =~"$1"})`,
+	"workspace_memory_usage_wo_cache": `sum(namespace:container_memory_usage_bytes_wo_cache:sum{namespace =~"$1"})`,
+	"workspace_net_bytes_transmitted": `sum(sum by (namespace) (irate(container_network_transmit_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[5m])))`,
+	"workspace_net_bytes_received": `sum(sum by (namespace) (irate(container_network_receive_bytes_total{namespace=~"$1", pod_name!="", interface="eth0", job="kubelet"}[5m])))`,
+	"workspace_pod_count": `sum(count(kube_pod_info{namespace=~"$1"} unless on(pod) kube_pod_completion_time) by (namespace))`,
+}
diff --git a/pkg/models/metrics/metrics_rule.go b/pkg/models/metrics/metricsrule.go
similarity index 94%
rename from pkg/models/metrics/metrics_rule.go
rename to pkg/models/metrics/metricsrule.go
index 14be69488..ee3ee3b70 100644
--- a/pkg/models/metrics/metrics_rule.go
+++ b/pkg/models/metrics/metricsrule.go
@@ -67,6 +67,12 @@ func MakeWorkloadRule(request *restful.Request) string {
 	return rule
 }
 
+// MakeWorkspacePromQL returns the PromQL template registered for metricsName
+// with every "$1" placeholder replaced by namespaceRe2. A name that is not in
+// RulePromQLTmplMap yields an empty string.
+func MakeWorkspacePromQL(metricsName string, namespaceRe2 string) string {
+	promql := RulePromQLTmplMap[metricsName]
+	promql = strings.Replace(promql, "$1", namespaceRe2, -1)
+	return promql
+}
+
 func MakeContainerPromQL(request *restful.Request) string {
 	nsName := strings.Trim(request.PathParameter("ns_name"), " ")
 	poName := strings.Trim(request.PathParameter("pod_name"), " ")
@@ -169,9 +175,8 @@ func MakeNodeorClusterRule(request *restful.Request, metricsName string) string
 	if nodesFilter == "" {
 		nodesFilter = ".*"
 	}
-	if strings.Contains(metricsName, "disk") && (!(strings.Contains(metricsName, "read") || strings.Contains(metricsName, "write"))) {
+	if strings.Contains(metricsName, "disk_size") || strings.Contains(metricsName, "pod") || strings.Contains(metricsName, "usage") {
 		// disk size promql
-		nodesFilter := ""
 		if nodeID != "" {
 			nodesFilter = "{" + "node" + "=" + "\"" + nodeID + "\"" + "}"
 		} else {
diff --git a/pkg/models/metrics/metrics_struct.go b/pkg/models/metrics/metricsstruct.go
similarity index 100%
rename from pkg/models/metrics/metrics_struct.go
rename to pkg/models/metrics/metricsstruct.go