From 1df035ed06ca3fec01a1e642b243fa06d0a5d9b8 Mon Sep 17 00:00:00 2001 From: junotx Date: Mon, 7 Dec 2020 17:59:42 +0800 Subject: [PATCH] support for querying pods metrics from the cluster level Signed-off-by: junotx --- pkg/kapis/monitoring/v1alpha3/register.go | 18 ++++++++++++++++++ .../client/monitoring/prometheus/promql.go | 6 ++++++ .../monitoring/prometheus/promql_test.go | 7 +++++++ .../monitoring/prometheus/testdata/promqls.go | 1 + 4 files changed, 32 insertions(+) diff --git a/pkg/kapis/monitoring/v1alpha3/register.go b/pkg/kapis/monitoring/v1alpha3/register.go index 3a799bef2..f1e778ce3 100644 --- a/pkg/kapis/monitoring/v1alpha3/register.go +++ b/pkg/kapis/monitoring/v1alpha3/register.go @@ -219,6 +219,24 @@ func AddToContainer(c *restful.Container, k8sClient kubernetes.Interface, monito Returns(http.StatusOK, respOK, model.Metrics{})). Produces(restful.MIME_JSON) + ws.Route(ws.GET("/pods"). + To(h.handlePodMetricsQuery). + Doc("Get pod-level metric data of the whole cluster's pods."). + Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both pod CPU usage and memory usage: `pod_cpu_usage|pod_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). + Param(ws.QueryParameter("resources_filter", "The pod filter consists of a regexp pattern. It specifies which pod data to return. For example, the following filter matches any pod whose name begins with redis: `redis.*`.").DataType("string").Required(false)). + Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("end", "End time of query. 
Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). + Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_metric", "Sort pods by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_type", "Sort order. One of asc, desc.").DefaultValue("desc.").DataType("string").Required(false)). + Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). + Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). + Metadata(restfulspec.KeyOpenAPITags, []string{constants.PodMetricsTag}). + Writes(model.Metrics{}). + Returns(http.StatusOK, respOK, model.Metrics{})). + Produces(restful.MIME_JSON) + ws.Route(ws.GET("/namespaces/{namespace}/pods"). To(h.handlePodMetricsQuery). Doc("Get pod-level metric data of the specific namespace's pods."). 
diff --git a/pkg/simple/client/monitoring/prometheus/promql.go b/pkg/simple/client/monitoring/prometheus/promql.go index d76b031b4..a04c86565 100644 --- a/pkg/simple/client/monitoring/prometheus/promql.go +++ b/pkg/simple/client/monitoring/prometheus/promql.go @@ -349,6 +349,12 @@ func makePodMetricExpr(tmpl string, o monitoring.QueryOptions) string { } } + // For monitoring pods in the whole cluster + // GET /pods + if o.NamespaceName == "" && o.NodeName == "" { + podSelector = fmt.Sprintf(`pod=~"%s"`, o.ResourceFilter) + } + // For monitoring pods in the specific namespace // GET /namespaces/{namespace}/workloads/{kind}/{workload}/pods or // GET /namespaces/{namespace}/pods/{pod} or diff --git a/pkg/simple/client/monitoring/prometheus/promql_test.go b/pkg/simple/client/monitoring/prometheus/promql_test.go index dec976a91..60f71d1bd 100644 --- a/pkg/simple/client/monitoring/prometheus/promql_test.go +++ b/pkg/simple/client/monitoring/prometheus/promql_test.go @@ -135,6 +135,13 @@ func TestMakeExpr(t *testing.T) { PodName: "elasticsearch-12345", }, }, + { + name: "pod_net_bytes_transmitted", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelPod, + ResourceFilter: "elasticsearch-0", + }, + }, { name: "container_cpu_usage", opts: monitoring.QueryOptions{ diff --git a/pkg/simple/client/monitoring/prometheus/testdata/promqls.go b/pkg/simple/client/monitoring/prometheus/testdata/promqls.go index 65c035c64..56112c51a 100644 --- a/pkg/simple/client/monitoring/prometheus/testdata/promqls.go +++ b/pkg/simple/client/monitoring/prometheus/testdata/promqls.go @@ -31,6 +31,7 @@ var PromQLs = map[string]string{ "pod_cpu_usage": `round(sum by (namespace, pod) (irate(container_cpu_usage_seconds_total{job="kubelet", pod!="", image!=""}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{owner_kind="ReplicaSet", owner_name=~"^elasticsearch-[^-]{1,10}$"} * on (namespace, pod) group_left(node) kube_pod_info{pod=~"elasticsearch-0", 
namespace="default"}, 0.001)`, "pod_memory_usage": `sum by (namespace, pod) (container_memory_usage_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{} * on (namespace, pod) group_left(node) kube_pod_info{pod="elasticsearch-12345", namespace="default"}`, "pod_memory_usage_wo_cache": `sum by (namespace, pod) (container_memory_working_set_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{} * on (namespace, pod) group_left(node) kube_pod_info{pod="elasticsearch-12345", node="i-2dazc1d6"}`, + "pod_net_bytes_transmitted": `sum by (namespace, pod) (irate(container_network_transmit_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{} * on (namespace, pod) group_left(node) kube_pod_info{pod=~"elasticsearch-0"}`, "container_cpu_usage": `round(sum by (namespace, pod, container) (irate(container_cpu_usage_seconds_total{job="kubelet", container!="POD", container!="", image!="", pod="elasticsearch-12345", namespace="default", container="syscall"}[5m])), 0.001)`, "container_memory_usage": `sum by (namespace, pod, container) (container_memory_usage_bytes{job="kubelet", container!="POD", container!="", image!="", pod="elasticsearch-12345", namespace="default", container=~"syscall"})`, "pvc_inodes_available": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes_free) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{namespace="default", persistentvolumeclaim="db-123"}`,