From fc56333d3cd5502971ba71dfd79eabea1e9b30a1 Mon Sep 17 00:00:00 2001 From: huanggze Date: Fri, 24 Apr 2020 15:56:07 +0800 Subject: [PATCH] monitoring: some bug fix Signed-off-by: huanggze --- pkg/kapis/monitoring/v1alpha3/helper.go | 27 ++- pkg/kapis/monitoring/v1alpha3/helper_test.go | 59 +++++- .../label_replace.go => prometheus.go} | 25 ++- ...bel_replace_test.go => prometheus_test.go} | 2 +- pkg/models/monitoring/expressions/registry.go | 2 +- pkg/models/monitoring/sort_page_test.go | 94 ++++++++-- .../testdata/null-node-metrics-sorted.json | 53 ++++++ .../testdata/null-node-metrics.json | 45 +++++ .../monitoring/prometheus/prometheus.go | 19 +- .../monitoring/prometheus/prometheus_test.go | 30 ++- .../client/monitoring/prometheus/promql.go | 70 +++---- .../monitoring/prometheus/promql_test.go | 171 +++++++++++++++--- .../prometheus/testdata/metadata-prom.json | 10 + .../monitoring/prometheus/testdata/promqls.go | 12 +- pkg/simple/client/monitoring/query_options.go | 2 + pkg/simple/client/monitoring/types.go | 7 +- 16 files changed, 507 insertions(+), 121 deletions(-) rename pkg/models/monitoring/expressions/{prometheus/label_replace.go => prometheus.go} (71%) rename pkg/models/monitoring/expressions/{prometheus/label_replace_test.go => prometheus_test.go} (98%) create mode 100644 pkg/models/monitoring/testdata/null-node-metrics-sorted.json create mode 100644 pkg/models/monitoring/testdata/null-node-metrics.json diff --git a/pkg/kapis/monitoring/v1alpha3/helper.go b/pkg/kapis/monitoring/v1alpha3/helper.go index 363822b56..9bbb2e2f8 100644 --- a/pkg/kapis/monitoring/v1alpha3/helper.go +++ b/pkg/kapis/monitoring/v1alpha3/helper.go @@ -235,22 +235,33 @@ func (h handler) makeQueryOptions(r reqParams, lvl monitoring.Level) (q queryOpt if err != nil { return q, err } - cts := ns.CreationTimestamp.Time - if q.start.Before(cts) { - q.start = cts - } - if q.end.Before(cts) { - return q, errors.New(ErrNoHit) + + // Query should happen no earlier than namespace's 
creation time. + // For range query, check and mutate `start`. For instant query, check and mutate `time`. + // In range query, if `start` and `end` are both before namespace's creation time, it causes no hit. + if !q.isRangeQuery() { + if q.time.Before(cts) { + q.time = cts + } + } else { + if q.start.Before(cts) { + q.start = cts + } + if q.end.Before(cts) { + return q, errors.New(ErrNoHit) + } } + } // Parse sorting and paging params if r.target != "" { + q.target = r.target q.page = DefaultPage q.limit = DefaultLimit - if q.order != model.OrderAscending { - r.order = DefaultOrder + if r.order != model.OrderAscending { + q.order = DefaultOrder } if r.page != "" { q.page, err = strconv.Atoi(r.page) diff --git a/pkg/kapis/monitoring/v1alpha3/helper_test.go b/pkg/kapis/monitoring/v1alpha3/helper_test.go index 4a099622e..b7aed1b23 100644 --- a/pkg/kapis/monitoring/v1alpha3/helper_test.go +++ b/pkg/kapis/monitoring/v1alpha3/helper_test.go @@ -2,12 +2,12 @@ package v1alpha3 import ( "fmt" + "github.com/google/go-cmp/cmp" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/fake" model "kubesphere.io/kubesphere/pkg/models/monitoring" "kubesphere.io/kubesphere/pkg/simple/client/monitoring" - "reflect" "testing" "time" ) @@ -100,6 +100,32 @@ func TestParseRequestParams(t *testing.T) { }, expectedErr: false, }, + { + params: reqParams{ + time: "1585830000", + namespaceName: "default", + }, + lvl: monitoring.LevelNamespace, + namespace: corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "default", + CreationTimestamp: metav1.Time{ + Time: time.Unix(1585836666, 0), + }, + }, + }, + expected: queryOptions{ + time: time.Unix(1585836666, 0), + identifier: model.IdentifierNamespace, + metricFilter: ".*", + namedMetrics: model.NamespaceMetrics, + option: monitoring.NamespaceOption{ + ResourceFilter: ".*", + NamespaceName: "default", + }, + }, + expectedErr: false, + }, { params: reqParams{ start: "1585830000", @@ -151,6 
+177,33 @@ func TestParseRequestParams(t *testing.T) { }, expectedErr: false, }, + { + params: reqParams{ + time: "1585830000", + workspaceName: "system-workspace", + metricFilter: "namespace_memory_usage_wo_cache|namespace_memory_limit_hard|namespace_cpu_usage", + page: "1", + limit: "10", + order: "desc", + target: "namespace_cpu_usage", + }, + lvl: monitoring.LevelNamespace, + expected: queryOptions{ + time: time.Unix(1585830000, 0), + metricFilter: "namespace_memory_usage_wo_cache|namespace_memory_limit_hard|namespace_cpu_usage", + namedMetrics: model.NamespaceMetrics, + option: monitoring.NamespaceOption{ + ResourceFilter: ".*", + WorkspaceName: "system-workspace", + }, + target: "namespace_cpu_usage", + identifier: "namespace", + order: "desc", + page: 1, + limit: 10, + }, + expectedErr: false, + }, } for i, tt := range tests { @@ -170,8 +223,8 @@ func TestParseRequestParams(t *testing.T) { t.Fatalf("failed to catch error.") } - if !reflect.DeepEqual(result, tt.expected) { - t.Fatalf("unexpected return: %v.", result) + if diff := cmp.Diff(result, tt.expected, cmp.AllowUnexported(result, tt.expected)); diff != "" { + t.Fatalf("%T differ (-got, +want): %s", tt.expected, diff) } }) } diff --git a/pkg/models/monitoring/expressions/prometheus/label_replace.go b/pkg/models/monitoring/expressions/prometheus.go similarity index 71% rename from pkg/models/monitoring/expressions/prometheus/label_replace.go rename to pkg/models/monitoring/expressions/prometheus.go index 6e541598f..9bff99142 100644 --- a/pkg/models/monitoring/expressions/prometheus/label_replace.go +++ b/pkg/models/monitoring/expressions/prometheus.go @@ -1,15 +1,14 @@ -package prometheus +package expressions import ( "fmt" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/storage/metric" - "kubesphere.io/kubesphere/pkg/models/monitoring/expressions" ) func init() { - expressions.Register("prometheus", labelReplace) + register("prometheus", 
labelReplace) } func labelReplace(input, ns string) (string, error) { @@ -18,7 +17,7 @@ func labelReplace(input, ns string) (string, error) { return "", err } - SetRecursive(root, ns) + setRecursive(root, ns) if err != nil { return "", err } @@ -27,39 +26,39 @@ func labelReplace(input, ns string) (string, error) { } // Inspired by https://github.com/openshift/prom-label-proxy -func SetRecursive(node promql.Node, namespace string) (err error) { +func setRecursive(node promql.Node, namespace string) (err error) { switch n := node.(type) { case *promql.EvalStmt: - if err := SetRecursive(n.Expr, namespace); err != nil { + if err := setRecursive(n.Expr, namespace); err != nil { return err } case promql.Expressions: for _, e := range n { - if err := SetRecursive(e, namespace); err != nil { + if err := setRecursive(e, namespace); err != nil { return err } } case *promql.AggregateExpr: - if err := SetRecursive(n.Expr, namespace); err != nil { + if err := setRecursive(n.Expr, namespace); err != nil { return err } case *promql.BinaryExpr: - if err := SetRecursive(n.LHS, namespace); err != nil { + if err := setRecursive(n.LHS, namespace); err != nil { return err } - if err := SetRecursive(n.RHS, namespace); err != nil { + if err := setRecursive(n.RHS, namespace); err != nil { return err } case *promql.Call: - if err := SetRecursive(n.Args, namespace); err != nil { + if err := setRecursive(n.Args, namespace); err != nil { return err } case *promql.ParenExpr: - if err := SetRecursive(n.Expr, namespace); err != nil { + if err := setRecursive(n.Expr, namespace); err != nil { return err } case *promql.UnaryExpr: - if err := SetRecursive(n.Expr, namespace); err != nil { + if err := setRecursive(n.Expr, namespace); err != nil { return err } case *promql.NumberLiteral, *promql.StringLiteral: diff --git a/pkg/models/monitoring/expressions/prometheus/label_replace_test.go b/pkg/models/monitoring/expressions/prometheus_test.go similarity index 98% rename from 
pkg/models/monitoring/expressions/prometheus/label_replace_test.go rename to pkg/models/monitoring/expressions/prometheus_test.go index d265dc093..05fafdaa9 100644 --- a/pkg/models/monitoring/expressions/prometheus/label_replace_test.go +++ b/pkg/models/monitoring/expressions/prometheus_test.go @@ -1,4 +1,4 @@ -package prometheus +package expressions import ( "fmt" diff --git a/pkg/models/monitoring/expressions/registry.go b/pkg/models/monitoring/expressions/registry.go index 12a606ad4..8775cd136 100644 --- a/pkg/models/monitoring/expressions/registry.go +++ b/pkg/models/monitoring/expressions/registry.go @@ -4,6 +4,6 @@ type labelReplaceFn func(expr, ns string) (string, error) var ReplaceNamespaceFns = make(map[string]labelReplaceFn) -func Register(name string, fn labelReplaceFn) { +func register(name string, fn labelReplaceFn) { ReplaceNamespaceFns[name] = fn } diff --git a/pkg/models/monitoring/sort_page_test.go b/pkg/models/monitoring/sort_page_test.go index 6dce082d3..cdb750cef 100644 --- a/pkg/models/monitoring/sort_page_test.go +++ b/pkg/models/monitoring/sort_page_test.go @@ -10,21 +10,52 @@ import ( func TestSort(t *testing.T) { tests := []struct { - name string target string order string identifier string - source string + raw string expected string }{ - {"sort in ascending order", "node_cpu_utilisation", "asc", "node", "source-node-metrics.json", "sorted-node-metrics-asc.json"}, - {"sort in descending order", "node_memory_utilisation", "desc", "node", "source-node-metrics.json", "sorted-node-metrics-desc.json"}, - {"sort faulty metrics", "node_memory_utilisation", "desc", "node", "faulty-node-metrics.json", "faulty-node-metrics-sorted.json"}, - {"sort metrics with an blank node", "node_memory_utilisation", "desc", "node", "blank-node-metrics.json", "blank-node-metrics-sorted.json"}, + { + target: "node_cpu_utilisation", + order: "asc", + identifier: "node", + raw: "source-node-metrics.json", + expected: "sorted-node-metrics-asc.json", + }, + { + target: 
"node_memory_utilisation", + order: "desc", + identifier: "node", + raw: "source-node-metrics.json", + expected: "sorted-node-metrics-desc.json", + }, + { + target: "node_memory_utilisation", + order: "desc", + identifier: "node", + raw: "faulty-node-metrics.json", + expected: "faulty-node-metrics-sorted.json", + }, + { + target: "node_memory_utilisation", + order: "desc", + identifier: "node", + raw: "blank-node-metrics.json", + expected: "blank-node-metrics-sorted.json", + }, + { + target: "node_memory_utilisation", + order: "desc", + identifier: "node", + raw: "null-node-metrics.json", + expected: "null-node-metrics-sorted.json", + }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - source, expected, err := jsonFromFile(tt.source, tt.expected) + + for i, tt := range tests { + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + source, expected, err := jsonFromFile(tt.raw, tt.expected) if err != nil { t.Fatal(err) } @@ -39,21 +70,46 @@ func TestSort(t *testing.T) { func TestPage(t *testing.T) { tests := []struct { - name string page int limit int - source string + raw string expected string }{ - {"page 0 limit 5", 0, 5, "sorted-node-metrics-asc.json", "sorted-node-metrics-asc.json"}, - {"page 1 limit 5", 1, 5, "sorted-node-metrics-asc.json", "paged-node-metrics-1.json"}, - {"page 2 limit 5", 2, 5, "sorted-node-metrics-asc.json", "paged-node-metrics-2.json"}, - {"page 3 limit 5", 3, 5, "sorted-node-metrics-asc.json", "paged-node-metrics-3.json"}, - {"page faulty metrics", 1, 2, "faulty-node-metrics-sorted.json", "faulty-node-metrics-paged.json"}, + { + page: 0, + limit: 5, + raw: "sorted-node-metrics-asc.json", + expected: "sorted-node-metrics-asc.json", + }, + { + page: 1, + limit: 5, + raw: "sorted-node-metrics-asc.json", + expected: "paged-node-metrics-1.json", + }, + { + page: 2, + limit: 5, + raw: "sorted-node-metrics-asc.json", + expected: "paged-node-metrics-2.json", + }, + { + page: 3, + limit: 5, + raw: "sorted-node-metrics-asc.json", 
+ expected: "paged-node-metrics-3.json", + }, + { + page: 1, + limit: 2, + raw: "faulty-node-metrics-sorted.json", + expected: "faulty-node-metrics-paged.json", + }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - source, expected, err := jsonFromFile(tt.source, tt.expected) + + for i, tt := range tests { + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + source, expected, err := jsonFromFile(tt.raw, tt.expected) if err != nil { t.Fatal(err) } diff --git a/pkg/models/monitoring/testdata/null-node-metrics-sorted.json b/pkg/models/monitoring/testdata/null-node-metrics-sorted.json new file mode 100644 index 000000000..405270145 --- /dev/null +++ b/pkg/models/monitoring/testdata/null-node-metrics-sorted.json @@ -0,0 +1,53 @@ +{ + "results": [ + { + "metric_name": "node_disk_size_utilisation", + "data": { + "resultType": "vector", + "result": [ + {}, + {}, + {} + ] + } + }, + { + "metric_name": "node_memory_utilisation", + "data": { + "resultType": "vector", + "result": [ + { + "metric": { + "node": "i-2dazc1d6" + }, + "value": [ + 1585658599.195, + 0.5286875837861773 + ] + }, + { + "metric": { + "node": "i-hgcoippu" + }, + "value": [ + 1585658599.195, + 0.2497060264216553 + ] + }, + { + "metric": { + "node": "i-ezjb7gsk" + }, + "value": [ + 1585658599.195, + 0.23637090535053928 + ] + } + ] + } + } + ], + "page": 1, + "total_page": 1, + "total_item": 3 +} \ No newline at end of file diff --git a/pkg/models/monitoring/testdata/null-node-metrics.json b/pkg/models/monitoring/testdata/null-node-metrics.json new file mode 100644 index 000000000..23f9468f9 --- /dev/null +++ b/pkg/models/monitoring/testdata/null-node-metrics.json @@ -0,0 +1,45 @@ +{ + "results": [ + { + "metric_name": "node_disk_size_utilisation", + "data": { + "resultType": "vector" + } + }, + { + "metric_name": "node_memory_utilisation", + "data": { + "resultType": "vector", + "result": [ + { + "metric": { + "node": "i-2dazc1d6" + }, + "value": [ + 1585658599.195, + 
0.5286875837861773 + ] + }, + { + "metric": { + "node": "i-ezjb7gsk" + }, + "value": [ + 1585658599.195, + 0.23637090535053928 + ] + }, + { + "metric": { + "node": "i-hgcoippu" + }, + "value": [ + 1585658599.195, + 0.2497060264216553 + ] + } + ] + } + } + ] +} \ No newline at end of file diff --git a/pkg/simple/client/monitoring/prometheus/prometheus.go b/pkg/simple/client/monitoring/prometheus/prometheus.go index a40571c13..86e5662da 100644 --- a/pkg/simple/client/monitoring/prometheus/prometheus.go +++ b/pkg/simple/client/monitoring/prometheus/prometheus.go @@ -138,13 +138,20 @@ func (p prometheus) GetMetadata(namespace string) []monitoring.Metadata { return meta } + // Deduplication + set := make(map[string]bool) for _, item := range items { - meta = append(meta, monitoring.Metadata{ - Metric: item.Metric, - Type: string(item.Type), - Help: item.Help, - }) + _, ok := set[item.Metric] + if !ok { + set[item.Metric] = true + meta = append(meta, monitoring.Metadata{ + Metric: item.Metric, + Type: string(item.Type), + Help: item.Help, + }) + } } + return meta } @@ -186,7 +193,7 @@ func parseQueryResp(value model.Value) monitoring.MetricData { mv.Metadata[string(k)] = string(v) } - mv.Sample = monitoring.Point{float64(v.Timestamp) / 1000, float64(v.Value)} + mv.Sample = &monitoring.Point{float64(v.Timestamp) / 1000, float64(v.Value)} res.MetricValues = append(res.MetricValues, mv) } diff --git a/pkg/simple/client/monitoring/prometheus/prometheus_test.go b/pkg/simple/client/monitoring/prometheus/prometheus_test.go index 16c60ee0e..35d018c82 100644 --- a/pkg/simple/client/monitoring/prometheus/prometheus_test.go +++ b/pkg/simple/client/monitoring/prometheus/prometheus_test.go @@ -14,16 +14,21 @@ import ( func TestGetNamedMetrics(t *testing.T) { tests := []struct { - name string fakeResp string expected string }{ - {"prom returns good values", "metrics-vector-type-prom.json", "metrics-vector-type-res.json"}, - {"prom returns error", "metrics-error-prom.json", 
"metrics-error-res.json"}, + { + fakeResp: "metrics-vector-type-prom.json", + expected: "metrics-vector-type-res.json", + }, + { + fakeResp: "metrics-error-prom.json", + expected: "metrics-error-res.json", + }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { + for i, tt := range tests { + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { expected := make([]monitoring.Metric, 0) err := jsonFromFile(tt.expected, &expected) if err != nil { @@ -44,16 +49,21 @@ func TestGetNamedMetrics(t *testing.T) { func TestGetNamedMetricsOverTime(t *testing.T) { tests := []struct { - name string fakeResp string expected string }{ - {"prom returns good values", "metrics-matrix-type-prom.json", "metrics-matrix-type-res.json"}, - {"prom returns error", "metrics-error-prom.json", "metrics-error-res.json"}, + { + fakeResp: "metrics-matrix-type-prom.json", + expected: "metrics-matrix-type-res.json", + }, + { + fakeResp: "metrics-error-prom.json", + expected: "metrics-error-res.json", + }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { + for i, tt := range tests { + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { expected := make([]monitoring.Metric, 0) err := jsonFromFile(tt.expected, &expected) if err != nil { diff --git a/pkg/simple/client/monitoring/prometheus/promql.go b/pkg/simple/client/monitoring/prometheus/promql.go index 06ced3d34..ed411ca23 100644 --- a/pkg/simple/client/monitoring/prometheus/promql.go +++ b/pkg/simple/client/monitoring/prometheus/promql.go @@ -114,8 +114,8 @@ var promQLTemplates = map[string]string{ "workspace_cpu_usage": `round(sum by (label_kubesphere_io_workspace) (namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", $1}), 0.001)`, "workspace_memory_usage": `sum by (label_kubesphere_io_workspace) (namespace:container_memory_usage_bytes:sum{namespace!="", $1})`, "workspace_memory_usage_wo_cache": `sum by (label_kubesphere_io_workspace) (namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", 
$1})`, - "workspace_net_bytes_transmitted": `sum by (label_kubesphere_io_workspace) (sum by (namespace) (irate(container_network_transmit_bytes_total{namespace!="", pod_name!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, - "workspace_net_bytes_received": `sum by (label_kubesphere_io_workspace) (sum by (namespace) (irate(container_network_receive_bytes_total{namespace!="", pod_name!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "workspace_net_bytes_transmitted": `sum by (label_kubesphere_io_workspace) (sum by (namespace) (irate(container_network_transmit_bytes_total{namespace!="", pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "workspace_net_bytes_received": `sum by (label_kubesphere_io_workspace) (sum by (namespace) (irate(container_network_receive_bytes_total{namespace!="", pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, "workspace_pod_count": `sum by (label_kubesphere_io_workspace) (kube_pod_status_phase{phase!~"Failed|Succeeded", namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, "workspace_pod_running_count": `sum by (label_kubesphere_io_workspace) (kube_pod_status_phase{phase="Running", namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, "workspace_pod_succeeded_count": `sum by (label_kubesphere_io_workspace) (kube_pod_status_phase{phase="Succeeded", 
namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, @@ -138,8 +138,8 @@ var promQLTemplates = map[string]string{ "namespace_cpu_usage": `round(namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", $1}, 0.001)`, "namespace_memory_usage": `namespace:container_memory_usage_bytes:sum{namespace!="", $1}`, "namespace_memory_usage_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", $1}`, - "namespace_net_bytes_transmitted": `sum by (namespace) (irate(container_network_transmit_bytes_total{namespace!="", pod_name!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m]) * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, - "namespace_net_bytes_received": `sum by (namespace) (irate(container_network_receive_bytes_total{namespace!="", pod_name!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m]) * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_net_bytes_transmitted": `sum by (namespace) (irate(container_network_transmit_bytes_total{namespace!="", pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m]) * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_net_bytes_received": `sum by (namespace) (irate(container_network_receive_bytes_total{namespace!="", pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m]) * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, "namespace_pod_count": `sum by (namespace) (kube_pod_status_phase{phase!~"Failed|Succeeded", namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, "namespace_pod_running_count": `sum by (namespace) 
(kube_pod_status_phase{phase="Running", namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, "namespace_pod_succeeded_count": `sum by (namespace) (kube_pod_status_phase{phase="Succeeded", namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, @@ -181,16 +181,16 @@ var promQLTemplates = map[string]string{ "workload_statefulset_unavailable_replicas_ratio": `namespace:statefulset_unavailable_replicas:ratio{$1}`, // pod - "pod_cpu_usage": `round(label_join(sum by (namespace, pod_name) (irate(container_cpu_usage_seconds_total{job="kubelet", pod_name!="", image!=""}[5m])), "pod", "", "pod_name") * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}, 0.001)`, - "pod_memory_usage": `label_join(sum by (namespace, pod_name) (container_memory_usage_bytes{job="kubelet", pod_name!="", image!=""}), "pod", "", "pod_name") * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`, - "pod_memory_usage_wo_cache": `label_join(sum by (namespace, pod_name) (container_memory_working_set_bytes{job="kubelet", pod_name!="", image!=""}), "pod", "", "pod_name") * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`, - "pod_net_bytes_transmitted": `label_join(sum by (namespace, pod_name) (irate(container_network_transmit_bytes_total{pod_name!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])), "pod", "", "pod_name") * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`, - "pod_net_bytes_received": `label_join(sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{pod_name!="", 
interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])), "pod", "", "pod_name") * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`, + "pod_cpu_usage": `round(sum by (namespace, pod) (irate(container_cpu_usage_seconds_total{job="kubelet", pod!="", image!=""}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}, 0.001)`, + "pod_memory_usage": `sum by (namespace, pod) (container_memory_usage_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`, + "pod_memory_usage_wo_cache": `sum by (namespace, pod) (container_memory_working_set_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`, + "pod_net_bytes_transmitted": `sum by (namespace, pod) (irate(container_network_transmit_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`, + "pod_net_bytes_received": `sum by (namespace, pod) (irate(container_network_receive_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`, // container - "container_cpu_usage": `round(sum by (namespace, pod_name, container_name) (irate(container_cpu_usage_seconds_total{job="kubelet", container_name!="POD", container_name!="", image!="", $1}[5m])), 0.001)`, - "container_memory_usage": `sum 
by (namespace, pod_name, container_name) (container_memory_usage_bytes{job="kubelet", container_name!="POD", container_name!="", image!="", $1})`, - "container_memory_usage_wo_cache": `sum by (namespace, pod_name, container_name) (container_memory_working_set_bytes{job="kubelet", container_name!="POD", container_name!="", image!="", $1})`, + "container_cpu_usage": `round(sum by (namespace, pod, container) (irate(container_cpu_usage_seconds_total{job="kubelet", container!="POD", container!="", image!="", $1}[5m])), 0.001)`, + "container_memory_usage": `sum by (namespace, pod, container) (container_memory_usage_bytes{job="kubelet", container!="POD", container!="", image!="", $1})`, + "container_memory_usage_wo_cache": `sum by (namespace, pod, container) (container_memory_working_set_bytes{job="kubelet", container!="POD", container!="", image!="", $1})`, // pvc "pvc_inodes_available": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes_free) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`, @@ -255,25 +255,25 @@ var promQLTemplates = map[string]string{ "prometheus_tsdb_head_samples_appended_rate": `prometheus:prometheus_tsdb_head_samples_appended:sum_rate`, } -func makeExpr(metric string, opt monitoring.QueryOptions) string { +func makeExpr(metric string, opts monitoring.QueryOptions) string { tmpl := promQLTemplates[metric] - switch opt.Level { + switch opts.Level { case monitoring.LevelCluster: return tmpl case monitoring.LevelNode: - return makeNodeMetricExpr(tmpl, opt) + return makeNodeMetricExpr(tmpl, opts) case monitoring.LevelWorkspace: - return makeWorkspaceMetricExpr(tmpl, opt) + return makeWorkspaceMetricExpr(tmpl, opts) case monitoring.LevelNamespace: - return makeNamespaceMetricExpr(tmpl, opt) + return makeNamespaceMetricExpr(tmpl, opts) case monitoring.LevelWorkload: - return makeWorkloadMetricExpr(tmpl, opt) + return makeWorkloadMetricExpr(metric, tmpl, opts) case 
monitoring.LevelPod: - return makePodMetricExpr(tmpl, opt) + return makePodMetricExpr(tmpl, opts) case monitoring.LevelContainer: - return makeContainerMetricExpr(tmpl, opt) + return makeContainerMetricExpr(tmpl, opts) case monitoring.LevelPVC: - return makePVCMetricExpr(tmpl, opt) + return makePVCMetricExpr(tmpl, opts) case monitoring.LevelComponent: return tmpl default: @@ -322,23 +322,31 @@ func makeNamespaceMetricExpr(tmpl string, o monitoring.QueryOptions) string { return strings.Replace(tmpl, "$1", namespaceSelector, -1) } -func makeWorkloadMetricExpr(tmpl string, o monitoring.QueryOptions) string { +func makeWorkloadMetricExpr(metric, tmpl string, o monitoring.QueryOptions) string { var kindSelector, workloadSelector string + switch o.WorkloadKind { case "deployment": o.WorkloadKind = Deployment - kindSelector = fmt.Sprintf(`namespace="%s", deployment!="", deployment=~"%s"`, o.NamespaceName, o.ResourceFilter) case "statefulset": o.WorkloadKind = StatefulSet - kindSelector = fmt.Sprintf(`namespace="%s", statefulset!="", statefulset=~"%s"`, o.NamespaceName, o.ResourceFilter) case "daemonset": o.WorkloadKind = DaemonSet - kindSelector = fmt.Sprintf(`namespace="%s", daemonset!="", daemonset=~"%s"`, o.NamespaceName, o.ResourceFilter) default: o.WorkloadKind = ".*" - kindSelector = fmt.Sprintf(`namespace="%s"`, o.NamespaceName) } workloadSelector = fmt.Sprintf(`namespace="%s", workload=~"%s:%s"`, o.NamespaceName, o.WorkloadKind, o.ResourceFilter) + + if strings.Contains(metric, "deployment") { + kindSelector = fmt.Sprintf(`namespace="%s", deployment!="", deployment=~"%s"`, o.NamespaceName, o.ResourceFilter) + } + if strings.Contains(metric, "statefulset") { + kindSelector = fmt.Sprintf(`namespace="%s", statefulset!="", statefulset=~"%s"`, o.NamespaceName, o.ResourceFilter) + } + if strings.Contains(metric, "daemonset") { + kindSelector = fmt.Sprintf(`namespace="%s", daemonset!="", daemonset=~"%s"`, o.NamespaceName, o.ResourceFilter) + } + return 
strings.NewReplacer("$1", workloadSelector, "$2", kindSelector).Replace(tmpl) } @@ -350,11 +358,11 @@ func makePodMetricExpr(tmpl string, o monitoring.QueryOptions) string { if o.WorkloadName != "" { switch o.WorkloadKind { case "deployment": - workloadSelector = fmt.Sprintf(`owner_kind="ReplicaSet", owner_name=~"^%s-[^-]{1,10}$"`, o.WorkloadKind) + workloadSelector = fmt.Sprintf(`owner_kind="ReplicaSet", owner_name=~"^%s-[^-]{1,10}$"`, o.WorkloadName) case "statefulset": - workloadSelector = fmt.Sprintf(`owner_kind="StatefulSet", owner_name="%s"`, o.WorkloadKind) + workloadSelector = fmt.Sprintf(`owner_kind="StatefulSet", owner_name="%s"`, o.WorkloadName) case "daemonset": - workloadSelector = fmt.Sprintf(`owner_kind="DaemonSet", owner_name="%s"`, o.WorkloadKind) + workloadSelector = fmt.Sprintf(`owner_kind="DaemonSet", owner_name="%s"`, o.WorkloadName) } } @@ -385,9 +393,9 @@ func makePodMetricExpr(tmpl string, o monitoring.QueryOptions) string { func makeContainerMetricExpr(tmpl string, o monitoring.QueryOptions) string { var containerSelector string if o.ContainerName != "" { - containerSelector = fmt.Sprintf(`pod_name="%s", namespace="%s", container_name="%s"`, o.PodName, o.NamespaceName, o.ContainerName) + containerSelector = fmt.Sprintf(`pod="%s", namespace="%s", container="%s"`, o.PodName, o.NamespaceName, o.ContainerName) } else { - containerSelector = fmt.Sprintf(`pod_name="%s", namespace="%s", container_name=~"%s"`, o.PodName, o.NamespaceName, o.ResourceFilter) + containerSelector = fmt.Sprintf(`pod="%s", namespace="%s", container=~"%s"`, o.PodName, o.NamespaceName, o.ResourceFilter) } return strings.Replace(tmpl, "$1", containerSelector, -1) } diff --git a/pkg/simple/client/monitoring/prometheus/promql_test.go b/pkg/simple/client/monitoring/prometheus/promql_test.go index 66293cfeb..dc66e6d07 100644 --- a/pkg/simple/client/monitoring/prometheus/promql_test.go +++ b/pkg/simple/client/monitoring/prometheus/promql_test.go @@ -10,33 +10,162 @@ import ( func 
TestMakeExpr(t *testing.T) { tests := []struct { name string - opt monitoring.QueryOptions + opts monitoring.QueryOptions }{ - {"cluster_cpu_utilisation", monitoring.QueryOptions{Level: monitoring.LevelCluster}}, - {"node_cpu_utilisation", monitoring.QueryOptions{Level: monitoring.LevelNode, NodeName: "i-2dazc1d6"}}, - {"node_cpu_total", monitoring.QueryOptions{Level: monitoring.LevelNode, ResourceFilter: "i-2dazc1d6|i-ezjb7gsk"}}, - {"workspace_cpu_usage", monitoring.QueryOptions{Level: monitoring.LevelWorkspace, WorkspaceName: "system-workspace"}}, - {"workspace_memory_usage", monitoring.QueryOptions{Level: monitoring.LevelWorkspace, ResourceFilter: "system-workspace|demo"}}, - {"namespace_cpu_usage", monitoring.QueryOptions{Level: monitoring.LevelNamespace, NamespaceName: "kube-system"}}, - {"namespace_memory_usage", monitoring.QueryOptions{Level: monitoring.LevelNamespace, ResourceFilter: "kube-system|default"}}, - {"namespace_memory_usage_wo_cache", monitoring.QueryOptions{Level: monitoring.LevelNamespace, WorkspaceName: "system-workspace", ResourceFilter: "kube-system|default"}}, - {"workload_cpu_usage", monitoring.QueryOptions{Level: monitoring.LevelWorkload, WorkloadKind: "deployment", NamespaceName: "default", ResourceFilter: "apiserver|coredns"}}, - {"workload_deployment_replica_available", monitoring.QueryOptions{Level: monitoring.LevelWorkload, WorkloadKind: ".*", NamespaceName: "default", ResourceFilter: "apiserver|coredns"}}, - {"pod_cpu_usage", monitoring.QueryOptions{Level: monitoring.LevelPod, NamespaceName: "default", WorkloadKind: "deployment", WorkloadName: "elasticsearch", ResourceFilter: "elasticsearch-0"}}, - {"pod_memory_usage", monitoring.QueryOptions{Level: monitoring.LevelPod, NamespaceName: "default", PodName: "elasticsearch-12345"}}, - {"pod_memory_usage_wo_cache", monitoring.QueryOptions{Level: monitoring.LevelPod, NodeName: "i-2dazc1d6", PodName: "elasticsearch-12345"}}, - {"container_cpu_usage", monitoring.QueryOptions{Level: 
monitoring.LevelContainer, NamespaceName: "default", PodName: "elasticsearch-12345", ContainerName: "syscall"}}, - {"container_memory_usage", monitoring.QueryOptions{Level: monitoring.LevelContainer, NamespaceName: "default", PodName: "elasticsearch-12345", ResourceFilter: "syscall"}}, - {"pvc_inodes_available", monitoring.QueryOptions{Level: monitoring.LevelPVC, NamespaceName: "default", PersistentVolumeClaimName: "db-123"}}, - {"pvc_inodes_used", monitoring.QueryOptions{Level: monitoring.LevelPVC, NamespaceName: "default", ResourceFilter: "db-123"}}, - {"pvc_inodes_total", monitoring.QueryOptions{Level: monitoring.LevelPVC, StorageClassName: "default", ResourceFilter: "db-123"}}, - {"etcd_server_list", monitoring.QueryOptions{Level: monitoring.LevelComponent}}, + { + name: "cluster_cpu_utilisation", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelCluster, + }, + }, + { + name: "node_cpu_utilisation", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelNode, + NodeName: "i-2dazc1d6", + }, + }, + { + name: "node_cpu_total", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelNode, + ResourceFilter: "i-2dazc1d6|i-ezjb7gsk", + }, + }, + { + name: "workspace_cpu_usage", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelWorkspace, + WorkspaceName: "system-workspace", + }, + }, + { + name: "workspace_memory_usage", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelWorkspace, + ResourceFilter: "system-workspace|demo", + }, + }, + { + name: "namespace_cpu_usage", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelNamespace, + NamespaceName: "kube-system", + }, + }, + { + name: "namespace_memory_usage", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelNamespace, + ResourceFilter: "kube-system|default", + }, + }, + { + name: "namespace_memory_usage_wo_cache", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelNamespace, + WorkspaceName: "system-workspace", + ResourceFilter: "kube-system|default", + }, + }, + { + 
name: "workload_cpu_usage", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelWorkload, + WorkloadKind: "deployment", + NamespaceName: "default", + ResourceFilter: "apiserver|coredns", + }, + }, + { + name: "workload_deployment_replica_available", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelWorkload, + WorkloadKind: ".*", + NamespaceName: "default", + ResourceFilter: "apiserver|coredns", + }, + }, + { + name: "pod_cpu_usage", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelPod, + NamespaceName: "default", + WorkloadKind: "deployment", + WorkloadName: "elasticsearch", + ResourceFilter: "elasticsearch-0", + }, + }, + { + name: "pod_memory_usage", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelPod, + NamespaceName: "default", + PodName: "elasticsearch-12345", + }, + }, + { + name: "pod_memory_usage_wo_cache", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelPod, + NodeName: "i-2dazc1d6", + PodName: "elasticsearch-12345", + }, + }, + { + name: "container_cpu_usage", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelContainer, + NamespaceName: "default", + PodName: "elasticsearch-12345", + ContainerName: "syscall", + }, + }, + { + name: "container_memory_usage", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelContainer, + NamespaceName: "default", + PodName: "elasticsearch-12345", + ResourceFilter: "syscall", + }, + }, + { + name: "pvc_inodes_available", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelPVC, + NamespaceName: "default", + PersistentVolumeClaimName: "db-123", + }, + }, + { + name: "pvc_inodes_used", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelPVC, + NamespaceName: "default", + ResourceFilter: "db-123", + }, + }, + { + name: "pvc_inodes_total", + opts: monitoring.QueryOptions{ + Level: monitoring.LevelPVC, + StorageClassName: "default", + ResourceFilter: "db-123", + }, + }, + { + name: "etcd_server_list", + opts: monitoring.QueryOptions{ + Level: 
monitoring.LevelComponent, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { expected := testdata.PromQLs[tt.name] - result := makeExpr(tt.name, tt.opt) + result := makeExpr(tt.name, tt.opts) if diff := cmp.Diff(result, expected); diff != "" { t.Fatalf("%T differ (-got, +want): %s", expected, diff) } diff --git a/pkg/simple/client/monitoring/prometheus/testdata/metadata-prom.json b/pkg/simple/client/monitoring/prometheus/testdata/metadata-prom.json index e9fca8153..1ada466c0 100644 --- a/pkg/simple/client/monitoring/prometheus/testdata/metadata-prom.json +++ b/pkg/simple/client/monitoring/prometheus/testdata/metadata-prom.json @@ -11,6 +11,16 @@ "help": "The total number of ZooKeeper failures.", "unit": "" }, + { + "target": { + "instance": "127.0.0.1:9090", + "job": "prometheus" + }, + "metric": "prometheus_tsdb_reloads_total", + "type": "counter", + "help": "Number of times the database reloaded block data from disk.", + "unit": "" + }, { "target": { "instance": "127.0.0.1:9090", diff --git a/pkg/simple/client/monitoring/prometheus/testdata/promqls.go b/pkg/simple/client/monitoring/prometheus/testdata/promqls.go index 66d3fcb57..7a1e69401 100644 --- a/pkg/simple/client/monitoring/prometheus/testdata/promqls.go +++ b/pkg/simple/client/monitoring/prometheus/testdata/promqls.go @@ -10,12 +10,12 @@ var PromQLs = map[string]string{ "namespace_memory_usage": `namespace:container_memory_usage_bytes:sum{namespace!="", namespace=~"kube-system|default"}`, "namespace_memory_usage_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", label_kubesphere_io_workspace="system-workspace", namespace=~"kube-system|default"}`, "workload_cpu_usage": `round(namespace:workload_cpu_usage:sum{namespace="default", workload=~"Deployment:apiserver|coredns"}, 0.001)`, - "workload_deployment_replica_available": `label_join(sum (label_join(label_replace(kube_deployment_status_replicas_available{namespace="default"}, "owner_kind", "Deployment", 
"", ""), "workload", "", "deployment")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, - "pod_cpu_usage": `round(label_join(sum by (namespace, pod_name) (irate(container_cpu_usage_seconds_total{job="kubelet", pod_name!="", image!=""}[5m])), "pod", "", "pod_name") * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{owner_kind="ReplicaSet", owner_name=~"^deployment-[^-]{1,10}$"} * on (namespace, pod) group_left(node) kube_pod_info{pod=~"elasticsearch-0", namespace="default"}, 0.001)`, - "pod_memory_usage": `label_join(sum by (namespace, pod_name) (container_memory_usage_bytes{job="kubelet", pod_name!="", image!=""}), "pod", "", "pod_name") * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{} * on (namespace, pod) group_left(node) kube_pod_info{pod="elasticsearch-12345", namespace="default"}`, - "pod_memory_usage_wo_cache": `label_join(sum by (namespace, pod_name) (container_memory_working_set_bytes{job="kubelet", pod_name!="", image!=""}), "pod", "", "pod_name") * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{} * on (namespace, pod) group_left(node) kube_pod_info{pod="elasticsearch-12345", node="i-2dazc1d6"}`, - "container_cpu_usage": `round(sum by (namespace, pod_name, container_name) (irate(container_cpu_usage_seconds_total{job="kubelet", container_name!="POD", container_name!="", image!="", pod_name="elasticsearch-12345", namespace="default", container_name="syscall"}[5m])), 0.001)`, - "container_memory_usage": `sum by (namespace, pod_name, container_name) (container_memory_usage_bytes{job="kubelet", container_name!="POD", container_name!="", image!="", pod_name="elasticsearch-12345", namespace="default", container_name=~"syscall"})`, + "workload_deployment_replica_available": `label_join(sum (label_join(label_replace(kube_deployment_status_replicas_available{namespace="default", deployment!="", deployment=~"apiserver|coredns"}, "owner_kind", "Deployment", "", ""), 
"workload", "", "deployment")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, + "pod_cpu_usage": `round(sum by (namespace, pod) (irate(container_cpu_usage_seconds_total{job="kubelet", pod!="", image!=""}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{owner_kind="ReplicaSet", owner_name=~"^elasticsearch-[^-]{1,10}$"} * on (namespace, pod) group_left(node) kube_pod_info{pod=~"elasticsearch-0", namespace="default"}, 0.001)`, + "pod_memory_usage": `sum by (namespace, pod) (container_memory_usage_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{} * on (namespace, pod) group_left(node) kube_pod_info{pod="elasticsearch-12345", namespace="default"}`, + "pod_memory_usage_wo_cache": `sum by (namespace, pod) (container_memory_working_set_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{} * on (namespace, pod) group_left(node) kube_pod_info{pod="elasticsearch-12345", node="i-2dazc1d6"}`, + "container_cpu_usage": `round(sum by (namespace, pod, container) (irate(container_cpu_usage_seconds_total{job="kubelet", container!="POD", container!="", image!="", pod="elasticsearch-12345", namespace="default", container="syscall"}[5m])), 0.001)`, + "container_memory_usage": `sum by (namespace, pod, container) (container_memory_usage_bytes{job="kubelet", container!="POD", container!="", image!="", pod="elasticsearch-12345", namespace="default", container=~"syscall"})`, "pvc_inodes_available": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes_free) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{namespace="default", persistentvolumeclaim="db-123"}`, "pvc_inodes_used": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes_used) * on (namespace, persistentvolumeclaim) group_left (storageclass) 
kube_persistentvolumeclaim_info{namespace="default", persistentvolumeclaim=~"db-123"}`, "pvc_inodes_total": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{storageclass="default", persistentvolumeclaim=~"db-123"}`, diff --git a/pkg/simple/client/monitoring/query_options.go b/pkg/simple/client/monitoring/query_options.go index 6759f4572..089cd3be4 100644 --- a/pkg/simple/client/monitoring/query_options.go +++ b/pkg/simple/client/monitoring/query_options.go @@ -103,9 +103,11 @@ type PodOption struct { func (po PodOption) Apply(o *QueryOptions) { o.Level = LevelPod o.ResourceFilter = po.ResourceFilter + o.NodeName = po.NodeName o.NamespaceName = po.NamespaceName o.WorkloadKind = po.WorkloadKind o.WorkloadName = po.WorkloadName + o.PodName = po.PodName } type ContainerOption struct { diff --git a/pkg/simple/client/monitoring/types.go b/pkg/simple/client/monitoring/types.go index baf732434..238289427 100644 --- a/pkg/simple/client/monitoring/types.go +++ b/pkg/simple/client/monitoring/types.go @@ -26,8 +26,11 @@ type Point [2]float64 type MetricValue struct { Metadata map[string]string `json:"metric,omitempty" description:"time series labels"` - Sample Point `json:"value,omitempty" description:"time series, values of vector type"` - Series []Point `json:"values,omitempty" description:"time series, values of matrix type"` + // Point is a float64 array with a fixed length of 2. + // A Point value is therefore always initialized as [0, 0] rather than nil, so `omitempty` never drops it. + // To allow an empty Sample, declare Sample as *Point, which can be nil. + Sample *Point `json:"value,omitempty" description:"time series, values of vector type"` + Series []Point `json:"values,omitempty" description:"time series, values of matrix type"` } func (p Point) Timestamp() float64 {