From df6ed5e995813f1bb46c66026ec9df44335319fa Mon Sep 17 00:00:00 2001 From: huanggze Date: Tue, 17 Sep 2019 12:10:27 +0800 Subject: [PATCH] refactor monitor module Signed-off-by: huanggze --- pkg/apis/monitoring/v1alpha2/register.go | 424 +++-- pkg/apiserver/monitoring/monitoring.go | 421 ++--- pkg/models/metrics/constants.go | 41 + pkg/models/metrics/metrics.go | 1817 ++++++++------------ pkg/models/metrics/metrics_rules.go | 502 ++++++ pkg/models/metrics/metricsrule.go | 284 --- pkg/models/metrics/metricsruleconst.go | 776 --------- pkg/models/metrics/namespaces.go | 26 +- pkg/models/metrics/types.go | 60 +- pkg/models/metrics/util.go | 146 +- pkg/models/workspaces/workspaces.go | 18 +- pkg/simple/client/prometheus/prometheus.go | 70 +- 12 files changed, 1709 insertions(+), 2876 deletions(-) create mode 100644 pkg/models/metrics/constants.go create mode 100644 pkg/models/metrics/metrics_rules.go delete mode 100644 pkg/models/metrics/metricsrule.go delete mode 100644 pkg/models/metrics/metricsruleconst.go diff --git a/pkg/apis/monitoring/v1alpha2/register.go b/pkg/apis/monitoring/v1alpha2/register.go index 54b6d9501..ebc8a1fb1 100644 --- a/pkg/apis/monitoring/v1alpha2/register.go +++ b/pkg/apis/monitoring/v1alpha2/register.go @@ -50,13 +50,14 @@ func addWebService(c *restful.Container) error { Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. 
Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). + Param(ws.QueryParameter("type", "Additional operations. Currently available types is statistics. It retrieves the total number of workspaces, devops projects, namespaces, accounts in the cluster at the moment.").DataType("string").Required(false)). Metadata(restfulspec.KeyOpenAPITags, []string{constants.ClusterMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). Consumes(restful.MIME_JSON, restful.MIME_XML). Produces(restful.MIME_JSON) - ws.Route(ws.GET("/nodes").To(monitoring.MonitorAllNodes). + ws.Route(ws.GET("/nodes").To(monitoring.MonitorNode). Doc("Get node-level metric data of all nodes."). Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both node CPU usage and disk usage: `node_cpu_usage|node_disk_size_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). Param(ws.QueryParameter("resources_filter", "The node filter consists of a regexp pattern. It specifies which node data to return. For example, the following filter matches both node i-caojnter and i-cmu82ogj: `i-caojnter|i-cmu82ogj`.").DataType("string").Required(false)). @@ -69,12 +70,12 @@ func addWebService(c *restful.Container) error { Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. 
Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). Metadata(restfulspec.KeyOpenAPITags, []string{constants.NodeMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). Consumes(restful.MIME_JSON, restful.MIME_XML). Produces(restful.MIME_JSON) - ws.Route(ws.GET("/nodes/{node}").To(monitoring.MonitorSpecificNode). + ws.Route(ws.GET("/nodes/{node}").To(monitoring.MonitorNode). Doc("Get node-level metric data of the specific node."). Param(ws.PathParameter("node", "Node name.").DataType("string").Required(true)). Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both node CPU usage and disk usage: `node_cpu_usage|node_disk_size_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). @@ -83,12 +84,64 @@ func addWebService(c *restful.Container) error { Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). Metadata(restfulspec.KeyOpenAPITags, []string{constants.NodeMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). 
Consumes(restful.MIME_JSON, restful.MIME_XML). Produces(restful.MIME_JSON) - ws.Route(ws.GET("/namespaces").To(monitoring.MonitorAllNamespaces). + ws.Route(ws.GET("/workspaces").To(monitoring.MonitorWorkspace). + Doc("Get workspace-level metric data of all workspaces."). + Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both workspace CPU usage and memory usage: `workspace_cpu_usage|workspace_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). + Param(ws.QueryParameter("resources_filter", "The workspace filter consists of a regexp pattern. It specifies which workspace data to return.").DataType("string").Required(false)). + Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). + Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_metric", "Sort workspaces by the specified metric. 
Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_type", "Sort order. One of asc, desc.").DefaultValue("desc.").DataType("string").Required(false)). + Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). + Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). + Metadata(restfulspec.KeyOpenAPITags, []string{constants.WorkspaceMetricsTag}). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). + Consumes(restful.MIME_JSON, restful.MIME_XML). + Produces(restful.MIME_JSON) + + ws.Route(ws.GET("/workspaces/{workspace}").To(monitoring.MonitorWorkspace). + Doc("Get workspace-level metric data of a specific workspace."). + Param(ws.PathParameter("workspace", "Workspace name.").DataType("string").Required(true)). + Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both workspace CPU usage and memory usage: `workspace_cpu_usage|workspace_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). + Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. 
").DataType("string").Required(false)). + Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). + Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). + Param(ws.QueryParameter("type", "Additional operations. Currently available types is statistics. It retrieves the total number of namespaces, devops projects, members and roles in this workspace at the moment.").DataType("string").Required(false)). + Metadata(restfulspec.KeyOpenAPITags, []string{constants.WorkspaceMetricsTag}). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). + Consumes(restful.MIME_JSON, restful.MIME_XML). + Produces(restful.MIME_JSON) + + ws.Route(ws.GET("/workspaces/{workspace}/namespaces").To(monitoring.MonitorNamespace). + Doc("Get namespace-level metric data of a specific workspace."). + Param(ws.PathParameter("workspace", "Workspace name.").DataType("string").Required(true)). + Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both namespace CPU usage and memory usage: `namespace_cpu_usage|namespace_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). + Param(ws.QueryParameter("resources_filter", "The namespace filter consists of a regexp pattern. It specifies which namespace data to return. 
For example, the following filter matches both namespace test and kube-system: `test|kube-system`.").DataType("string").Required(false)). + Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). + Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_metric", "Sort namespaces by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_type", "Sort order. One of asc, desc.").DefaultValue("desc.").DataType("string").Required(false)). + Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). + Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). + Metadata(restfulspec.KeyOpenAPITags, []string{constants.NamespaceMetricsTag}). + Writes(metrics.Response{}). 
+ Returns(http.StatusOK, RespOK, metrics.Response{})). + Consumes(restful.MIME_JSON, restful.MIME_XML). + Produces(restful.MIME_JSON) + + ws.Route(ws.GET("/namespaces").To(monitoring.MonitorNamespace). Doc("Get namespace-level metric data of all namespaces."). Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both namespace CPU usage and memory usage: `namespace_cpu_usage|namespace_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). Param(ws.QueryParameter("resources_filter", "The namespace filter consists of a regexp pattern. It specifies which namespace data to return. For example, the following filter matches both namespace test and kube-system: `test|kube-system`.").DataType("string").Required(false)). @@ -101,12 +154,12 @@ func addWebService(c *restful.Container) error { Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). Metadata(restfulspec.KeyOpenAPITags, []string{constants.NamespaceMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). Consumes(restful.MIME_JSON, restful.MIME_XML). Produces(restful.MIME_JSON) - ws.Route(ws.GET("/namespaces/{namespace}").To(monitoring.MonitorSpecificNamespace). + ws.Route(ws.GET("/namespaces/{namespace}").To(monitoring.MonitorNamespace). 
Doc("Get namespace-level metric data of the specific namespace."). Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both namespace CPU usage and memory usage: `namespace_cpu_usage|namespace_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). @@ -115,12 +168,51 @@ func addWebService(c *restful.Container) error { Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). Metadata(restfulspec.KeyOpenAPITags, []string{constants.NamespaceMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). Consumes(restful.MIME_JSON, restful.MIME_XML). Produces(restful.MIME_JSON) - ws.Route(ws.GET("/namespaces/{namespace}/pods").To(monitoring.MonitorAllPodsOfSpecificNamespace). + ws.Route(ws.GET("/namespaces/{namespace}/workloads").To(monitoring.MonitorWorkload). + Doc("Get workload-level metric data of a specific namespace's workloads."). + Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). 
+ Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both workload CPU usage and memory usage: `workload_cpu_usage|workload_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). + Param(ws.QueryParameter("resources_filter", "The workload filter consists of a regexp pattern. It specifies which workload data to return. For example, the following filter matches any workload whose name begins with prometheus: `prometheus.*`.").DataType("string").Required(false)). + Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). + Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_metric", "Sort workloads by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_type", "Sort order. 
One of asc, desc.").DefaultValue("desc.").DataType("string").Required(false)). + Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). + Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). + Metadata(restfulspec.KeyOpenAPITags, []string{constants.WorkloadMetricsTag}). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). + Consumes(restful.MIME_JSON, restful.MIME_XML). + Produces(restful.MIME_JSON) + + ws.Route(ws.GET("/namespaces/{namespace}/workloads/{kind}").To(monitoring.MonitorWorkload). + Doc("Get workload-level metric data of all workloads which belongs to a specific kind."). + Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). + Param(ws.PathParameter("kind", "Workload kind. One of deployment, daemonset, statefulset.").DataType("string").Required(true)). + Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both workload CPU usage and memory usage: `workload_cpu_usage|workload_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). + Param(ws.QueryParameter("resources_filter", "The workload filter consists of a regexp pattern. It specifies which workload data to return. For example, the following filter matches any workload whose name begins with prometheus: `prometheus.*`.").DataType("string").Required(false)). + Param(ws.QueryParameter("start", "Start time of query. 
Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). + Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_metric", "Sort workloads by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_type", "Sort order. One of asc, desc.").DefaultValue("desc.").DataType("string").Required(false)). + Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). + Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). + Metadata(restfulspec.KeyOpenAPITags, []string{constants.WorkloadMetricsTag}). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). + Consumes(restful.MIME_JSON, restful.MIME_XML). + Produces(restful.MIME_JSON) + + ws.Route(ws.GET("/namespaces/{namespace}/pods").To(monitoring.MonitorPod). 
Doc("Get pod-level metric data of the specific namespace's pods."). Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both pod CPU usage and memory usage: `pod_cpu_usage|pod_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). @@ -134,12 +226,12 @@ func addWebService(c *restful.Container) error { Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). Metadata(restfulspec.KeyOpenAPITags, []string{constants.PodMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). Consumes(restful.MIME_JSON, restful.MIME_XML). Produces(restful.MIME_JSON) - ws.Route(ws.GET("/namespaces/{namespace}/pods/{pod}").To(monitoring.MonitorSpecificPodOfSpecificNamespace). + ws.Route(ws.GET("/namespaces/{namespace}/pods/{pod}").To(monitoring.MonitorPod). Doc("Get pod-level metric data of a specific pod. Navigate to the pod by the pod's namespace."). Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). Param(ws.PathParameter("pod", "Pod name.").DataType("string").Required(true)). 
@@ -149,155 +241,12 @@ func addWebService(c *restful.Container) error { Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). Metadata(restfulspec.KeyOpenAPITags, []string{constants.PodMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). Consumes(restful.MIME_JSON, restful.MIME_XML). Produces(restful.MIME_JSON) - ws.Route(ws.GET("/storageclasses/{storageclass}/persistentvolumeclaims").To(monitoring.MonitorAllPVCsOfSpecificStorageClass). - Doc("Get PVC-level metric data of the specific storageclass's PVCs."). - Param(ws.PathParameter("storageclass", "The name of the storageclass.").DataType("string").Required(true)). - Param(ws.PathParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both PVC available and used inodes: `pvc_inodes_available|pvc_inodes_used`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). - Param(ws.QueryParameter("resources_filter", "The PVC filter consists of a regexp pattern. It specifies which PVC data to return. For example, the following filter matches any pod whose name begins with redis: `redis.*`.").DataType("string").Required(false)). 
- Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). - Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). - Param(ws.QueryParameter("sort_metric", "Sort PVCs by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). - Param(ws.QueryParameter("sort_type", "Sort order. One of asc, desc.").DefaultValue("desc.").DataType("string").Required(false)). - Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). - Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). - Metadata(restfulspec.KeyOpenAPITags, []string{constants.PVCMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). - Consumes(restful.MIME_JSON, restful.MIME_XML). 
- Produces(restful.MIME_JSON) - - ws.Route(ws.GET("/namespaces/{namespace}/persistentvolumeclaims").To(monitoring.MonitorAllPVCsOfSpecificNamespace). - Doc("Get PVC-level metric data of the specific namespace's PVCs."). - Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). - Param(ws.PathParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both PVC available and used inodes: `pvc_inodes_available|pvc_inodes_used`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). - Param(ws.QueryParameter("resources_filter", "The PVC filter consists of a regexp pattern. It specifies which PVC data to return. For example, the following filter matches any pod whose name begins with redis: `redis.*`.").DataType("string").Required(false)). - Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). - Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). 
- Param(ws.QueryParameter("sort_metric", "Sort PVCs by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). - Param(ws.QueryParameter("sort_type", "Sort order. One of asc, desc.").DefaultValue("desc.").DataType("string").Required(false)). - Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). - Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). - Metadata(restfulspec.KeyOpenAPITags, []string{constants.PVCMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). - Consumes(restful.MIME_JSON, restful.MIME_XML). - Produces(restful.MIME_JSON) - - ws.Route(ws.GET("/namespaces/{namespace}/persistentvolumeclaims/{pvc}").To(monitoring.MonitorSpecificPVCofSpecificNamespace). - Doc("Get PVC-level metric data of a specific PVC. Navigate to the PVC by the PVC's namespace."). - Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). - Param(ws.PathParameter("pvc", "PVC name.").DataType("string").Required(true)). - Param(ws.PathParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both PVC available and used inodes: `pvc_inodes_available|pvc_inodes_used`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). - Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. 
It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). - Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). - Metadata(restfulspec.KeyOpenAPITags, []string{constants.PVCMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). - Consumes(restful.MIME_JSON, restful.MIME_XML). - Produces(restful.MIME_JSON) - - ws.Route(ws.GET("/nodes/{node}/pods").To(monitoring.MonitorAllPodsOnSpecificNode). - Doc("Get pod-level metric data of all pods on a specific node."). - Param(ws.PathParameter("node", "Node name.").DataType("string").Required(true)). - Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both pod CPU usage and memory usage: `pod_cpu_usage|pod_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). - Param(ws.QueryParameter("resources_filter", "The pod filter consists of a regexp pattern. It specifies which pod data to return. 
For example, the following filter matches any pod whose name begins with redis: `redis.*`.").DataType("string").Required(false)). - Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). - Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). - Param(ws.QueryParameter("sort_metric", "Sort pods by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). - Param(ws.QueryParameter("sort_type", "Sort order. One of asc, desc.").DefaultValue("desc.").DataType("string").Required(false)). - Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). - Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). - Metadata(restfulspec.KeyOpenAPITags, []string{constants.PodMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). 
- Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). - Consumes(restful.MIME_JSON, restful.MIME_XML). - Produces(restful.MIME_JSON) - - ws.Route(ws.GET("/nodes/{node}/pods/{pod}").To(monitoring.MonitorSpecificPodOnSpecificNode). - Doc("Get pod-level metric data of a specific pod. Navigate to the pod by the node where it is scheduled."). - Param(ws.PathParameter("node", "Node name.").DataType("string").Required(true)). - Param(ws.PathParameter("pod", "Pod name.").DataType("string").Required(true)). - Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both pod CPU usage and memory usage: `pod_cpu_usage|pod_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). - Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). - Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). - Metadata(restfulspec.KeyOpenAPITags, []string{constants.PodMetricsTag}). 
- Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). - Consumes(restful.MIME_JSON, restful.MIME_XML). - Produces(restful.MIME_JSON) - - ws.Route(ws.GET("/nodes/{node}/pods/{pod}/containers").To(monitoring.MonitorAllContainersOnSpecificNode). - Doc("Get container-level metric data of a specific pod's containers. Navigate to the pod by the node where it is scheduled."). - Param(ws.PathParameter("node", "Node name.").DataType("string").Required(true)). - Param(ws.PathParameter("pod", "Pod name.").DataType("string").Required(true)). - Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both container CPU usage and memory usage: `container_cpu_usage|container_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). - Param(ws.QueryParameter("resources_filter", "The container filter consists of a regexp pattern. It specifies which container data to return. For example, the following filter matches container prometheus and prometheus-config-reloader: `prometheus|prometheus-config-reloader`.").DataType("string").Required(false)). - Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. 
Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). - Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). - Param(ws.QueryParameter("sort_metric", "Sort containers by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). - Param(ws.QueryParameter("sort_type", "Sort order. One of asc, desc.").DefaultValue("desc.").DataType("string").Required(false)). - Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). - Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). - Metadata(restfulspec.KeyOpenAPITags, []string{constants.ContainerMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). - Consumes(restful.MIME_JSON, restful.MIME_XML). - Produces(restful.MIME_JSON) - - ws.Route(ws.GET("/namespaces/{namespace}/pods/{pod}/containers").To(monitoring.MonitorAllContainersOfSpecificNamespace). - Doc("Get container-level metric data of a specific pod's containers. Navigate to the pod by the pod's namespace."). - Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). - Param(ws.PathParameter("pod", "Pod name.").DataType("string").Required(true)). - Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. 
For example, the following filter matches both container CPU usage and memory usage: `container_cpu_usage|container_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). - Param(ws.QueryParameter("resources_filter", "The container filter consists of a regexp pattern. It specifies which container data to return. For example, the following filter matches container prometheus and prometheus-config-reloader: `prometheus|prometheus-config-reloader`.").DataType("string").Required(false)). - Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). - Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). - Param(ws.QueryParameter("sort_metric", "Sort containers by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). - Param(ws.QueryParameter("sort_type", "Sort order. One of asc, desc.").DefaultValue("desc.").DataType("string").Required(false)). - Param(ws.QueryParameter("page", "The page number. 
This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). - Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). - Metadata(restfulspec.KeyOpenAPITags, []string{constants.ContainerMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). - Consumes(restful.MIME_JSON, restful.MIME_XML). - Produces(restful.MIME_JSON) - - ws.Route(ws.GET("/namespaces/{namespace}/pods/{pod}/containers/{container}").To(monitoring.MonitorSpecificContainerOfSpecificNamespace). - Doc("Get container-level metric data of a specific container. Navigate to the container by the pod name and the namespace."). - Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). - Param(ws.PathParameter("pod", "Pod name.").DataType("string").Required(true)). - Param(ws.PathParameter("container", "Container name.").DataType("string").Required(true)). - Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both container CPU usage and memory usage: `container_cpu_usage|container_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). - Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. 
It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). - Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). - Metadata(restfulspec.KeyOpenAPITags, []string{constants.ContainerMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). - Consumes(restful.MIME_JSON, restful.MIME_XML). - Produces(restful.MIME_JSON) - - ws.Route(ws.GET("/namespaces/{namespace}/workloads/{kind}/{workload}/pods").To(monitoring.MonitorSpecificWorkload). + ws.Route(ws.GET("/namespaces/{namespace}/workloads/{kind}/{workload}/pods").To(monitoring.MonitorPod). Doc("Get pod-level metric data of a specific workload's pods. Navigate to the workload by the namespace."). Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). Param(ws.PathParameter("kind", "Workload kind. One of deployment, daemonset, statefulset.").DataType("string").Required(true)). @@ -313,82 +262,131 @@ func addWebService(c *restful.Container) error { Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). 
Metadata(restfulspec.KeyOpenAPITags, []string{constants.PodMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). Consumes(restful.MIME_JSON, restful.MIME_XML). Produces(restful.MIME_JSON) - ws.Route(ws.GET("/namespaces/{namespace}/workloads/{kind}").To(monitoring.MonitorAllWorkloadsOfSpecificKind). - Doc("Get workload-level metric data of all workloads which belongs to a specific kind."). + ws.Route(ws.GET("/nodes/{node}/pods").To(monitoring.MonitorPod). + Doc("Get pod-level metric data of all pods on a specific node."). + Param(ws.PathParameter("node", "Node name.").DataType("string").Required(true)). + Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both pod CPU usage and memory usage: `pod_cpu_usage|pod_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). + Param(ws.QueryParameter("resources_filter", "The pod filter consists of a regexp pattern. It specifies which pod data to return. For example, the following filter matches any pod whose name begins with redis: `redis.*`.").DataType("string").Required(false)). + Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("step", "Time interval. 
Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). + Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_metric", "Sort pods by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_type", "Sort order. One of asc, desc.").DefaultValue("desc").DataType("string").Required(false)). + Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). + Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). + Metadata(restfulspec.KeyOpenAPITags, []string{constants.PodMetricsTag}). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). + Consumes(restful.MIME_JSON, restful.MIME_XML). + Produces(restful.MIME_JSON) + + ws.Route(ws.GET("/nodes/{node}/pods/{pod}").To(monitoring.MonitorPod). + Doc("Get pod-level metric data of a specific pod. Navigate to the pod by the node where it is scheduled."). + Param(ws.PathParameter("node", "Node name.").DataType("string").Required(true)). + Param(ws.PathParameter("pod", "Pod name.").DataType("string").Required(true)). + Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. 
For example, the following filter matches both pod CPU usage and memory usage: `pod_cpu_usage|pod_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). + Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). + Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). + Metadata(restfulspec.KeyOpenAPITags, []string{constants.PodMetricsTag}). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). + Consumes(restful.MIME_JSON, restful.MIME_XML). + Produces(restful.MIME_JSON) + + ws.Route(ws.GET("/namespaces/{namespace}/pods/{pod}/containers").To(monitoring.MonitorContainer). + Doc("Get container-level metric data of a specific pod's containers. Navigate to the pod by the pod's namespace."). Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). - Param(ws.PathParameter("kind", "Workload kind. One of deployment, daemonset, statefulset.").DataType("string").Required(true)). 
- Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both workload CPU usage and memory usage: `workload_pod_cpu_usage|workload_pod_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). - Param(ws.QueryParameter("resources_filter", "The workload filter consists of a regexp pattern. It specifies which workload data to return. For example, the following filter matches any workload whose name begins with prometheus: `prometheus.*`.").DataType("string").Required(false)). + Param(ws.PathParameter("pod", "Pod name.").DataType("string").Required(true)). + Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both container CPU usage and memory usage: `container_cpu_usage|container_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). + Param(ws.QueryParameter("resources_filter", "The container filter consists of a regexp pattern. It specifies which container data to return. For example, the following filter matches container prometheus and prometheus-config-reloader: `prometheus|prometheus-config-reloader`.").DataType("string").Required(false)). Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). 
Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). - Param(ws.QueryParameter("sort_metric", "Sort workloads by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_metric", "Sort containers by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). Param(ws.QueryParameter("sort_type", "Sort order. One of asc, desc.").DefaultValue("desc.").DataType("string").Required(false)). Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). - Metadata(restfulspec.KeyOpenAPITags, []string{constants.WorkloadMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). + Metadata(restfulspec.KeyOpenAPITags, []string{constants.ContainerMetricsTag}). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). Consumes(restful.MIME_JSON, restful.MIME_XML). Produces(restful.MIME_JSON) - ws.Route(ws.GET("/namespaces/{namespace}/workloads").To(monitoring.MonitorAllWorkloadsOfSpecificNamespace). 
- Doc("Get workload-level metric data of a specific namespace's workloads."). + ws.Route(ws.GET("/namespaces/{namespace}/pods/{pod}/containers/{container}").To(monitoring.MonitorContainer). + Doc("Get container-level metric data of a specific container. Navigate to the container by the pod name and the namespace."). Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). - Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both workload CPU usage and memory usage: `workload_pod_cpu_usage|workload_pod_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). - Param(ws.QueryParameter("resources_filter", "The workload filter consists of a regexp pattern. It specifies which workload data to return. For example, the following filter matches any workload whose name begins with prometheus: `prometheus.*`.").DataType("string").Required(false)). + Param(ws.PathParameter("pod", "Pod name.").DataType("string").Required(true)). + Param(ws.PathParameter("container", "Container name.").DataType("string").Required(true)). + Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both container CPU usage and memory usage: `container_cpu_usage|container_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). 
Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). - Param(ws.QueryParameter("sort_metric", "Sort workloads by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). - Param(ws.QueryParameter("sort_type", "Sort order. One of asc, desc.").DefaultValue("desc.").DataType("string").Required(false)). - Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). - Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). - Metadata(restfulspec.KeyOpenAPITags, []string{constants.WorkloadMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). + Metadata(restfulspec.KeyOpenAPITags, []string{constants.ContainerMetricsTag}). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). Consumes(restful.MIME_JSON, restful.MIME_XML). 
Produces(restful.MIME_JSON) - // list all namespace in this workspace by selected metrics - ws.Route(ws.GET("/workspaces/{workspace}").To(monitoring.MonitorSpecificWorkspace). - Doc("Get workspace-level metric data of a specific workspace."). - Param(ws.PathParameter("workspace", "Workspace name.").DataType("string").Required(true)). - Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both workspace CPU usage and memory usage: `workspace_cpu_usage|workspace_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). + ws.Route(ws.GET("/storageclasses/{storageclass}/persistentvolumeclaims").To(monitoring.MonitorPVC). + Doc("Get PVC-level metric data of the specific storageclass's PVCs."). + Param(ws.PathParameter("storageclass", "The name of the storageclass.").DataType("string").Required(true)). + Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both PVC available and used inodes: `pvc_inodes_available|pvc_inodes_used`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). + Param(ws.QueryParameter("resources_filter", "The PVC filter consists of a regexp pattern. It specifies which PVC data to return. For example, the following filter matches any pod whose name begins with redis: `redis.*`.").DataType("string").Required(false)). Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). 
Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). - Param(ws.QueryParameter("type", "Additional operations. Currently available types is statistics. It retrieves the total number of namespaces, devops projects, members and roles in this workspace at the moment.").DataType("string").Required(false)). - Metadata(restfulspec.KeyOpenAPITags, []string{constants.WorkspaceMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). - Consumes(restful.MIME_JSON, restful.MIME_XML). - Produces(restful.MIME_JSON) - - ws.Route(ws.GET("/workspaces").To(monitoring.MonitorAllWorkspaces). - Doc("Get workspace-level metric data of all workspaces."). - Param(ws.QueryParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both workspace CPU usage and memory usage: `workspace_cpu_usage|workspace_memory_usage`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). - Param(ws.QueryParameter("resources_filter", "The workspace filter consists of a regexp pattern. 
It specifies which workspace data to return.").DataType("string").Required(false)). - Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). - Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). - Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). - Param(ws.QueryParameter("sort_metric", "Sort workspaces by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_metric", "Sort PVCs by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). Param(ws.QueryParameter("sort_type", "Sort order. One of asc, desc.").DefaultValue("desc.").DataType("string").Required(false)). Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). 
- Param(ws.QueryParameter("type", "Additional operations. Currently available types is statistics. It retrieves the total number of workspaces, devops projects, namespaces, accounts in the cluster at the moment.").DataType("string").Required(false)). - Metadata(restfulspec.KeyOpenAPITags, []string{constants.WorkspaceMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). + Metadata(restfulspec.KeyOpenAPITags, []string{constants.PVCMetricsTag}). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). + Consumes(restful.MIME_JSON, restful.MIME_XML). + Produces(restful.MIME_JSON) + + ws.Route(ws.GET("/namespaces/{namespace}/persistentvolumeclaims").To(monitoring.MonitorPVC). + Doc("Get PVC-level metric data of the specific namespace's PVCs."). + Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). + Param(ws.PathParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both PVC available and used inodes: `pvc_inodes_available|pvc_inodes_used`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). + Param(ws.QueryParameter("resources_filter", "The PVC filter consists of a regexp pattern. It specifies which PVC data to return. For example, the following filter matches any pod whose name begins with redis: `redis.*`.").DataType("string").Required(false)). + Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. 
It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). + Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_metric", "Sort PVCs by the specified metric. Not applicable if **start** and **end** are provided.").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_type", "Sort order. One of asc, desc.").DefaultValue("desc.").DataType("string").Required(false)). + Param(ws.QueryParameter("page", "The page number. This field paginates result data of each metric, then returns a specific page. For example, setting **page** to 2 returns the second page. It only applies to sorted metric data.").DataType("integer").Required(false)). + Param(ws.QueryParameter("limit", "Page size, the maximum number of results in a single page. Defaults to 5.").DataType("integer").Required(false).DefaultValue("5")). + Metadata(restfulspec.KeyOpenAPITags, []string{constants.PVCMetricsTag}). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). + Consumes(restful.MIME_JSON, restful.MIME_XML). + Produces(restful.MIME_JSON) + + ws.Route(ws.GET("/namespaces/{namespace}/persistentvolumeclaims/{pvc}").To(monitoring.MonitorPVC). + Doc("Get PVC-level metric data of a specific PVC. Navigate to the PVC by the PVC's namespace."). + Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). 
+ Param(ws.PathParameter("pvc", "PVC name.").DataType("string").Required(true)). + Param(ws.PathParameter("metrics_filter", "The metric name filter consists of a regexp pattern. It specifies which metric data to return. For example, the following filter matches both PVC available and used inodes: `pvc_inodes_available|pvc_inodes_used`. View available metrics at [kubesphere.io](https://docs.kubesphere.io/advanced-v2.0/zh-CN/api-reference/monitoring-metrics/).").DataType("string").Required(false)). + Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). + Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). + Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). + Metadata(restfulspec.KeyOpenAPITags, []string{constants.PVCMetricsTag}). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). Consumes(restful.MIME_JSON, restful.MIME_XML). Produces(restful.MIME_JSON) @@ -401,8 +399,8 @@ func addWebService(c *restful.Container) error { Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. 
Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). Metadata(restfulspec.KeyOpenAPITags, []string{constants.ComponentMetricsTag}). - Writes(metrics.FormatedLevelMetric{}). - Returns(http.StatusOK, RespOK, metrics.FormatedLevelMetric{})). + Writes(metrics.Response{}). + Returns(http.StatusOK, RespOK, metrics.Response{})). Consumes(restful.MIME_JSON, restful.MIME_XML). Produces(restful.MIME_JSON) diff --git a/pkg/apiserver/monitoring/monitoring.go b/pkg/apiserver/monitoring/monitoring.go index e42a4c07e..446156fe8 100644 --- a/pkg/apiserver/monitoring/monitoring.go +++ b/pkg/apiserver/monitoring/monitoring.go @@ -18,393 +18,186 @@ package monitoring import ( - "fmt" "github.com/emicklei/go-restful" - "k8s.io/klog" "kubesphere.io/kubesphere/pkg/informers" "kubesphere.io/kubesphere/pkg/models/metrics" - "kubesphere.io/kubesphere/pkg/simple/client" "net/url" "strconv" "strings" - "time" ) -func MonitorAllPodsOfSpecificNamespace(request *restful.Request, response *restful.Response) { - MonitorPod(request, response) -} - -func MonitorSpecificPodOfSpecificNamespace(request *restful.Request, response *restful.Response) { - MonitorPod(request, response) -} - -func MonitorAllPodsOnSpecificNode(request *restful.Request, response *restful.Response) { - MonitorPod(request, response) -} - -func MonitorSpecificPodOnSpecificNode(request *restful.Request, response *restful.Response) { - MonitorPod(request, response) -} - -func MonitorPod(request *restful.Request, response *restful.Response) { - requestParams := ParseMonitoringRequestParams(request) - podName := requestParams.PodName - if podName != "" { - requestParams.ResourcesFilter = fmt.Sprintf("^%s$", requestParams.PodName) - } - - rawMetrics := 
metrics.GetPodLevelMetrics(requestParams) - // sorting - sortedMetrics, maxMetricCount := metrics.Sort(requestParams.SortMetricName, requestParams.SortType, rawMetrics) - // paging - pagedMetrics := metrics.Page(requestParams.PageNum, requestParams.LimitNum, sortedMetrics, maxMetricCount) - response.WriteAsJson(pagedMetrics) -} - -func MonitorAllContainersOnSpecificNode(request *restful.Request, response *restful.Response) { - MonitorContainer(request, response) -} - -func MonitorAllContainersOfSpecificNamespace(request *restful.Request, response *restful.Response) { - MonitorContainer(request, response) -} - -func MonitorSpecificContainerOfSpecificNamespace(request *restful.Request, response *restful.Response) { - MonitorContainer(request, response) -} - -func MonitorContainer(request *restful.Request, response *restful.Response) { - requestParams := ParseMonitoringRequestParams(request) - rawMetrics := metrics.GetContainerLevelMetrics(requestParams) - // sorting - sortedMetrics, maxMetricCount := metrics.Sort(requestParams.SortMetricName, requestParams.SortType, rawMetrics) - // paging - pagedMetrics := metrics.Page(requestParams.PageNum, requestParams.LimitNum, sortedMetrics, maxMetricCount) - - response.WriteAsJson(pagedMetrics) -} - -func MonitorSpecificWorkload(request *restful.Request, response *restful.Response) { - MonitorWorkload(request, response) -} - -func MonitorAllWorkloadsOfSpecificKind(request *restful.Request, response *restful.Response) { - MonitorWorkload(request, response) -} - -func MonitorAllWorkloadsOfSpecificNamespace(request *restful.Request, response *restful.Response) { - MonitorWorkload(request, response) -} - -func MonitorWorkload(request *restful.Request, response *restful.Response) { - requestParams := ParseMonitoringRequestParams(request) - - rawMetrics := metrics.GetWorkloadLevelMetrics(requestParams) - - // sorting - sortedMetrics, maxMetricCount := metrics.Sort(requestParams.SortMetricName, requestParams.SortType, rawMetrics) - - 
// paging - pagedMetrics := metrics.Page(requestParams.PageNum, requestParams.LimitNum, sortedMetrics, maxMetricCount) - - response.WriteAsJson(pagedMetrics) - -} - -func MonitorAllWorkspaces(request *restful.Request, response *restful.Response) { - - requestParams := ParseMonitoringRequestParams(request) - - tp := requestParams.Tp - if tp == "statistics" { - // merge multiple metric: all-devops, all-roles, all-projects...this api is designed for admin - res := metrics.GetAllWorkspacesStatistics() - response.WriteAsJson(res) - - } else { - rawMetrics := metrics.MonitorAllWorkspaces(requestParams) - - // sorting - sortedMetrics, maxMetricCount := metrics.Sort(requestParams.SortMetricName, requestParams.SortType, rawMetrics) - - // paging - pagedMetrics := metrics.Page(requestParams.PageNum, requestParams.LimitNum, sortedMetrics, maxMetricCount) - - response.WriteAsJson(pagedMetrics) - } -} - -func MonitorSpecificWorkspace(request *restful.Request, response *restful.Response) { - requestParams := ParseMonitoringRequestParams(request) - - tp := requestParams.Tp - if tp == "rank" { - // multiple - rawMetrics := metrics.GetWorkspaceLevelMetrics(requestParams) - - // sorting - sortedMetrics, maxMetricCount := metrics.Sort(requestParams.SortMetricName, requestParams.SortType, rawMetrics) - - // paging - pagedMetrics := metrics.Page(requestParams.PageNum, requestParams.LimitNum, sortedMetrics, maxMetricCount) - response.WriteAsJson(pagedMetrics) - - } else if tp == "statistics" { - wsName := requestParams.WsName - - // merge multiple metric: devops, roles, projects... 
- res := metrics.MonitorOneWorkspaceStatistics(wsName) - response.WriteAsJson(res) - } else { - res := metrics.GetWorkspaceLevelMetrics(requestParams) - response.WriteAsJson(res) - } -} - -func MonitorAllNamespaces(request *restful.Request, response *restful.Response) { - MonitorNamespace(request, response) -} - -func MonitorSpecificNamespace(request *restful.Request, response *restful.Response) { - MonitorNamespace(request, response) -} - -func MonitorNamespace(request *restful.Request, response *restful.Response) { - requestParams := ParseMonitoringRequestParams(request) - // multiple - rawMetrics := metrics.GetNamespaceLevelMetrics(requestParams) - - // sorting - sortedMetrics, maxMetricCount := metrics.Sort(requestParams.SortMetricName, requestParams.SortType, rawMetrics) - // paging - pagedMetrics := metrics.Page(requestParams.PageNum, requestParams.LimitNum, sortedMetrics, maxMetricCount) - response.WriteAsJson(pagedMetrics) -} - func MonitorCluster(request *restful.Request, response *restful.Response) { - requestParams := ParseMonitoringRequestParams(request) + r := ParseRequestParams(request) - metricName := requestParams.MetricsName - if metricName != "" { - prometheusClient, err := client.ClientSets().Prometheus() - if err != nil { - if _, ok := err.(client.ClientSetNotEnabledError); ok { - klog.Error("monitoring is not enabled") - return - } else { - klog.Errorf("get prometheus client failed %+v", err) - } - } - - // single - queryType, params := metrics.AssembleClusterMetricRequestInfo(requestParams, metricName) - metricsStr := prometheusClient.SendMonitoringRequest(queryType, params) - res := metrics.ReformatJson(metricsStr, metricName, map[string]string{metrics.MetricLevelCluster: "local"}) - - response.WriteAsJson(res) + // TODO: expose kubesphere iam and devops statistics in prometheus format + var res *metrics.Response + if r.Type == "statistics" { + res = metrics.GetClusterStatistics() } else { - // multiple - res := 
metrics.GetClusterLevelMetrics(requestParams) - response.WriteAsJson(res) + res = metrics.GetClusterMetrics(r) } -} -func MonitorAllNodes(request *restful.Request, response *restful.Response) { - MonitorNode(request, response) -} - -func MonitorSpecificNode(request *restful.Request, response *restful.Response) { - MonitorNode(request, response) + response.WriteAsJson(res) } func MonitorNode(request *restful.Request, response *restful.Response) { - requestParams := ParseMonitoringRequestParams(request) + r := ParseRequestParams(request) + res := metrics.GetNodeMetrics(r) + res, metricsNum := res.SortBy(r.SortMetric, r.SortType) + res = res.Page(r.PageNum, r.LimitNum, metricsNum) + response.WriteAsJson(res) +} - metricName := requestParams.MetricsName - if metricName != "" { - prometheusClient, err := client.ClientSets().Prometheus() - if err != nil { - if _, ok := err.(client.ClientSetNotEnabledError); ok { - klog.Error("monitoring is not enabled") - return - } else { - klog.Errorf("get prometheus client failed %+v", err) - } - } - // single - queryType, params := metrics.AssembleNodeMetricRequestInfo(requestParams, metricName) - metricsStr := prometheusClient.SendMonitoringRequest(queryType, params) - res := metrics.ReformatJson(metricsStr, metricName, map[string]string{metrics.MetricLevelNode: ""}) - // The raw node-exporter result doesn't include ip address information - // Thereby, append node ip address to .data.result[].metric +func MonitorWorkspace(request *restful.Request, response *restful.Response) { + r := ParseRequestParams(request) - nodeAddress := metrics.GetNodeAddressInfo() - metrics.AddNodeAddressMetric(res, nodeAddress) - - response.WriteAsJson(res) + // TODO: expose kubesphere iam and devops statistics in prometheus format + var res *metrics.Response + if r.Type == "statistics" && r.WorkspaceName != "" { + res = metrics.GetWorkspaceStatistics(r.WorkspaceName) } else { - // multiple - rawMetrics := metrics.GetNodeLevelMetrics(requestParams) - 
nodeAddress := metrics.GetNodeAddressInfo() - - for i := 0; i < len(rawMetrics.Results); i++ { - metrics.AddNodeAddressMetric(&rawMetrics.Results[i], nodeAddress) - } - - // sorting - sortedMetrics, maxMetricCount := metrics.Sort(requestParams.SortMetricName, requestParams.SortType, rawMetrics) - // paging - pagedMetrics := metrics.Page(requestParams.PageNum, requestParams.LimitNum, sortedMetrics, maxMetricCount) - - response.WriteAsJson(pagedMetrics) + res = metrics.GetWorkspaceMetrics(r) + res, metricsNum := res.SortBy(r.SortMetric, r.SortType) + res = res.Page(r.PageNum, r.LimitNum, metricsNum) } + + response.WriteAsJson(res) } -func MonitorAllPVCsOfSpecificNamespace(request *restful.Request, response *restful.Response) { - MonitorPVC(request, response) +func MonitorNamespace(request *restful.Request, response *restful.Response) { + r := ParseRequestParams(request) + res := metrics.GetNamespaceMetrics(r) + res, metricsNum := res.SortBy(r.SortMetric, r.SortType) + res = res.Page(r.PageNum, r.LimitNum, metricsNum) + response.WriteAsJson(res) } -func MonitorAllPVCsOfSpecificStorageClass(request *restful.Request, response *restful.Response) { - MonitorPVC(request, response) +func MonitorWorkload(request *restful.Request, response *restful.Response) { + r := ParseRequestParams(request) + res := metrics.GetWorkloadMetrics(r) + res, metricsNum := res.SortBy(r.SortMetric, r.SortType) + res = res.Page(r.PageNum, r.LimitNum, metricsNum) + response.WriteAsJson(res) } -func MonitorSpecificPVCofSpecificNamespace(request *restful.Request, response *restful.Response) { - MonitorPVC(request, response) +func MonitorPod(request *restful.Request, response *restful.Response) { + r := ParseRequestParams(request) + res := metrics.GetPodMetrics(r) + res, metricsNum := res.SortBy(r.SortMetric, r.SortType) + res = res.Page(r.PageNum, r.LimitNum, metricsNum) + response.WriteAsJson(res) +} + +func MonitorContainer(request *restful.Request, response *restful.Response) { + r := 
ParseRequestParams(request) + res := metrics.GetContainerMetrics(r) + res, metricsNum := res.SortBy(r.SortMetric, r.SortType) + res = res.Page(r.PageNum, r.LimitNum, metricsNum) + response.WriteAsJson(res) } func MonitorPVC(request *restful.Request, response *restful.Response) { - requestParams := ParseMonitoringRequestParams(request) - pvcName := requestParams.PVCName - if pvcName != "" { - requestParams.ResourcesFilter = fmt.Sprintf("^%s$", requestParams.PVCName) - } - - rawMetrics := metrics.GetPVCLevelMetrics(requestParams) - // sorting - sortedMetrics, maxMetricCount := metrics.Sort(requestParams.SortMetricName, requestParams.SortType, rawMetrics) - // paging - pagedMetrics := metrics.Page(requestParams.PageNum, requestParams.LimitNum, sortedMetrics, maxMetricCount) - response.WriteAsJson(pagedMetrics) + r := ParseRequestParams(request) + res := metrics.GetPVCMetrics(r) + res, metricsNum := res.SortBy(r.SortMetric, r.SortType) + res = res.Page(r.PageNum, r.LimitNum, metricsNum) + response.WriteAsJson(res) } func MonitorComponent(request *restful.Request, response *restful.Response) { - requestParams := ParseMonitoringRequestParams(request) - - if requestParams.MetricsFilter == "" { - requestParams.MetricsFilter = requestParams.ComponentName + "_.*" - } - - rawMetrics := metrics.GetComponentLevelMetrics(requestParams) - - response.WriteAsJson(rawMetrics) + r := ParseRequestParams(request) + res := metrics.GetComponentMetrics(r) + response.WriteAsJson(res) } -func ParseMonitoringRequestParams(request *restful.Request) *metrics.MonitoringRequestParams { - instantTime := strings.Trim(request.QueryParameter("time"), " ") +func ParseRequestParams(request *restful.Request) metrics.RequestParams { + var requestParams metrics.RequestParams + + queryTime := strings.Trim(request.QueryParameter("time"), " ") start := strings.Trim(request.QueryParameter("start"), " ") end := strings.Trim(request.QueryParameter("end"), " ") step := 
strings.Trim(request.QueryParameter("step"), " ") - timeout := strings.Trim(request.QueryParameter("timeout"), " ") - - sortMetricName := strings.Trim(request.QueryParameter("sort_metric"), " ") + sortMetric := strings.Trim(request.QueryParameter("sort_metric"), " ") sortType := strings.Trim(request.QueryParameter("sort_type"), " ") pageNum := strings.Trim(request.QueryParameter("page"), " ") limitNum := strings.Trim(request.QueryParameter("limit"), " ") tp := strings.Trim(request.QueryParameter("type"), " ") - metricsFilter := strings.Trim(request.QueryParameter("metrics_filter"), " ") resourcesFilter := strings.Trim(request.QueryParameter("resources_filter"), " ") - - metricsName := strings.Trim(request.QueryParameter("metrics_name"), " ") + nodeName := strings.Trim(request.PathParameter("node"), " ") + workspaceName := strings.Trim(request.PathParameter("workspace"), " ") + namespaceName := strings.Trim(request.PathParameter("namespace"), " ") + workloadKind := strings.Trim(request.PathParameter("kind"), " ") workloadName := strings.Trim(request.PathParameter("workload"), " ") - - nodeId := strings.Trim(request.PathParameter("node"), " ") - wsName := strings.Trim(request.PathParameter("workspace"), " ") - nsName := strings.Trim(request.PathParameter("namespace"), " ") podName := strings.Trim(request.PathParameter("pod"), " ") + containerName := strings.Trim(request.PathParameter("container"), " ") pvcName := strings.Trim(request.PathParameter("pvc"), " ") storageClassName := strings.Trim(request.PathParameter("storageclass"), " ") - containerName := strings.Trim(request.PathParameter("container"), " ") - workloadKind := strings.Trim(request.PathParameter("kind"), " ") componentName := strings.Trim(request.PathParameter("component"), " ") - var requestParams = metrics.MonitoringRequestParams{ - SortMetricName: sortMetricName, + requestParams = metrics.RequestParams{ + SortMetric: sortMetric, SortType: sortType, PageNum: pageNum, LimitNum: limitNum, - Tp: tp, + 
Type: tp, MetricsFilter: metricsFilter, ResourcesFilter: resourcesFilter, - MetricsName: metricsName, + NodeName: nodeName, + WorkspaceName: workspaceName, + NamespaceName: namespaceName, + WorkloadKind: workloadKind, WorkloadName: workloadName, - NodeId: nodeId, - WsName: wsName, - NsName: nsName, PodName: podName, + ContainerName: containerName, PVCName: pvcName, StorageClassName: storageClassName, - ContainerName: containerName, - WorkloadKind: workloadKind, ComponentName: componentName, } - if timeout == "" { - timeout = metrics.DefaultQueryTimeout + if metricsFilter == "" { + requestParams.MetricsFilter = ".*" } - if step == "" { - step = metrics.DefaultQueryStep + if resourcesFilter == "" { + requestParams.ResourcesFilter = ".*" } - // Whether query or query_range request - u := url.Values{} - if start != "" && end != "" { + v := url.Values{} - u.Set("start", convertTimeGranularity(start)) - u.Set("end", convertTimeGranularity(end)) - u.Set("step", step) - u.Set("timeout", timeout) + if start != "" && end != "" { // range query - // range query start time must be greater than the namespace creation time - if nsName != "" { + // metrics from a deleted namespace should be hidden + // therefore, for range query, if range query start time is less than the namespace creation time, set it to creation time + // it is the same with query at a fixed time point + if namespaceName != "" { nsLister := informers.SharedInformerFactory().Core().V1().Namespaces().Lister() - ns, err := nsLister.Get(nsName) + ns, err := nsLister.Get(namespaceName) if err == nil { - queryStartTime := u.Get("start") - nsCreationTime := strconv.FormatInt(ns.CreationTimestamp.Unix(), 10) - if nsCreationTime > queryStartTime { - u.Set("start", nsCreationTime) + creationTime := ns.CreationTimestamp.Time.Unix() + queryStart, err := strconv.ParseInt(start, 10, 64) + if err == nil && queryStart < creationTime { + start = strconv.FormatInt(creationTime, 10) } } } - requestParams.QueryType = 
metrics.RangeQueryType - requestParams.Params = u + v.Set("start", start) + v.Set("end", end) - return &requestParams - } - if instantTime != "" { - u.Set("time", instantTime) - u.Set("timeout", timeout) - requestParams.QueryType = metrics.DefaultQueryType - requestParams.Params = u - return &requestParams - } else { - u.Set("timeout", timeout) - requestParams.QueryType = metrics.DefaultQueryType - requestParams.Params = u - return &requestParams - } -} + if step == "" { + v.Set("step", metrics.DefaultQueryStep) + } else { + v.Set("step", step) + } + requestParams.QueryParams = v + requestParams.QueryType = metrics.RangeQuery -func convertTimeGranularity(ts string) string { - timeFloat, err := strconv.ParseFloat(ts, 64) - if err != nil { - klog.Errorf("convert second timestamp %s to minute timestamp failed", ts) - return strconv.FormatInt(int64(time.Now().Unix()), 10) + return requestParams + } else if queryTime != "" { // query + v.Set("time", queryTime) } - timeInt := int64(timeFloat) - // convert second timestamp to minute timestamp - secondTime := time.Unix(timeInt, 0).Truncate(time.Minute).Unix() - return strconv.FormatInt(secondTime, 10) + + requestParams.QueryParams = v + requestParams.QueryType = metrics.Query + return requestParams } diff --git a/pkg/models/metrics/constants.go b/pkg/models/metrics/constants.go new file mode 100644 index 000000000..d20edcb7d --- /dev/null +++ b/pkg/models/metrics/constants.go @@ -0,0 +1,41 @@ +/* + + Copyright 2019 The KubeSphere Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. + +*/ +package metrics + +const ( + MonitorLevelCluster = "cluster" + MonitorLevelNode = "node" + MonitorLevelWorkspace = "workspace" + MonitorLevelNamespace = "namespace" + MonitorLevelPod = "pod" + MonitorLevelContainer = "container" + MonitorLevelPVC = "pvc" + MonitorLevelWorkload = "workload" + MonitorLevelComponent = "component" + + ChannelMaxCapacity = 100 + + // prometheus query type + RangeQuery = "query_range" + Query = "query" + DefaultQueryStep = "10m" + + StatefulSet = "StatefulSet" + DaemonSet = "DaemonSet" + Deployment = "Deployment" +) diff --git a/pkg/models/metrics/metrics.go b/pkg/models/metrics/metrics.go index 40f817de3..64955a706 100644 --- a/pkg/models/metrics/metrics.go +++ b/pkg/models/metrics/metrics.go @@ -19,1259 +19,782 @@ package metrics import ( + "fmt" + "github.com/json-iterator/go" "k8s.io/klog" - "kubesphere.io/kubesphere/pkg/informers" + "kubesphere.io/kubesphere/pkg/models/workspaces" + cs "kubesphere.io/kubesphere/pkg/simple/client" "net/url" "regexp" - "runtime/debug" - "sort" "strings" "sync" "time" - "github.com/json-iterator/go" - - "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/labels" - - "kubesphere.io/kubesphere/pkg/models/workspaces" - cs "kubesphere.io/kubesphere/pkg/simple/client" + "kubesphere.io/kubesphere/pkg/simple/client/prometheus" ) var jsonIter = jsoniter.ConfigCompatibleWithStandardLibrary -const ( - ChannelMaxCapacityWorkspaceMetric = 800 - ChannelMaxCapacity = 100 -) - -type FormatedLevelMetric struct { - MetricsLevel string `json:"metrics_level" description:"metric level, eg. 
cluster"` - Results []FormatedMetric `json:"results" description:"actual array of results"` - CurrentPage int `json:"page,omitempty" description:"current page returned"` - TotalPage int `json:"total_page,omitempty" description:"total number of pages"` - TotalItem int `json:"total_item,omitempty" description:"page size"` -} - -type FormatedMetric struct { - MetricName string `json:"metric_name,omitempty" description:"metric name, eg. scheduler_up_sum"` - Status string `json:"status" description:"result status, one of error, success"` - Data FormatedMetricData `json:"data,omitempty" description:"actual metric result"` -} - -type FormatedMetricData struct { - Result []map[string]interface{} `json:"result" description:"metric data including metric metadata, time points and values"` - ResultType string `json:"resultType" description:"result type, one of matrix, vector"` -} - -type MetricResultValues []MetricResultValue - -type MetricResultValue struct { - timestamp float64 - value string -} - -type MetricItem struct { - MetricLabel map[string]string `json:"metric"` - Value []interface{} `json:"value"` -} - -type CommonMetricsResult struct { - Status string `json:"status"` - Data CommonMetricsData `json:"data"` -} - -type CommonMetricsData struct { - Result []CommonResultItem `json:"result"` - ResultType string `json:"resultType"` -} - -type CommonResultItem struct { - KubePodMetric KubePodMetric `json:"metric"` - Value interface{} `json:"value"` -} - -type KubePodMetric struct { - CreatedByKind string `json:"created_by_kind"` - CreatedByName string `json:"created_by_name"` - Namespace string `json:"namespace"` - Pod string `json:"pod"` -} - -type ComponentStatus struct { - Name string `json:"metric_name,omitempty"` - Namespace string `json:"namespace,omitempty"` - Labels map[string]string `json:"labels,omitempty"` - ComponentStatus []OneComponentStatus `json:"component"` -} - -type OneComponentStatus struct { - // Valid value: "Healthy" - Type string `json:"type"` - // 
Valid values for "Healthy": "True", "False", or "Unknown". - Status string `json:"status"` - // Message about the condition for a component. - Message string `json:"message,omitempty"` - // Condition error code for a component. - Error string `json:"error,omitempty"` -} - -func getAllWorkspaceNames(formatedMetric *FormatedMetric) map[string]int { - - var wsMap = make(map[string]int) - - for i := 0; i < len(formatedMetric.Data.Result); i++ { - // metricDesc needs clear naming - metricDesc := formatedMetric.Data.Result[i][ResultItemMetric] - metricDescMap, ensure := metricDesc.(map[string]interface{}) - if ensure { - if wsLabel, exist := metricDescMap[WorkspaceJoinedKey]; exist { - wsMap[wsLabel.(string)] = 1 - } - } - } - return wsMap -} - -func getAllWorkspaces() map[string]int { - +func GetClusterMetrics(params RequestParams) *Response { client, err := cs.ClientSets().Prometheus() if err != nil { + klog.Error(err) return nil } - paramValues := make(url.Values) - paramValues.Set("query", WorkspaceNamespaceLabelRule) - params := paramValues.Encode() - res := client.SendSecondaryMonitoringRequest(DefaultQueryType, params) + ch := make(chan APIResponse, ChannelMaxCapacity) + var wg sync.WaitGroup - metric := ReformatJson(res, "", map[string]string{"workspace": "workspace"}) + // for each metric, make PromQL expression and send the request to Prometheus servers + for _, metricName := range clusterMetrics { + matched, _ := regexp.MatchString(params.MetricsFilter, metricName) + if matched { + wg.Add(1) + go func(metricName string, params RequestParams) { + exp := makePromqlForCluster(metricName, params) + v := url.Values{} + for key, value := range params.QueryParams { + v[key] = value + } + v.Set("query", exp) + response := client.QueryToK8SPrometheus(params.QueryType, v.Encode()) + ch <- APIResponse{ + MetricName: metricName, + APIResponse: response, + } + wg.Done() + }(metricName, params) + } + } + wg.Wait() + close(ch) - return getAllWorkspaceNames(metric) + var 
apiResponse []APIResponse + for e := range ch { + apiResponse = append(apiResponse, e) + } + + return &Response{ + MetricsLevel: MonitorLevelCluster, + Results: apiResponse, + } } -func getPodNameRegexInWorkload(res, filter string) string { - - data := []byte(res) - var dat CommonMetricsResult - jsonErr := jsonIter.Unmarshal(data, &dat) - if jsonErr != nil { - klog.Errorln("json parse failed", jsonErr.Error(), res) +func GetNodeMetrics(params RequestParams) *Response { + client, err := cs.ClientSets().Prometheus() + if err != nil { + klog.Error(err) + return nil } - var podNames []string - for _, item := range dat.Data.Result { - podName := item.KubePodMetric.Pod + ch := make(chan APIResponse, ChannelMaxCapacity) + var wg sync.WaitGroup - if filter != "" { - if bol, _ := regexp.MatchString(filter, podName); bol { - podNames = append(podNames, podName) - } + // for each metric, make PromQL expression and send the request to Prometheus servers + for _, metricName := range nodeMetrics { + matched, _ := regexp.MatchString(params.MetricsFilter, metricName) + if matched { + wg.Add(1) + go func(metricName string, params RequestParams) { + exp := makePromqlForNode(metricName, params) + v := url.Values{} + for key, value := range params.QueryParams { + v[key] = value + } + v.Set("query", exp) + response := client.QueryToK8SPrometheus(params.QueryType, v.Encode()) + + // add label resouce_name, node_ip, node_role to each metric result item + // resouce_name serves as a unique identifier for the monitored resource + // it will be used during metrics sorting + for _, item := range response.Data.Result { + nodeName := item.Metric["node"] + item.Metric["resource_name"] = nodeName + item.Metric["node_ip"], item.Metric["node_role"] = getNodeAddressAndRole(nodeName) + } + + ch <- APIResponse{ + MetricName: metricName, + APIResponse: response, + } + wg.Done() + }(metricName, params) + } + } + wg.Wait() + close(ch) + + var apiResponse []APIResponse + for e := range ch { + apiResponse 
= append(apiResponse, e) + } + + return &Response{ + MetricsLevel: MonitorLevelNode, + Results: apiResponse, + } +} + +func GetWorkspaceMetrics(params RequestParams) *Response { + client, err := cs.ClientSets().Prometheus() + if err != nil { + klog.Error(err) + return nil + } + + ch := make(chan APIResponse, ChannelMaxCapacity) + var wg sync.WaitGroup + + // for each metric, make PromQL expression and send the request to Prometheus servers + for _, metricName := range workspaceMetrics { + matched, _ := regexp.MatchString(params.MetricsFilter, metricName) + if matched { + wg.Add(1) + go func(metricName string, params RequestParams) { + exp := makePromqlForWorkspace(metricName, params) + v := url.Values{} + for key, value := range params.QueryParams { + v[key] = value + } + v.Set("query", exp) + response := client.QueryToK8SPrometheus(params.QueryType, v.Encode()) + + // add label resouce_name + for _, item := range response.Data.Result { + item.Metric["resource_name"] = item.Metric["label_kubesphere_io_workspace"] + } + + ch <- APIResponse{ + MetricName: metricName, + APIResponse: response, + } + wg.Done() + }(metricName, params) + } + } + wg.Wait() + close(ch) + + var apiResponse []APIResponse + for e := range ch { + apiResponse = append(apiResponse, e) + } + + return &Response{ + MetricsLevel: MonitorLevelWorkspace, + Results: apiResponse, + } +} + +func GetNamespaceMetrics(params RequestParams) *Response { + client, err := cs.ClientSets().Prometheus() + if err != nil { + klog.Error(err) + return nil + } + + ch := make(chan APIResponse, ChannelMaxCapacity) + var wg sync.WaitGroup + + // for each metric, make PromQL expression and send the request to Prometheus servers + for _, metricName := range namespaceMetrics { + matched, _ := regexp.MatchString(params.MetricsFilter, metricName) + if matched { + wg.Add(1) + go func(metricName string, params RequestParams) { + exp := makePromqlForNamespace(metricName, params) + v := url.Values{} + for key, value := range 
params.QueryParams { + v[key] = value + } + v.Set("query", exp) + response := client.QueryToK8SPrometheus(params.QueryType, v.Encode()) + + // add label resouce_name + for _, item := range response.Data.Result { + item.Metric["resource_name"] = item.Metric["namespace"] + } + + ch <- APIResponse{ + MetricName: metricName, + APIResponse: response, + } + wg.Done() + }(metricName, params) + } + } + wg.Wait() + close(ch) + + var apiResponse []APIResponse + for e := range ch { + apiResponse = append(apiResponse, e) + } + + return &Response{ + MetricsLevel: MonitorLevelNamespace, + Results: apiResponse, + } +} + +func GetWorkloadMetrics(params RequestParams) *Response { + client, err := cs.ClientSets().Prometheus() + if err != nil { + klog.Error(err) + return nil + } + + ch := make(chan APIResponse, ChannelMaxCapacity) + var wg sync.WaitGroup + + // for each metric, make PromQL expression and send the request to Prometheus servers + for _, metricName := range workloadMetrics { + matched, _ := regexp.MatchString(params.MetricsFilter, metricName) + if matched { + wg.Add(1) + go func(metricName string, params RequestParams) { + exp := makePromqlForWorkload(metricName, params) + v := url.Values{} + for key, value := range params.QueryParams { + v[key] = value + } + v.Set("query", exp) + response := client.QueryToK8SPrometheus(params.QueryType, v.Encode()) + + // add label resouce_name + for _, item := range response.Data.Result { + item.Metric["resource_name"] = item.Metric["workload"] + } + + ch <- APIResponse{ + MetricName: metricName, + APIResponse: response, + } + wg.Done() + }(metricName, params) + } + } + wg.Wait() + close(ch) + + var apiResponse []APIResponse + for e := range ch { + apiResponse = append(apiResponse, e) + } + + return &Response{ + MetricsLevel: MonitorLevelWorkload, + Results: apiResponse, + } +} + +func GetPodMetrics(params RequestParams) *Response { + client, err := cs.ClientSets().Prometheus() + if err != nil { + klog.Error(err) + return nil + } + + 
ch := make(chan APIResponse, ChannelMaxCapacity) + var wg sync.WaitGroup + + // for each metric, make PromQL expression and send the request to Prometheus servers + for _, metricName := range podMetrics { + matched, _ := regexp.MatchString(params.MetricsFilter, metricName) + if matched { + wg.Add(1) + go func(metricName string, params RequestParams) { + exp := makePromqlForPod(metricName, params) + v := url.Values{} + for key, value := range params.QueryParams { + v[key] = value + } + v.Set("query", exp) + response := client.QueryToK8SPrometheus(params.QueryType, v.Encode()) + + // add label resouce_name + for _, item := range response.Data.Result { + item.Metric["resource_name"] = item.Metric["pod_name"] + } + + ch <- APIResponse{ + MetricName: metricName, + APIResponse: response, + } + wg.Done() + }(metricName, params) + } + } + wg.Wait() + close(ch) + + var apiResponse []APIResponse + for e := range ch { + apiResponse = append(apiResponse, e) + } + + return &Response{ + MetricsLevel: MonitorLevelPod, + Results: apiResponse, + } +} + +func GetContainerMetrics(params RequestParams) *Response { + client, err := cs.ClientSets().Prometheus() + if err != nil { + klog.Error(err) + return nil + } + + ch := make(chan APIResponse, ChannelMaxCapacity) + var wg sync.WaitGroup + + // for each metric, make PromQL expression and send the request to Prometheus servers + for _, metricName := range containerMetrics { + matched, _ := regexp.MatchString(params.MetricsFilter, metricName) + if matched { + wg.Add(1) + go func(metricName string, params RequestParams) { + exp := makePromqlForContainer(metricName, params) + v := url.Values{} + for key, value := range params.QueryParams { + v[key] = value + } + v.Set("query", exp) + response := client.QueryToK8SPrometheus(params.QueryType, v.Encode()) + + // add label resouce_name + for _, item := range response.Data.Result { + item.Metric["resource_name"] = item.Metric["container_name"] + } + + ch <- APIResponse{ + MetricName: 
metricName, + APIResponse: response, + } + wg.Done() + }(metricName, params) + } + } + wg.Wait() + close(ch) + + var apiResponse []APIResponse + for e := range ch { + apiResponse = append(apiResponse, e) + } + + return &Response{ + MetricsLevel: MonitorLevelContainer, + Results: apiResponse, + } +} + +func GetPVCMetrics(params RequestParams) *Response { + client, err := cs.ClientSets().Prometheus() + if err != nil { + klog.Error(err) + return nil + } + + ch := make(chan APIResponse, ChannelMaxCapacity) + var wg sync.WaitGroup + + // for each metric, make PromQL expression and send the request to Prometheus servers + for _, metricName := range pvcMetrics { + matched, _ := regexp.MatchString(params.MetricsFilter, metricName) + if matched { + wg.Add(1) + go func(metricName string, params RequestParams) { + exp := makePromqlForPVC(metricName, params) + v := url.Values{} + for key, value := range params.QueryParams { + v[key] = value + } + v.Set("query", exp) + response := client.QueryToK8SPrometheus(params.QueryType, v.Encode()) + + // add label resouce_name + for _, item := range response.Data.Result { + item.Metric["resource_name"] = item.Metric["persistentvolumeclaim"] + } + + ch <- APIResponse{ + MetricName: metricName, + APIResponse: response, + } + wg.Done() + }(metricName, params) + } + } + wg.Wait() + close(ch) + + var apiResponse []APIResponse + for e := range ch { + apiResponse = append(apiResponse, e) + } + + return &Response{ + MetricsLevel: MonitorLevelPVC, + Results: apiResponse, + } +} + +func GetComponentMetrics(params RequestParams) *Response { + client, err := cs.ClientSets().Prometheus() + if err != nil { + klog.Error(err) + return nil + } + + ch := make(chan APIResponse, ChannelMaxCapacity) + var wg sync.WaitGroup + + // for each metric, make PromQL expression and send the request to Prometheus servers + for _, metricName := range componentMetrics { + matchComponentName, _ := regexp.MatchString(params.ComponentName, metricName) + matchMetricsFilter, 
_ := regexp.MatchString(params.MetricsFilter, metricName) + if matchComponentName && matchMetricsFilter { + wg.Add(1) + go func(metricName string, params RequestParams) { + exp := makePromqlForComponent(metricName, params) + v := url.Values{} + for key, value := range params.QueryParams { + v[key] = value + } + v.Set("query", exp) + response := client.QueryToK8SSystemPrometheus(params.QueryType, v.Encode()) + + // add node address when queried metric is etcd_server_list + if metricName == "etcd_server_list" { + for _, item := range response.Data.Result { + item.Metric["node_name"] = getNodeName(item.Metric["node_ip"]) + } + } + + ch <- APIResponse{ + MetricName: metricName, + APIResponse: response, + } + wg.Done() + }(metricName, params) + } + } + wg.Wait() + close(ch) + + var apiResponse []APIResponse + for e := range ch { + apiResponse = append(apiResponse, e) + } + + return &Response{ + MetricsLevel: MonitorLevelComponent, + Results: apiResponse, + } +} + +func makePromqlForCluster(metricName string, _ RequestParams) string { + return metricsPromqlMap[metricName] +} + +func makePromqlForNode(metricName string, params RequestParams) string { + var rule = metricsPromqlMap[metricName] + var nodeSelector string + + if params.NodeName != "" { + nodeSelector = fmt.Sprintf(`node="%s"`, params.NodeName) + } else { + nodeSelector = fmt.Sprintf(`node=~"%s"`, params.ResourcesFilter) + } + + return strings.Replace(rule, "$1", nodeSelector, -1) +} + +func makePromqlForWorkspace(metricName string, params RequestParams) string { + var exp = metricsPromqlMap[metricName] + var workspaceSelector string + + if params.WorkspaceName != "" { + workspaceSelector = fmt.Sprintf(`label_kubesphere_io_workspace="%s"`, params.WorkspaceName) + } else { + workspaceSelector = fmt.Sprintf(`label_kubesphere_io_workspace=~"%s"`, params.ResourcesFilter) + } + + return strings.Replace(exp, "$1", workspaceSelector, -1) +} + +func makePromqlForNamespace(metricName string, params RequestParams) string 
{ + var exp = metricsPromqlMap[metricName] + var namespaceSelector string + + // For monitoring namespaces in the specific workspace + // GET /workspaces/{workspace}/namespaces + if params.WorkspaceName != "" { + namespaceSelector = fmt.Sprintf(`label_kubesphere_io_workspace="%s", namespace=~"%s"`, params.WorkspaceName, params.ResourcesFilter) + return strings.Replace(exp, "$1", namespaceSelector, -1) + } + + // For monitoring the specific namespaces + // GET /namespaces/{namespace} or + // GET /namespaces + if params.NamespaceName != "" { + namespaceSelector = fmt.Sprintf(`namespace="%s"`, params.NamespaceName) + } else { + namespaceSelector = fmt.Sprintf(`namespace=~"%s"`, params.ResourcesFilter) + } + return strings.Replace(exp, "$1", namespaceSelector, -1) +} + +func makePromqlForWorkload(metricName string, params RequestParams) string { + var exp = metricsPromqlMap[metricName] + var kind, kindSelector, workloadSelector string + + switch params.WorkloadKind { + case "deployment": + kind = Deployment + kindSelector = fmt.Sprintf(`namespace="%s", deployment!="", deployment=~"%s"`, params.NamespaceName, params.ResourcesFilter) + case "statefulset": + kind = StatefulSet + kindSelector = fmt.Sprintf(`namespace="%s", statefulset!="", statefulset=~"%s"`, params.NamespaceName, params.ResourcesFilter) + case "daemonset": + kind = DaemonSet + kindSelector = fmt.Sprintf(`namespace="%s", daemonset!="", daemonset=~"%s"`, params.NamespaceName, params.ResourcesFilter) + default: + kind = ".*" + kindSelector = fmt.Sprintf(`namespace="%s"`, params.NamespaceName) + } + + workloadSelector = fmt.Sprintf(`namespace="%s", workload=~"%s:%s"`, params.NamespaceName, kind, params.ResourcesFilter) + return strings.NewReplacer("$1", workloadSelector, "$2", kindSelector).Replace(exp) +} + +func makePromqlForPod(metricName string, params RequestParams) string { + var exp = metricsPromqlMap[metricName] + var podSelector, workloadSelector string + + // For monitoriong pods of the specific 
workload + // GET /namespaces/{namespace}/workloads/{kind}/{workload}/pods + if params.WorkloadName != "" { + switch params.WorkloadKind { + case "deployment": + workloadSelector = fmt.Sprintf(`owner_kind="ReplicaSet", owner_name=~"^%s-[^-]{1,10}$"`, params.WorkloadName) + case "statefulset": + workloadSelector = fmt.Sprintf(`owner_kind="StatefulSet", owner_name="%s"`, params.WorkloadName) + case "daemonset": + workloadSelector = fmt.Sprintf(`owner_kind="DaemonSet", owner_name="%s"`, params.WorkloadName) + } + } + + // For monitoring pods in the specific namespace + // GET /namespaces/{namespace}/workloads/{kind}/{workload}/pods or + // GET /namespaces/{namespace}/pods/{pod} or + // GET /namespaces/{namespace}/pods + if params.NamespaceName != "" { + if params.PodName != "" { + podSelector = fmt.Sprintf(`pod="%s", namespace="%s"`, params.PodName, params.NamespaceName) } else { - podNames = append(podNames, podName) - } - - } - - podNamesFilter := "^(" + strings.Join(podNames, "|") + ")$" - return podNamesFilter -} - -func unifyMetricHistoryTimeRange(fmtMetrics *FormatedMetric) { - - defer func() { - if err := recover(); err != nil { - klog.Errorln(err) - debug.PrintStack() - } - }() - - var timestampMap = make(map[float64]bool) - - if fmtMetrics.Data.ResultType == ResultTypeMatrix { - for i := range fmtMetrics.Data.Result { - values, exist := fmtMetrics.Data.Result[i][ResultItemValues] - if exist { - valueArray, sure := values.([]interface{}) - if sure { - for j := range valueArray { - timeAndValue := valueArray[j].([]interface{}) - timestampMap[float64(timeAndValue[0].(uint64))] = true - } - } - } + podSelector = fmt.Sprintf(`pod=~"%s", namespace="%s"`, params.ResourcesFilter, params.NamespaceName) } } - timestampArray := make([]float64, len(timestampMap)) - i := 0 - for timestamp := range timestampMap { - timestampArray[i] = timestamp - i++ - } - sort.Float64s(timestampArray) - - if fmtMetrics.Data.ResultType == ResultTypeMatrix { - for i := 0; i < 
len(fmtMetrics.Data.Result); i++ { - - values, exist := fmtMetrics.Data.Result[i][ResultItemValues] - if exist { - valueArray, sure := values.([]interface{}) - if sure { - - formatValueArray := make([][]interface{}, len(timestampArray)) - j := 0 - - for k := range timestampArray { - valueItem, sure := valueArray[j].([]interface{}) - if sure && float64(valueItem[0].(uint64)) == timestampArray[k] { - formatValueArray[k] = []interface{}{int64(timestampArray[k]), valueItem[1]} - j++ - } else { - formatValueArray[k] = []interface{}{int64(timestampArray[k]), "-1"} - } - } - fmtMetrics.Data.Result[i][ResultItemValues] = formatValueArray - } - } - } - } -} - -func AssembleSpecificWorkloadMetricRequestInfo(monitoringRequest *MonitoringRequestParams, metricName string) (string, string, bool) { - - client, err := cs.ClientSets().Prometheus() - if err != nil { - klog.Error(err) - return "", "", false - } - - nsName := monitoringRequest.NsName - wlName := monitoringRequest.WorkloadName - podsFilter := monitoringRequest.ResourcesFilter - - rule := MakeSpecificWorkloadRule(monitoringRequest.WorkloadKind, wlName, nsName) - paramValues := monitoringRequest.Params - params := makeRequestParamString(rule, paramValues) - - res := client.SendMonitoringRequest(DefaultQueryType, params) - - podNamesFilter := getPodNameRegexInWorkload(res, podsFilter) - - queryType := monitoringRequest.QueryType - rule = MakePodPromQL(metricName, nsName, "", "", podNamesFilter) - params = makeRequestParamString(rule, paramValues) - - return queryType, params, rule == "" -} - -func AssembleAllWorkloadMetricRequestInfo(monitoringRequest *MonitoringRequestParams, metricName string) (string, string) { - queryType := monitoringRequest.QueryType - - paramValues := monitoringRequest.Params - - rule := MakeWorkloadPromQL(metricName, monitoringRequest.NsName, monitoringRequest.ResourcesFilter, monitoringRequest.WorkloadKind) - - params := makeRequestParamString(rule, paramValues) - return queryType, params -} - 
-func AssemblePodMetricRequestInfo(monitoringRequest *MonitoringRequestParams, metricName string) (string, string, bool) { - queryType := monitoringRequest.QueryType - - paramValues := monitoringRequest.Params - - rule := MakePodPromQL(metricName, monitoringRequest.NsName, monitoringRequest.NodeId, monitoringRequest.PodName, monitoringRequest.ResourcesFilter) - params := makeRequestParamString(rule, paramValues) - return queryType, params, rule == "" -} - -func AssemblePVCMetricRequestInfo(monitoringRequest *MonitoringRequestParams, metricName string) (string, string, bool) { - queryType := monitoringRequest.QueryType - - paramValues := monitoringRequest.Params - - rule := MakePVCPromQL(metricName, monitoringRequest.NsName, monitoringRequest.PVCName, monitoringRequest.StorageClassName, monitoringRequest.ResourcesFilter) - params := makeRequestParamString(rule, paramValues) - return queryType, params, rule == "" -} - -func GetNodeAddressInfo() *map[string][]v1.NodeAddress { - nodeLister := informers.SharedInformerFactory().Core().V1().Nodes().Lister() - nodes, err := nodeLister.List(labels.Everything()) - - if err != nil { - klog.Errorln(err.Error()) - } - - var nodeAddress = make(map[string][]v1.NodeAddress) - - for _, node := range nodes { - nodeAddress[node.Name] = node.Status.Addresses - } - return &nodeAddress -} - -func AddNodeAddressMetric(nodeMetric *FormatedMetric, nodeAddress *map[string][]v1.NodeAddress) { - - for i := 0; i < len(nodeMetric.Data.Result); i++ { - metricDesc := nodeMetric.Data.Result[i][ResultItemMetric] - metricDescMap, ensure := metricDesc.(map[string]interface{}) - if ensure { - if nodeId, exist := metricDescMap[ResultItemMetricResourceName]; exist { - addr, exist := (*nodeAddress)[nodeId.(string)] - if exist { - metricDescMap["address"] = addr - } - } - } - } -} - -func MonitorContainer(monitoringRequest *MonitoringRequestParams, metricName string) *FormatedMetric { - client, err := cs.ClientSets().Prometheus() - if err != nil { - 
klog.Error(err) - return nil - } - - queryType, params := AssembleContainerMetricRequestInfo(monitoringRequest, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) - res := ReformatJson(metricsStr, metricName, map[string]string{MetricLevelContainerName: ""}) - return res -} - -func AssembleContainerMetricRequestInfo(monitoringRequest *MonitoringRequestParams, metricName string) (string, string) { - queryType := monitoringRequest.QueryType - - paramValues := monitoringRequest.Params - rule := MakeContainerPromQL(monitoringRequest.NsName, monitoringRequest.NodeId, monitoringRequest.PodName, monitoringRequest.ContainerName, metricName, monitoringRequest.ResourcesFilter) - params := makeRequestParamString(rule, paramValues) - - return queryType, params -} - -func AssembleNamespaceMetricRequestInfo(monitoringRequest *MonitoringRequestParams, metricName string) (string, string) { - queryType := monitoringRequest.QueryType - - paramValues := monitoringRequest.Params - rule := MakeNamespacePromQL(monitoringRequest.NsName, monitoringRequest.ResourcesFilter, metricName) - params := makeRequestParamString(rule, paramValues) - - return queryType, params -} - -func AssembleNamespaceMetricRequestInfoByNamesapce(monitoringRequest *MonitoringRequestParams, namespace string, metricName string) (string, string) { - queryType := monitoringRequest.QueryType - - paramValues := monitoringRequest.Params - rule := MakeNamespacePromQL(namespace, monitoringRequest.ResourcesFilter, metricName) - - params := makeRequestParamString(rule, paramValues) - - return queryType, params -} - -func AssembleSpecificWorkspaceMetricRequestInfo(monitoringRequest *MonitoringRequestParams, namespaceList []string, workspace string, metricName string) (string, string) { - - nsFilter := "^(" + strings.Join(namespaceList, "|") + ")$" - - queryType := monitoringRequest.QueryType - - rule := MakeSpecificWorkspacePromQL(metricName, nsFilter, workspace) - paramValues := 
monitoringRequest.Params - params := makeRequestParamString(rule, paramValues) - return queryType, params -} - -func AssembleAllWorkspaceMetricRequestInfo(monitoringRequest *MonitoringRequestParams, namespaceList []string, metricName string) (string, string) { - var nsFilter = "^()$" - - if namespaceList != nil { - nsFilter = "^(" + strings.Join(namespaceList, "|") + ")$" - } - - queryType := monitoringRequest.QueryType - - rule := MakeAllWorkspacesPromQL(metricName, nsFilter) - paramValues := monitoringRequest.Params - params := makeRequestParamString(rule, paramValues) - return queryType, params -} - -func makeRequestParamString(rule string, paramValues url.Values) string { - - defer func() { - if err := recover(); err != nil { - klog.Errorln(err) - debug.PrintStack() - } - }() - - var values = make(url.Values) - for key, v := range paramValues { - values.Set(key, v[0]) - } - - values.Set("query", rule) - - params := values.Encode() - - return params -} - -func filterNamespace(nsFilter string, namespaceList []string) []string { - var newNSlist []string - if nsFilter == "" { - nsFilter = ".*" - } - for _, ns := range namespaceList { - bol, _ := regexp.MatchString(nsFilter, ns) - if bol { - newNSlist = append(newNSlist, ns) - } - } - return newNSlist -} - -func MonitorAllWorkspaces(monitoringRequest *MonitoringRequestParams) *FormatedLevelMetric { - metricsFilter := monitoringRequest.MetricsFilter - if strings.Trim(metricsFilter, " ") == "" { - metricsFilter = ".*" - } - var filterMetricsName []string - for _, metricName := range WorkspaceMetricsNames { - bol, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && bol { - filterMetricsName = append(filterMetricsName, metricName) + // For monitoring pods on the specific node + // GET /nodes/{node}/pods/{pod} + if params.NodeName != "" { + if params.PodName != "" { + podSelector = fmt.Sprintf(`pod="%s", node="%s"`, params.PodName, params.NodeName) + } else { + podSelector = fmt.Sprintf(`pod=~"%s", 
node="%s"`, params.ResourcesFilter, params.NodeName) } } - var wgAll sync.WaitGroup - var wsAllch = make(chan *[]FormatedMetric, ChannelMaxCapacityWorkspaceMetric) - - wsMap := getAllWorkspaces() - - for ws := range wsMap { - // Only execute Prometheus queries for specific metrics on specific workspaces - bol, err := regexp.MatchString(monitoringRequest.ResourcesFilter, ws) - if err == nil && bol { - // a workspace - wgAll.Add(1) - go collectWorkspaceMetric(monitoringRequest, ws, filterMetricsName, &wgAll, wsAllch) - } - } - - wgAll.Wait() - close(wsAllch) - - fmtMetricMap := make(map[string]FormatedMetric) - for oneWsMetric := range wsAllch { - if oneWsMetric != nil { - // aggregate workspace metric - for _, metric := range *oneWsMetric { - fm, exist := fmtMetricMap[metric.MetricName] - if exist { - if metric.Status == "error" { - fm.Status = metric.Status - } - fm.Data.Result = append(fm.Data.Result, metric.Data.Result...) - fmtMetricMap[metric.MetricName] = fm - } else { - fmtMetricMap[metric.MetricName] = metric - } - } - } - } - - var metricArray = make([]FormatedMetric, 0) - for _, metric := range fmtMetricMap { - metricArray = append(metricArray, metric) - } - - return &FormatedLevelMetric{ - MetricsLevel: MetricLevelClusterWorkspace, - Results: metricArray, - } + return strings.NewReplacer("$1", workloadSelector, "$2", podSelector).Replace(exp) } -func collectWorkspaceMetric(monitoringRequest *MonitoringRequestParams, ws string, filterMetricsName []string, wgAll *sync.WaitGroup, wsAllch chan *[]FormatedMetric) { - client, err := cs.ClientSets().Prometheus() - if err != nil { - klog.Error(err) - return +func makePromqlForContainer(metricName string, params RequestParams) string { + var exp = metricsPromqlMap[metricName] + var containerSelector string + + if params.ContainerName != "" { + containerSelector = fmt.Sprintf(`pod_name="%s", namespace="%s", container_name="%s"`, params.PodName, params.NamespaceName, params.ContainerName) + } else { + 
containerSelector = fmt.Sprintf(`pod_name="%s", namespace="%s", container_name=~"%s"`, params.PodName, params.NamespaceName, params.ResourcesFilter) } - defer wgAll.Done() - var wg sync.WaitGroup - var ch = make(chan *FormatedMetric, ChannelMaxCapacity) - namespaceArray, err := workspaces.WorkspaceNamespaces(ws) - if err != nil { - klog.Errorln(err) - } - - // add by namespace - for _, metricName := range filterMetricsName { - wg.Add(1) - go func(metricName string) { - - queryType, params := AssembleSpecificWorkspaceMetricRequestInfo(monitoringRequest, namespaceArray, ws, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) - ch <- ReformatJson(metricsStr, metricName, map[string]string{ResultItemMetricResourceName: ws}) - wg.Done() - }(metricName) - } - - wg.Wait() - close(ch) - - var metricsArray []FormatedMetric - for oneMetric := range ch { - if oneMetric != nil { - // add "workspace" to oneMetric "metric" field - for i := 0; i < len(oneMetric.Data.Result); i++ { - tmap, sure := oneMetric.Data.Result[i][ResultItemMetric].(map[string]interface{}) - if sure { - tmap[MetricLevelWorkspace] = ws - oneMetric.Data.Result[i][ResultItemMetric] = tmap - } - } - metricsArray = append(metricsArray, *oneMetric) - } - } - - wsAllch <- &metricsArray + return strings.Replace(exp, "$1", containerSelector, -1) } -func GetClusterLevelMetrics(monitoringRequest *MonitoringRequestParams) *FormatedLevelMetric { - client, err := cs.ClientSets().Prometheus() - if err != nil { - return nil - } +func makePromqlForPVC(metricName string, params RequestParams) string { + var exp = metricsPromqlMap[metricName] + var pvcSelector string - metricsFilter := monitoringRequest.MetricsFilter - if metricsFilter == "" { - metricsFilter = ".*" - } - - var ch = make(chan *FormatedMetric, ChannelMaxCapacity) - var wg sync.WaitGroup - - for _, metricName := range ClusterMetricsNames { - matched, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && matched { - 
wg.Add(1) - go func(metricName string) { - queryType, params := AssembleClusterMetricRequestInfo(monitoringRequest, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) - ch <- ReformatJson(metricsStr, metricName, map[string]string{MetricLevelCluster: "local"}) - wg.Done() - }(metricName) + // For monitoring persistentvolumeclaims in the specific namespace + // GET /namespaces/{namespace}/persistentvolumeclaims/{persistentvolumeclaim} or + // GET /namespaces/{namespace}/persistentvolumeclaims + if params.NamespaceName != "" { + if params.PVCName != "" { + pvcSelector = fmt.Sprintf(`namespace="%s", persistentvolumeclaim="%s"`, params.NamespaceName, params.PVCName) + } else { + pvcSelector = fmt.Sprintf(`namespace="%s", persistentvolumeclaim=~"%s"`, params.NamespaceName, params.ResourcesFilter) } + return strings.Replace(exp, "$1", pvcSelector, -1) } - wg.Wait() - close(ch) - - var metricsArray []FormatedMetric - - for oneMetric := range ch { - if oneMetric != nil { - metricsArray = append(metricsArray, *oneMetric) - } - } - - return &FormatedLevelMetric{ - MetricsLevel: MetricLevelCluster, - Results: metricsArray, + // For monitoring persistentvolumeclaims of the specific storageclass + // GET /storageclasses/{storageclass}/persistentvolumeclaims + if params.StorageClassName != "" { + pvcSelector = fmt.Sprintf(`storageclass="%s", persistentvolumeclaim=~"%s"`, params.StorageClassName, params.ResourcesFilter) } + return strings.Replace(exp, "$1", pvcSelector, -1) } -func GetNodeLevelMetrics(monitoringRequest *MonitoringRequestParams) *FormatedLevelMetric { - client, err := cs.ClientSets().Prometheus() - if err != nil { - return nil - } - - metricsFilter := monitoringRequest.MetricsFilter - if metricsFilter == "" { - metricsFilter = ".*" - } - - var ch = make(chan *FormatedMetric, ChannelMaxCapacity) - var wg sync.WaitGroup - - for _, metricName := range NodeMetricsNames { - matched, err := regexp.MatchString(metricsFilter, metricName) - if err 
== nil && matched { - wg.Add(1) - go func(metricName string) { - queryType, params := AssembleNodeMetricRequestInfo(monitoringRequest, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) - ch <- ReformatJson(metricsStr, metricName, map[string]string{MetricLevelNode: ""}) - wg.Done() - }(metricName) - } - } - - wg.Wait() - close(ch) - - var metricsArray []FormatedMetric - - for oneMetric := range ch { - if oneMetric != nil { - metricsArray = append(metricsArray, *oneMetric) - } - } - - return &FormatedLevelMetric{ - MetricsLevel: MetricLevelNode, - Results: metricsArray, - } +func makePromqlForComponent(metricName string, _ RequestParams) string { + return metricsPromqlMap[metricName] } -func GetWorkspaceLevelMetrics(monitoringRequest *MonitoringRequestParams) *FormatedLevelMetric { - client, err := cs.ClientSets().Prometheus() - if err != nil { - klog.Error(err) - return nil - } +func GetClusterStatistics() *Response { - metricsFilter := monitoringRequest.MetricsFilter - if metricsFilter == "" { - metricsFilter = ".*" - } + now := time.Now().Unix() - var ch = make(chan *FormatedMetric, ChannelMaxCapacity) - var wg sync.WaitGroup + var metricsArray []APIResponse + workspaceStats := APIResponse{MetricName: MetricClusterWorkspaceCount} + devopsStats := APIResponse{MetricName: MetricClusterDevopsCount} + namespaceStats := APIResponse{MetricName: MetricClusterNamespaceCount} + accountStats := APIResponse{MetricName: MetricClusterAccountCount} - // a specific workspace's metrics - if monitoringRequest.WsName != "" { - namespaceArray, err := workspaces.WorkspaceNamespaces(monitoringRequest.WsName) + wg := sync.WaitGroup{} + wg.Add(4) + + go func() { + num, err := workspaces.WorkspaceCount() if err != nil { - klog.Errorln(err.Error()) - } - namespaceArray = filterNamespace(monitoringRequest.ResourcesFilter, namespaceArray) - - if monitoringRequest.Tp == "rank" { - for _, metricName := range NamespaceMetricsNames { - if metricName == 
MetricNameWorkspaceAllProjectCount { - continue - } - - matched, err := regexp.MatchString(metricsFilter, metricName) - if err != nil || !matched { - continue - } - - wg.Add(1) - go func(metricName string) { - - var chForOneMetric = make(chan *FormatedMetric, ChannelMaxCapacity) - var wgForOneMetric sync.WaitGroup - - for _, ns := range namespaceArray { - wgForOneMetric.Add(1) - go func(metricName string, namespace string) { - - queryType, params := AssembleNamespaceMetricRequestInfoByNamesapce(monitoringRequest, namespace, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) - chForOneMetric <- ReformatJson(metricsStr, metricName, map[string]string{ResultItemMetricResourceName: namespace}) - wgForOneMetric.Done() - }(metricName, ns) - } - - wgForOneMetric.Wait() - close(chForOneMetric) - - // ranking is for vector type result only - aggregatedResult := FormatedMetric{MetricName: metricName, Status: MetricStatusSuccess, Data: FormatedMetricData{Result: []map[string]interface{}{}, ResultType: ResultTypeVector}} - - for oneMetric := range chForOneMetric { - - if oneMetric != nil { - - // append .data.result[0] - if len(oneMetric.Data.Result) > 0 { - aggregatedResult.Data.Result = append(aggregatedResult.Data.Result, oneMetric.Data.Result[0]) - } - } - } - - ch <- &aggregatedResult - wg.Done() - }(metricName) - - } - + klog.Errorln(err) + workspaceStats.Status = "error" } else { - - workspace := monitoringRequest.WsName - - for _, metricName := range WorkspaceMetricsNames { - - if metricName == MetricNameWorkspaceAllProjectCount { - continue - } - - matched, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && matched { - wg.Add(1) - go func(metricName string, workspace string) { - queryType, params := AssembleSpecificWorkspaceMetricRequestInfo(monitoringRequest, namespaceArray, workspace, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) - ch <- ReformatJson(metricsStr, metricName, 
map[string]string{ResultItemMetricResourceName: workspace}) - wg.Done() - }(metricName, workspace) - } - } + workspaceStats.withMetricResult(now, num) } - } else { - // sum all workspaces - for _, metricName := range WorkspaceMetricsNames { - matched, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && matched { - - wg.Add(1) - - go func(metricName string) { - queryType, params := AssembleAllWorkspaceMetricRequestInfo(monitoringRequest, nil, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) - ch <- ReformatJson(metricsStr, metricName, map[string]string{MetricLevelWorkspace: "workspaces"}) - - wg.Done() - }(metricName) - } - } - } - - wg.Wait() - close(ch) - - var metricsArray []FormatedMetric - - for oneMetric := range ch { - if oneMetric != nil { - metricsArray = append(metricsArray, *oneMetric) - } - } - - return &FormatedLevelMetric{ - MetricsLevel: MetricLevelWorkspace, - Results: metricsArray, - } -} - -func GetNamespaceLevelMetrics(monitoringRequest *MonitoringRequestParams) *FormatedLevelMetric { - client, err := cs.ClientSets().Prometheus() - if err != nil { - klog.Error(err) - return nil - } - - metricsFilter := monitoringRequest.MetricsFilter - if metricsFilter == "" { - metricsFilter = ".*" - } - - var ch = make(chan *FormatedMetric, ChannelMaxCapacity) - var wg sync.WaitGroup - - for _, metricName := range NamespaceMetricsNames { - matched, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && matched { - wg.Add(1) - go func(metricName string) { - - queryType, params := AssembleNamespaceMetricRequestInfo(monitoringRequest, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) - - rawResult := ReformatJson(metricsStr, metricName, map[string]string{MetricLevelNamespace: ""}) - ch <- rawResult - - wg.Done() - }(metricName) - } - } - - wg.Wait() - close(ch) - - var metricsArray []FormatedMetric - - for oneMetric := range ch { - if oneMetric != nil { - metricsArray = 
append(metricsArray, *oneMetric) - } - } - - return &FormatedLevelMetric{ - MetricsLevel: MetricLevelNamespace, - Results: metricsArray, - } -} - -func GetWorkloadLevelMetrics(monitoringRequest *MonitoringRequestParams) *FormatedLevelMetric { - client, err := cs.ClientSets().Prometheus() - if err != nil { - klog.Error(err) - return nil - } - - metricsFilter := monitoringRequest.MetricsFilter - if metricsFilter == "" { - metricsFilter = ".*" - } - - var ch = make(chan *FormatedMetric, ChannelMaxCapacity) - var wg sync.WaitGroup - - if monitoringRequest.WorkloadName == "" { - for _, metricName := range WorkloadMetricsNames { - matched, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && matched { - wg.Add(1) - go func(metricName string) { - queryType, params := AssembleAllWorkloadMetricRequestInfo(monitoringRequest, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) - reformattedResult := ReformatJson(metricsStr, metricName, map[string]string{MetricLevelWorkload: ""}) - // no need to append a null result - ch <- reformattedResult - wg.Done() - }(metricName) - } - } - } else { - for _, metricName := range WorkloadMetricsNames { - bol, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && bol { - wg.Add(1) - go func(metricName string) { - metricName = strings.TrimLeft(metricName, "workload_") - queryType, params, nullRule := AssembleSpecificWorkloadMetricRequestInfo(monitoringRequest, metricName) - if !nullRule { - metricsStr := client.SendMonitoringRequest(queryType, params) - fmtMetrics := ReformatJson(metricsStr, metricName, map[string]string{MetricLevelPodName: ""}) - unifyMetricHistoryTimeRange(fmtMetrics) - ch <- fmtMetrics - } - wg.Done() - }(metricName) - } - } - } - - wg.Wait() - close(ch) - - var metricsArray []FormatedMetric - - for oneMetric := range ch { - if oneMetric != nil { - metricsArray = append(metricsArray, *oneMetric) - } - } - - return &FormatedLevelMetric{ - MetricsLevel: 
MetricLevelWorkload, - Results: metricsArray, - } -} - -func GetPodLevelMetrics(monitoringRequest *MonitoringRequestParams) *FormatedLevelMetric { - client, err := cs.ClientSets().Prometheus() - if err != nil { - klog.Error(err) - return nil - } - - metricsFilter := monitoringRequest.MetricsFilter - if metricsFilter == "" { - metricsFilter = ".*" - } - - var ch = make(chan *FormatedMetric, ChannelMaxCapacity) - var wg sync.WaitGroup - - for _, metricName := range PodMetricsNames { - matched, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && matched { - wg.Add(1) - go func(metricName string) { - queryType, params, nullRule := AssemblePodMetricRequestInfo(monitoringRequest, metricName) - if !nullRule { - metricsStr := client.SendMonitoringRequest(queryType, params) - ch <- ReformatJson(metricsStr, metricName, map[string]string{MetricLevelPodName: ""}) - } else { - ch <- nil - } - wg.Done() - }(metricName) - } - } - - wg.Wait() - close(ch) - - var metricsArray []FormatedMetric - - for oneMetric := range ch { - if oneMetric != nil { - metricsArray = append(metricsArray, *oneMetric) - } - } - - return &FormatedLevelMetric{ - MetricsLevel: MetricLevelPod, - Results: metricsArray, - } -} - -func GetContainerLevelMetrics(monitoringRequest *MonitoringRequestParams) *FormatedLevelMetric { - client, err := cs.ClientSets().Prometheus() - if err != nil { - klog.Error(err) - return nil - } - - metricsFilter := monitoringRequest.MetricsFilter - if metricsFilter == "" { - metricsFilter = ".*" - } - - var ch = make(chan *FormatedMetric, ChannelMaxCapacity) - var wg sync.WaitGroup - - for _, metricName := range ContainerMetricsNames { - matched, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && matched { - wg.Add(1) - go func(metricName string) { - queryType, params := AssembleContainerMetricRequestInfo(monitoringRequest, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) - ch <- ReformatJson(metricsStr, 
metricName, map[string]string{MetricLevelContainerName: ""}) - wg.Done() - }(metricName) - } - } - - wg.Wait() - close(ch) - - var metricsArray []FormatedMetric - - for oneMetric := range ch { - if oneMetric != nil { - metricsArray = append(metricsArray, *oneMetric) - } - } - - return &FormatedLevelMetric{ - MetricsLevel: MetricLevelContainer, - Results: metricsArray, - } -} - -func GetPVCLevelMetrics(monitoringRequest *MonitoringRequestParams) *FormatedLevelMetric { - client, err := cs.ClientSets().Prometheus() - if err != nil { - klog.Error(err) - return nil - } - - metricsFilter := monitoringRequest.MetricsFilter - if metricsFilter == "" { - metricsFilter = ".*" - } - - var ch = make(chan *FormatedMetric, ChannelMaxCapacity) - var wg sync.WaitGroup - - for _, metricName := range PVCMetricsNames { - matched, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && matched { - wg.Add(1) - go func(metricName string) { - queryType, params, nullRule := AssemblePVCMetricRequestInfo(monitoringRequest, metricName) - if !nullRule { - metricsStr := client.SendMonitoringRequest(queryType, params) - ch <- ReformatJson(metricsStr, metricName, map[string]string{MetricLevelPVC: ""}) - } else { - ch <- nil - } - wg.Done() - }(metricName) - } - } - - wg.Wait() - close(ch) - - var metricsArray []FormatedMetric - - for oneMetric := range ch { - if oneMetric != nil { - metricsArray = append(metricsArray, *oneMetric) - } - } - - return &FormatedLevelMetric{ - MetricsLevel: MetricLevelPVC, - Results: metricsArray, - } -} - -func GetComponentLevelMetrics(monitoringRequest *MonitoringRequestParams) *FormatedLevelMetric { - client, err := cs.ClientSets().Prometheus() - if err != nil { - klog.Error(err) - return nil - } - - metricsFilter := monitoringRequest.MetricsFilter - if metricsFilter == "" { - metricsFilter = ".*" - } - - var ch = make(chan *FormatedMetric, ChannelMaxCapacity) - var wg sync.WaitGroup - - for _, metricName := range ComponentMetricsNames { - matched, 
err := regexp.MatchString(metricsFilter, metricName) - if err == nil && matched { - wg.Add(1) - go func(metricName string) { - queryType, params := AssembleComponentRequestInfo(monitoringRequest, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) - formattedJson := ReformatJson(metricsStr, metricName, map[string]string{ResultItemMetricResourceName: monitoringRequest.ComponentName}) - - if metricName == EtcdServerList { - - nodeMap := make(map[string]string, 0) - - nodeAddress := GetNodeAddressInfo() - for nodeName, nodeInfo := range *nodeAddress { - - var nodeIp string - for _, item := range nodeInfo { - if item.Type == v1.NodeInternalIP { - nodeIp = item.Address - break - } - } - - nodeMap[nodeIp] = nodeName - } - - // add node_name label to metrics - for i := 0; i < len(formattedJson.Data.Result); i++ { - metricDesc := formattedJson.Data.Result[i][ResultItemMetric] - metricDescMap, ensure := metricDesc.(map[string]interface{}) - if ensure { - if nodeIp, exist := metricDescMap[ResultItemMetricNodeIp]; exist { - metricDescMap[ResultItemMetricNodeName] = nodeMap[nodeIp.(string)] - } - } - } - } - - ch <- formattedJson - wg.Done() - }(metricName) - } - } - - wg.Wait() - close(ch) - - var metricsArray []FormatedMetric - - for oneMetric := range ch { - if oneMetric != nil { - metricsArray = append(metricsArray, *oneMetric) - } - } - - return &FormatedLevelMetric{ - MetricsLevel: MetricLevelComponent, - Results: metricsArray, - } -} - -func GetAllWorkspacesStatistics() *FormatedLevelMetric { - - wg := sync.WaitGroup{} - var metricsArray []FormatedMetric - timestamp := time.Now().Unix() - - var orgResultItem *FormatedMetric - var devopsResultItem *FormatedMetric - var workspaceProjResultItem *FormatedMetric - var accountResultItem *FormatedMetric - - wg.Add(4) - - go func() { - orgNums, errOrg := workspaces.WorkspaceCount() - if errOrg != nil { - klog.Errorln(errOrg.Error()) - } - orgResultItem = getSpecificMetricItem(timestamp, 
MetricNameWorkspaceAllOrganizationCount, WorkspaceResourceKindOrganization, orgNums, errOrg) wg.Done() }() go func() { - devOpsProjectNums, errDevops := workspaces.GetAllDevOpsProjectsNums() - if errDevops != nil { - klog.Errorln(errDevops.Error()) + num, err := workspaces.GetAllDevOpsProjectsNums() + if err != nil { + klog.Errorln(err) + devopsStats.Status = "error" + } else { + devopsStats.withMetricResult(now, num) } - devopsResultItem = getSpecificMetricItem(timestamp, MetricNameWorkspaceAllDevopsCount, WorkspaceResourceKindDevops, devOpsProjectNums, errDevops) wg.Done() }() go func() { - projNums, errProj := workspaces.GetAllProjectNums() - if errProj != nil { - klog.Errorln(errProj.Error()) + num, err := workspaces.GetAllProjectNums() + if err != nil { + klog.Errorln(err) + namespaceStats.Status = "error" + } else { + namespaceStats.withMetricResult(now, num) } - workspaceProjResultItem = getSpecificMetricItem(timestamp, MetricNameWorkspaceAllProjectCount, WorkspaceResourceKindNamespace, projNums, errProj) wg.Done() }() go func() { - result, errAct := cs.ClientSets().KubeSphere().ListUsers() - if errAct != nil { - klog.Errorln(errAct.Error()) + ret, err := cs.ClientSets().KubeSphere().ListUsers() + if err != nil { + klog.Errorln(err) + accountStats.Status = "error" + } else { + accountStats.withMetricResult(now, ret.TotalCount) } - accountResultItem = getSpecificMetricItem(timestamp, MetricNameWorkspaceAllAccountCount, WorkspaceResourceKindAccount, result.TotalCount, errAct) wg.Done() }() wg.Wait() - metricsArray = append(metricsArray, *orgResultItem, *devopsResultItem, *workspaceProjResultItem, *accountResultItem) + metricsArray = append(metricsArray, workspaceStats, devopsStats, namespaceStats, accountStats) - return &FormatedLevelMetric{ - MetricsLevel: MetricLevelWorkspace, + return &Response{ + MetricsLevel: MonitorLevelCluster, Results: metricsArray, } } -func MonitorOneWorkspaceStatistics(wsName string) *FormatedLevelMetric { +func 
GetWorkspaceStatistics(workspaceName string) *Response { - var nsMetrics *FormatedMetric - var devopsMetrics *FormatedMetric - var memberMetrics *FormatedMetric - var roleMetrics *FormatedMetric + now := time.Now().Unix() + + var metricsArray []APIResponse + namespaceStats := APIResponse{MetricName: MetricWorkspaceNamespaceCount} + devopsStats := APIResponse{MetricName: MetricWorkspaceDevopsCount} + memberStats := APIResponse{MetricName: MetricWorkspaceMemberCount} + roleStats := APIResponse{MetricName: MetricWorkspaceRoleCount} wg := sync.WaitGroup{} wg.Add(4) - var fMetricsArray []FormatedMetric - timestamp := int64(time.Now().Unix()) - go func() { - // add namespaces(project) metric - namespaces, errNs := workspaces.WorkspaceNamespaces(wsName) - - if errNs != nil { - klog.Errorln(errNs.Error()) + num, err := workspaces.WorkspaceNamespaceCount(workspaceName) + if err != nil { + klog.Errorln(err) + namespaceStats.Status = "error" + } else { + namespaceStats.withMetricResult(now, num) } - nsMetrics = getSpecificMetricItem(timestamp, MetricNameWorkspaceNamespaceCount, WorkspaceResourceKindNamespace, len(namespaces), errNs) wg.Done() }() go func() { - devOpsProjects, errDevOps := workspaces.GetDevOpsProjects(wsName) - if errDevOps != nil { - klog.Errorln(errDevOps.Error()) + num, err := workspaces.GetDevOpsProjectsCount(workspaceName) + if err != nil { + klog.Errorln(err) + devopsStats.Status = "error" + } else { + devopsStats.withMetricResult(now, num) } - // add devops metric - devopsMetrics = getSpecificMetricItem(timestamp, MetricNameWorkspaceDevopsCount, WorkspaceResourceKindDevops, len(devOpsProjects), errDevOps) wg.Done() }() go func() { - count, errMemb := workspaces.WorkspaceUserCount(wsName) - if errMemb != nil { - klog.Errorln(errMemb.Error()) + num, err := workspaces.WorkspaceUserCount(workspaceName) + if err != nil { + klog.Errorln(err) + memberStats.Status = "error" + } else { + memberStats.withMetricResult(now, num) } - // add member metric - 
memberMetrics = getSpecificMetricItem(timestamp, MetricNameWorkspaceMemberCount, WorkspaceResourceKindMember, count, errMemb) wg.Done() }() go func() { - roles, errRole := workspaces.GetOrgRoles(wsName) - if errRole != nil { - klog.Errorln(errRole.Error()) + num, err := workspaces.GetOrgRolesCount(workspaceName) + if err != nil { + klog.Errorln(err) + roleStats.Status = "error" + } else { + roleStats.withMetricResult(now, num) } - // add role metric - roleMetrics = getSpecificMetricItem(timestamp, MetricNameWorkspaceRoleCount, WorkspaceResourceKindRole, len(roles), errRole) wg.Done() }() wg.Wait() - fMetricsArray = append(fMetricsArray, *nsMetrics, *devopsMetrics, *memberMetrics, *roleMetrics) + metricsArray = append(metricsArray, namespaceStats, devopsStats, memberStats, roleStats) - return &FormatedLevelMetric{ - MetricsLevel: MetricLevelWorkspace, - Results: fMetricsArray, + return &Response{ + MetricsLevel: MonitorLevelWorkspace, + Results: metricsArray, } } -func getSpecificMetricItem(timestamp int64, metricName string, resource string, count int, err error, resourceType ...string) *FormatedMetric { - var nsMetrics FormatedMetric - nsMetrics.MetricName = metricName - nsMetrics.Data.ResultType = ResultTypeVector - resultItem := make(map[string]interface{}) - tmp := make(map[string]string) - - if len(resourceType) > 0 { - tmp[resourceType[0]] = resource - } else { - tmp[ResultItemMetricResource] = resource +func (response *APIResponse) withMetricResult(time int64, value int) { + response.Status = "success" + response.Data = prometheus.QueryResult{ + ResultType: "vector", + Result: []prometheus.QueryValue{ + { + Value: []interface{}{time, value}, + }, + }, } - - if err == nil { - nsMetrics.Status = MetricStatusSuccess - } else { - nsMetrics.Status = MetricStatusError - resultItem["errormsg"] = err.Error() - } - - resultItem[ResultItemMetric] = tmp - resultItem[ResultItemValue] = []interface{}{timestamp, count} - nsMetrics.Data.Result = 
make([]map[string]interface{}, 1) - nsMetrics.Data.Result[0] = resultItem - return &nsMetrics -} - -func AssembleClusterMetricRequestInfo(monitoringRequest *MonitoringRequestParams, metricName string) (string, string) { - queryType := monitoringRequest.QueryType - paramValues := monitoringRequest.Params - rule := MakeClusterRule(metricName) - params := makeRequestParamString(rule, paramValues) - - return queryType, params -} - -func AssembleNodeMetricRequestInfo(monitoringRequest *MonitoringRequestParams, metricName string) (string, string) { - queryType := monitoringRequest.QueryType - paramValues := monitoringRequest.Params - rule := MakeNodeRule(monitoringRequest.NodeId, monitoringRequest.ResourcesFilter, metricName) - params := makeRequestParamString(rule, paramValues) - - return queryType, params -} - -func AssembleComponentRequestInfo(monitoringRequest *MonitoringRequestParams, metricName string) (string, string) { - queryType := monitoringRequest.QueryType - paramValues := monitoringRequest.Params - rule := MakeComponentRule(metricName) - params := makeRequestParamString(rule, paramValues) - - return queryType, params } diff --git a/pkg/models/metrics/metrics_rules.go b/pkg/models/metrics/metrics_rules.go new file mode 100644 index 000000000..6099457b1 --- /dev/null +++ b/pkg/models/metrics/metrics_rules.go @@ -0,0 +1,502 @@ +/* +Copyright 2019 The KubeSphere Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metrics + +const ( + // TODO: expose the following metrics in prometheus format + MetricClusterWorkspaceCount = "cluster_workspace_count" + MetricClusterAccountCount = "cluster_account_count" + MetricClusterNamespaceCount = "cluster_namespace_count" + MetricClusterDevopsCount = "cluster_devops_project_count" + + MetricWorkspaceNamespaceCount = "workspace_namespace_count" + MetricWorkspaceDevopsCount = "workspace_devops_project_count" + MetricWorkspaceMemberCount = "workspace_member_count" + MetricWorkspaceRoleCount = "workspace_role_count" +) + +var clusterMetrics = []string{ + "cluster_cpu_utilisation", + "cluster_cpu_usage", + "cluster_cpu_total", + "cluster_memory_utilisation", + "cluster_memory_available", + "cluster_memory_total", + "cluster_memory_usage_wo_cache", + "cluster_net_utilisation", + "cluster_net_bytes_transmitted", + "cluster_net_bytes_received", + "cluster_disk_read_iops", + "cluster_disk_write_iops", + "cluster_disk_read_throughput", + "cluster_disk_write_throughput", + "cluster_disk_size_usage", + "cluster_disk_size_utilisation", + "cluster_disk_size_capacity", + "cluster_disk_size_available", + "cluster_disk_inode_total", + "cluster_disk_inode_usage", + "cluster_disk_inode_utilisation", + "cluster_namespace_count", + "cluster_pod_count", + "cluster_pod_quota", + "cluster_pod_utilisation", + "cluster_pod_running_count", + "cluster_pod_succeeded_count", + "cluster_pod_abnormal_count", + "cluster_node_online", + "cluster_node_offline", + "cluster_node_total", + "cluster_cronjob_count", + "cluster_pvc_count", + "cluster_daemonset_count", + "cluster_deployment_count", + "cluster_endpoint_count", + "cluster_hpa_count", + "cluster_job_count", + "cluster_statefulset_count", + "cluster_replicaset_count", + "cluster_service_count", + "cluster_secret_count", + "cluster_pv_count", + "cluster_ingresses_extensions_count", + "cluster_load1", + "cluster_load5", + "cluster_load15", + "cluster_pod_abnormal_ratio", + "cluster_node_offline_ratio", 
+} + +var nodeMetrics = []string{ + "node_cpu_utilisation", + "node_cpu_total", + "node_cpu_usage", + "node_memory_utilisation", + "node_memory_usage_wo_cache", + "node_memory_available", + "node_memory_total", + "node_net_utilisation", + "node_net_bytes_transmitted", + "node_net_bytes_received", + "node_disk_read_iops", + "node_disk_write_iops", + "node_disk_read_throughput", + "node_disk_write_throughput", + "node_disk_size_capacity", + "node_disk_size_available", + "node_disk_size_usage", + "node_disk_size_utilisation", + "node_disk_inode_total", + "node_disk_inode_usage", + "node_disk_inode_utilisation", + "node_pod_count", + "node_pod_quota", + "node_pod_utilisation", + "node_pod_running_count", + "node_pod_succeeded_count", + "node_pod_abnormal_count", + "node_load1", + "node_load5", + "node_load15", + "node_pod_abnormal_ratio", +} + +var workspaceMetrics = []string{ + "workspace_cpu_usage", + "workspace_memory_usage", + "workspace_memory_usage_wo_cache", + "workspace_net_bytes_transmitted", + "workspace_net_bytes_received", + "workspace_pod_count", + "workspace_pod_running_count", + "workspace_pod_succeeded_count", + "workspace_pod_abnormal_count", + "workspace_ingresses_extensions_count", + "workspace_cronjob_count", + "workspace_pvc_count", + "workspace_daemonset_count", + "workspace_deployment_count", + "workspace_endpoint_count", + "workspace_hpa_count", + "workspace_job_count", + "workspace_statefulset_count", + "workspace_replicaset_count", + "workspace_service_count", + "workspace_secret_count", + "workspace_pod_abnormal_ratio", +} + +var namespaceMetrics = []string{ + "namespace_cpu_usage", + "namespace_memory_usage", + "namespace_memory_usage_wo_cache", + "namespace_net_bytes_transmitted", + "namespace_net_bytes_received", + "namespace_pod_count", + "namespace_pod_running_count", + "namespace_pod_succeeded_count", + "namespace_pod_abnormal_count", + "namespace_pod_abnormal_ratio", + "namespace_memory_limit_hard", + "namespace_cpu_limit_hard", + 
"namespace_pod_count_hard", + "namespace_cronjob_count", + "namespace_pvc_count", + "namespace_daemonset_count", + "namespace_deployment_count", + "namespace_endpoint_count", + "namespace_hpa_count", + "namespace_job_count", + "namespace_statefulset_count", + "namespace_replicaset_count", + "namespace_service_count", + "namespace_secret_count", + "namespace_ingresses_extensions_count", + "namespace_s2ibuilder_count", +} + +var workloadMetrics = []string{ + // TODO: the following four metrics are deprecated. + "workload_pod_cpu_usage", + "workload_pod_memory_usage_wo_cache", + "workload_pod_net_bytes_transmitted", + "workload_pod_net_bytes_received", + + "workload_cpu_usage", + "workload_memory_usage_wo_cache", + "workload_net_bytes_transmitted", + "workload_net_bytes_received", + + "workload_deployment_replica", + "workload_deployment_replica_available", + "workload_statefulset_replica", + "workload_statefulset_replica_available", + "workload_daemonset_replica", + "workload_daemonset_replica_available", + "workload_deployment_unavailable_replicas_ratio", + "workload_daemonset_unavailable_replicas_ratio", + "workload_statefulset_unavailable_replicas_ratio", +} + +var podMetrics = []string{ + "pod_cpu_usage", + "pod_memory_usage", + "pod_memory_usage_wo_cache", + "pod_net_bytes_transmitted", + "pod_net_bytes_received", +} + +var containerMetrics = []string{ + "container_cpu_usage", + "container_memory_usage", + "container_memory_usage_wo_cache", +} + +var pvcMetrics = []string{ + "pvc_inodes_available", + "pvc_inodes_used", + "pvc_inodes_total", + "pvc_inodes_utilisation", + "pvc_bytes_available", + "pvc_bytes_used", + "pvc_bytes_total", + "pvc_bytes_utilisation", +} + +var componentMetrics = []string{ + "etcd_server_list", + "etcd_server_total", + "etcd_server_up_total", + "etcd_server_has_leader", + "etcd_server_leader_changes", + "etcd_server_proposals_failed_rate", + "etcd_server_proposals_applied_rate", + "etcd_server_proposals_committed_rate", + 
"etcd_server_proposals_pending_count", + "etcd_mvcc_db_size", + "etcd_network_client_grpc_received_bytes", + "etcd_network_client_grpc_sent_bytes", + "etcd_grpc_call_rate", + "etcd_grpc_call_failed_rate", + "etcd_grpc_server_msg_received_rate", + "etcd_grpc_server_msg_sent_rate", + "etcd_disk_wal_fsync_duration", + "etcd_disk_wal_fsync_duration_quantile", + "etcd_disk_backend_commit_duration", + "etcd_disk_backend_commit_duration_quantile", + + "apiserver_up_sum", + "apiserver_request_rate", + "apiserver_request_by_verb_rate", + "apiserver_request_latencies", + "apiserver_request_by_verb_latencies", + + "scheduler_up_sum", + "scheduler_schedule_attempts", + "scheduler_schedule_attempt_rate", + "scheduler_e2e_scheduling_latency", + "scheduler_e2e_scheduling_latency_quantile", + + "controller_manager_up_sum", + + "coredns_up_sum", + "coredns_cache_hits", + "coredns_cache_misses", + "coredns_dns_request_rate", + "coredns_dns_request_duration", + "coredns_dns_request_duration_quantile", + "coredns_dns_request_by_type_rate", + "coredns_dns_request_by_rcode_rate", + "coredns_panic_rate", + "coredns_proxy_request_rate", + "coredns_proxy_request_duration", + "coredns_proxy_request_duration_quantile", + + "prometheus_up_sum", + "prometheus_tsdb_head_samples_appended_rate", +} + +var metricsPromqlMap = map[string]string{ + //cluster + "cluster_cpu_utilisation": ":node_cpu_utilisation:avg1m", + "cluster_cpu_usage": `round(:node_cpu_utilisation:avg1m * sum(node:node_num_cpu:sum), 0.001)`, + "cluster_cpu_total": "sum(node:node_num_cpu:sum)", + "cluster_memory_utilisation": ":node_memory_utilisation:", + "cluster_memory_available": "sum(node:node_memory_bytes_available:sum)", + "cluster_memory_total": "sum(node:node_memory_bytes_total:sum)", + "cluster_memory_usage_wo_cache": "sum(node:node_memory_bytes_total:sum) - sum(node:node_memory_bytes_available:sum)", + "cluster_net_utilisation": ":node_net_utilisation:sum_irate", + "cluster_net_bytes_transmitted": 
"sum(node:node_net_bytes_transmitted:sum_irate)", + "cluster_net_bytes_received": "sum(node:node_net_bytes_received:sum_irate)", + "cluster_disk_read_iops": "sum(node:data_volume_iops_reads:sum)", + "cluster_disk_write_iops": "sum(node:data_volume_iops_writes:sum)", + "cluster_disk_read_throughput": "sum(node:data_volume_throughput_bytes_read:sum)", + "cluster_disk_write_throughput": "sum(node:data_volume_throughput_bytes_written:sum)", + "cluster_disk_size_usage": `sum(max(node_filesystem_size_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"} - node_filesystem_avail_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"}) by (device, instance))`, + "cluster_disk_size_utilisation": `cluster:disk_utilization:ratio`, + "cluster_disk_size_capacity": `sum(max(node_filesystem_size_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"}) by (device, instance))`, + "cluster_disk_size_available": `sum(max(node_filesystem_avail_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"}) by (device, instance))`, + "cluster_disk_inode_total": `sum(node:node_inodes_total:)`, + "cluster_disk_inode_usage": `sum(node:node_inodes_total:) - sum(node:node_inodes_free:)`, + "cluster_disk_inode_utilisation": `cluster:disk_inode_utilization:ratio`, + "cluster_namespace_count": `count(kube_namespace_annotations)`, + "cluster_pod_count": `cluster:pod:sum`, + "cluster_pod_quota": `sum(max(kube_node_status_capacity_pods) by (node) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0))`, + "cluster_pod_utilisation": `cluster:pod_utilization:ratio`, + "cluster_pod_running_count": `cluster:pod_running:count`, + "cluster_pod_succeeded_count": `count(kube_pod_info unless on (pod) (kube_pod_status_phase{phase=~"Failed|Pending|Unknown|Running"} > 0) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0))`, + "cluster_pod_abnormal_count": 
`cluster:pod_abnormal:sum`, + "cluster_node_online": `sum(kube_node_status_condition{condition="Ready",status="true"})`, + "cluster_node_offline": `cluster:node_offline:sum`, + "cluster_node_total": `sum(kube_node_status_condition{condition="Ready"})`, + "cluster_cronjob_count": `sum(kube_cronjob_labels)`, + "cluster_pvc_count": `sum(kube_persistentvolumeclaim_info)`, + "cluster_daemonset_count": `sum(kube_daemonset_labels)`, + "cluster_deployment_count": `sum(kube_deployment_labels)`, + "cluster_endpoint_count": `sum(kube_endpoint_labels)`, + "cluster_hpa_count": `sum(kube_hpa_labels)`, + "cluster_job_count": `sum(kube_job_labels)`, + "cluster_statefulset_count": `sum(kube_statefulset_labels)`, + "cluster_replicaset_count": `count(kube_replicaset_created)`, + "cluster_service_count": `sum(kube_service_info)`, + "cluster_secret_count": `sum(kube_secret_info)`, + "cluster_pv_count": `sum(kube_persistentvolume_labels)`, + "cluster_ingresses_extensions_count": `sum(kube_ingress_labels)`, + "cluster_load1": `sum(node_load1{job="node-exporter"}) / sum(node:node_num_cpu:sum)`, + "cluster_load5": `sum(node_load5{job="node-exporter"}) / sum(node:node_num_cpu:sum)`, + "cluster_load15": `sum(node_load15{job="node-exporter"}) / sum(node:node_num_cpu:sum)`, + "cluster_pod_abnormal_ratio": `cluster:pod_abnormal:ratio`, + "cluster_node_offline_ratio": `cluster:node_offline:ratio`, + + //node + "node_cpu_utilisation": "node:node_cpu_utilisation:avg1m{$1}", + "node_cpu_total": "node:node_num_cpu:sum{$1}", + "node_memory_utilisation": "node:node_memory_utilisation:{$1}", + "node_memory_available": "node:node_memory_bytes_available:sum{$1}", + "node_memory_total": "node:node_memory_bytes_total:sum{$1}", + "node_memory_usage_wo_cache": "node:node_memory_bytes_total:sum{$1} - node:node_memory_bytes_available:sum{$1}", + "node_net_utilisation": "node:node_net_utilisation:sum_irate{$1}", + "node_net_bytes_transmitted": "node:node_net_bytes_transmitted:sum_irate{$1}", + 
"node_net_bytes_received": "node:node_net_bytes_received:sum_irate{$1}", + "node_disk_read_iops": "node:data_volume_iops_reads:sum{$1}", + "node_disk_write_iops": "node:data_volume_iops_writes:sum{$1}", + "node_disk_read_throughput": "node:data_volume_throughput_bytes_read:sum{$1}", + "node_disk_write_throughput": "node:data_volume_throughput_bytes_written:sum{$1}", + "node_disk_size_capacity": `sum(max(node_filesystem_size_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"} * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{$1}) by (device, node)) by (node)`, + "node_disk_size_available": `node:disk_space_available:{$1}`, + "node_disk_size_usage": `sum(max((node_filesystem_size_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"} - node_filesystem_avail_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{$1}) by (device, node)) by (node)`, + "node_disk_size_utilisation": `node:disk_space_utilization:ratio{$1}`, + "node_disk_inode_total": `node:node_inodes_total:{$1}`, + "node_disk_inode_usage": `node:node_inodes_total:{$1} - node:node_inodes_free:{$1}`, + "node_disk_inode_utilisation": `node:disk_inode_utilization:ratio{$1}`, + "node_pod_count": `node:pod_count:sum{$1}`, + "node_pod_quota": `max(kube_node_status_capacity_pods{$1}) by (node) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0)`, + "node_pod_utilisation": `node:pod_utilization:ratio{$1}`, + "node_pod_running_count": `node:pod_running:count{$1}`, + "node_pod_succeeded_count": `node:pod_succeeded:count{$1}`, + "node_pod_abnormal_count": `node:pod_abnormal:count{$1}`, + "node_cpu_usage": `round(node:node_cpu_utilisation:avg1m{$1} * node:node_num_cpu:sum{$1}, 0.001)`, + "node_load1": `node:load1:ratio{$1}`, + "node_load5": `node:load5:ratio{$1}`, + "node_load15": `node:load15:ratio{$1}`, + 
"node_pod_abnormal_ratio": `node:pod_abnormal:ratio{$1}`, + + // workspace + "workspace_cpu_usage": `round(sum by (label_kubesphere_io_workspace) (namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", $1}), 0.001)`, + "workspace_memory_usage": `sum by (label_kubesphere_io_workspace) (namespace:container_memory_usage_bytes:sum{namespace!="", $1})`, + "workspace_memory_usage_wo_cache": `sum by (label_kubesphere_io_workspace) (namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", $1})`, + "workspace_net_bytes_transmitted": `sum by (label_kubesphere_io_workspace) (sum by (namespace) (irate(container_network_transmit_bytes_total{namespace!="", pod_name!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "workspace_net_bytes_received": `sum by (label_kubesphere_io_workspace) (sum by (namespace) (irate(container_network_receive_bytes_total{namespace!="", pod_name!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{})`, + "workspace_pod_count": `sum by (label_kubesphere_io_workspace) (kube_pod_status_phase{phase!~"Failed|Succeeded", namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_pod_running_count": `sum by (label_kubesphere_io_workspace) (kube_pod_status_phase{phase="Running", namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_pod_succeeded_count": `sum by (label_kubesphere_io_workspace) (kube_pod_status_phase{phase="Succeeded", namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_pod_abnormal_count": `count by (label_kubesphere_io_workspace) ((kube_pod_info{node!=""} 
unless on (pod, namespace) (kube_pod_status_phase{job="kube-state-metrics", phase="Succeeded"}>0) unless on (pod, namespace) ((kube_pod_status_ready{job="kube-state-metrics", condition="true"}>0) and on (pod, namespace) (kube_pod_status_phase{job="kube-state-metrics", phase="Running"}>0)) unless on (pod, namespace) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", reason="ContainerCreating"}>0)) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_ingresses_extensions_count": `sum by (label_kubesphere_io_workspace) (kube_ingress_labels{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_cronjob_count": `sum by (label_kubesphere_io_workspace) (kube_cronjob_labels{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_pvc_count": `sum by (label_kubesphere_io_workspace) (kube_persistentvolumeclaim_info{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_daemonset_count": `sum by (label_kubesphere_io_workspace) (kube_daemonset_labels{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_deployment_count": `sum by (label_kubesphere_io_workspace) (kube_deployment_labels{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_endpoint_count": `sum by (label_kubesphere_io_workspace) (kube_endpoint_labels{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_hpa_count": `sum by (label_kubesphere_io_workspace) (kube_hpa_labels{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_job_count": `sum by (label_kubesphere_io_workspace) (kube_job_labels{namespace!=""} * on 
(namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_statefulset_count": `sum by (label_kubesphere_io_workspace) (kube_statefulset_labels{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_replicaset_count": `count by (label_kubesphere_io_workspace) (kube_replicaset_created{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_service_count": `sum by (label_kubesphere_io_workspace) (kube_service_info{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_secret_count": `sum by (label_kubesphere_io_workspace) (kube_secret_info{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + "workspace_pod_abnormal_ratio": `count by (label_kubesphere_io_workspace) ((kube_pod_info{node!=""} unless on (pod, namespace) (kube_pod_status_phase{job="kube-state-metrics", phase="Succeeded"}>0) unless on (pod, namespace) ((kube_pod_status_ready{job="kube-state-metrics", condition="true"}>0) and on (pod, namespace) (kube_pod_status_phase{job="kube-state-metrics", phase="Running"}>0)) unless on (pod, namespace) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", reason="ContainerCreating"}>0)) * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1}) / sum by (label_kubesphere_io_workspace) (kube_pod_status_phase{phase!="Succeeded", namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{$1}))`, + + //namespace + "namespace_cpu_usage": `round(namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", $1}, 0.001)`, + "namespace_memory_usage": `namespace:container_memory_usage_bytes:sum{namespace!="", $1}`, + "namespace_memory_usage_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", $1}`, + 
"namespace_net_bytes_transmitted": `sum by (namespace) (irate(container_network_transmit_bytes_total{namespace!="", pod_name!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m]) * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_net_bytes_received": `sum by (namespace) (irate(container_network_receive_bytes_total{namespace!="", pod_name!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m]) * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_pod_count": `sum by (namespace) (kube_pod_status_phase{phase!~"Failed|Succeeded", namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_pod_running_count": `sum by (namespace) (kube_pod_status_phase{phase="Running", namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_pod_succeeded_count": `sum by (namespace) (kube_pod_status_phase{phase="Succeeded", namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_pod_abnormal_count": `namespace:pod_abnormal:count{namespace!="", $1}`, + "namespace_pod_abnormal_ratio": `namespace:pod_abnormal:ratio{namespace!="", $1}`, + "namespace_memory_limit_hard": `min by (namespace) (kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", resource="limits.memory"} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_cpu_limit_hard": `min by (namespace) (kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", resource="limits.cpu"} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_pod_count_hard": `min by (namespace) (kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", 
resource="count/pods"} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_cronjob_count": `sum by (namespace) (kube_cronjob_labels{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_pvc_count": `sum by (namespace) (kube_persistentvolumeclaim_info{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_daemonset_count": `sum by (namespace) (kube_daemonset_labels{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_deployment_count": `sum by (namespace) (kube_deployment_labels{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_endpoint_count": `sum by (namespace) (kube_endpoint_labels{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_hpa_count": `sum by (namespace) (kube_hpa_labels{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_job_count": `sum by (namespace) (kube_job_labels{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_statefulset_count": `sum by (namespace) (kube_statefulset_labels{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_replicaset_count": `count by (namespace) (kube_replicaset_created{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_service_count": `sum by (namespace) (kube_service_info{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_secret_count": `sum by (namespace) (kube_secret_info{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) 
kube_namespace_labels{$1})`, + "namespace_ingresses_extensions_count": `sum by (namespace) (kube_ingress_labels{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + "namespace_s2ibuilder_count": `sum by (namespace) (s2i_s2ibuilder_created{namespace!=""} * on (namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels{$1})`, + + // workload + // TODO: the following four metrics are deprecated. + "workload_pod_cpu_usage": `round(namespace:workload_cpu_usage:sum{$1}, 0.001)`, + "workload_pod_memory_usage_wo_cache": `namespace:workload_memory_usage_wo_cache:sum{$1}`, + "workload_pod_net_bytes_transmitted": `namespace:workload_net_bytes_transmitted:sum_irate{$1}`, + "workload_pod_net_bytes_received": `namespace:workload_net_bytes_received:sum_irate{$1}`, + + "workload_cpu_usage": `round(namespace:workload_cpu_usage:sum{$1}, 0.001)`, + "workload_memory_usage_wo_cache": `namespace:workload_memory_usage_wo_cache:sum{$1}`, + "workload_net_bytes_transmitted": `namespace:workload_net_bytes_transmitted:sum_irate{$1}`, + "workload_net_bytes_received": `namespace:workload_net_bytes_received:sum_irate{$1}`, + + "workload_deployment_replica": `label_join(sum (label_join(label_replace(kube_deployment_spec_replicas{$2}, "owner_kind", "Deployment", "", ""), "workload", "", "deployment")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, + "workload_deployment_replica_available": `label_join(sum (label_join(label_replace(kube_deployment_status_replicas_available{$2}, "owner_kind", "Deployment", "", ""), "workload", "", "deployment")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, + "workload_statefulset_replica": `label_join(sum (label_join(label_replace(kube_statefulset_replicas{$2}, "owner_kind", "StatefulSet", "", ""), "workload", "", "statefulset")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, + 
"workload_statefulset_replica_available": `label_join(sum (label_join(label_replace(kube_statefulset_status_replicas_current{$2}, "owner_kind", "StatefulSet", "", ""), "workload", "", "statefulset")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, + "workload_daemonset_replica": `label_join(sum (label_join(label_replace(kube_daemonset_status_desired_number_scheduled{$2}, "owner_kind", "DaemonSet", "", ""), "workload", "", "daemonset")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, + "workload_daemonset_replica_available": `label_join(sum (label_join(label_replace(kube_daemonset_status_number_available{$2}, "owner_kind", "DaemonSet", "", ""), "workload", "", "daemonset")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, + "workload_deployment_unavailable_replicas_ratio": `namespace:deployment_unavailable_replicas:ratio{$1}`, + "workload_daemonset_unavailable_replicas_ratio": `namespace:daemonset_unavailable_replicas:ratio{$1}`, + "workload_statefulset_unavailable_replicas_ratio": `namespace:statefulset_unavailable_replicas:ratio{$1}`, + + // pod + "pod_cpu_usage": `round(label_join(sum by (namespace, pod_name) (irate(container_cpu_usage_seconds_total{job="kubelet", pod_name!="", image!=""}[5m])), "pod", "", "pod_name") * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}, 0.001)`, + "pod_memory_usage": `label_join(sum by (namespace, pod_name) (container_memory_usage_bytes{job="kubelet", pod_name!="", image!=""}), "pod", "", "pod_name") * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`, + "pod_memory_usage_wo_cache": `label_join(sum by (namespace, pod_name) (container_memory_usage_bytes{job="kubelet", pod_name!="", image!=""} - container_memory_cache{job="kubelet", pod_name!="", image!=""}), "pod", "", 
"pod_name") * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`, + "pod_net_bytes_transmitted": `label_join(sum by (namespace, pod_name) (irate(container_network_transmit_bytes_total{pod_name!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])), "pod", "", "pod_name") * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`, + "pod_net_bytes_received": `label_join(sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{pod_name!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])), "pod", "", "pod_name") * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`, + + // container + "container_cpu_usage": `round(sum by (namespace, pod_name, container_name) (irate(container_cpu_usage_seconds_total{job="kubelet", container_name!="POD", container_name!="", image!="", $1}[5m])), 0.001)`, + "container_memory_usage": `sum by (namespace, pod_name, container_name) (container_memory_usage_bytes{job="kubelet", container_name!="POD", container_name!="", image!="", $1})`, + "container_memory_usage_wo_cache": `sum by (namespace, pod_name, container_name) (container_memory_usage_bytes{job="kubelet", container_name!="POD", container_name!="", image!="", $1} - container_memory_cache{job="kubelet", container_name!="POD", container_name!="", image!="", $1})`, + + // pvc + "pvc_inodes_available": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes_free) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`, + "pvc_inodes_used": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes_used) * on (namespace, persistentvolumeclaim) group_left 
(storageclass) kube_persistentvolumeclaim_info{$1}`, + "pvc_inodes_total": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`, + "pvc_inodes_utilisation": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes_used / kubelet_volume_stats_inodes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`, + "pvc_bytes_available": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_available_bytes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`, + "pvc_bytes_used": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_used_bytes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`, + "pvc_bytes_total": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`, + "pvc_bytes_utilisation": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_used_bytes / kubelet_volume_stats_capacity_bytes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`, + + // component + "etcd_server_list": `label_replace(up{job="etcd"}, "node_ip", "$1", "instance", "(.*):.*")`, + "etcd_server_total": `count(up{job="etcd"})`, + "etcd_server_up_total": `etcd:up:sum`, + "etcd_server_has_leader": `label_replace(etcd_server_has_leader, "node_ip", "$1", "instance", "(.*):.*")`, + "etcd_server_leader_changes": `label_replace(etcd:etcd_server_leader_changes_seen:sum_changes, "node_ip", "$1", "node", "(.*)")`, + "etcd_server_proposals_failed_rate": `avg(etcd:etcd_server_proposals_failed:sum_irate)`, + "etcd_server_proposals_applied_rate": `avg(etcd:etcd_server_proposals_applied:sum_irate)`, + 
"etcd_server_proposals_committed_rate": `avg(etcd:etcd_server_proposals_committed:sum_irate)`, + "etcd_server_proposals_pending_count": `avg(etcd:etcd_server_proposals_pending:sum)`, + "etcd_mvcc_db_size": `avg(etcd:etcd_debugging_mvcc_db_total_size:sum)`, + "etcd_network_client_grpc_received_bytes": `sum(etcd:etcd_network_client_grpc_received_bytes:sum_irate)`, + "etcd_network_client_grpc_sent_bytes": `sum(etcd:etcd_network_client_grpc_sent_bytes:sum_irate)`, + "etcd_grpc_call_rate": `sum(etcd:grpc_server_started:sum_irate)`, + "etcd_grpc_call_failed_rate": `sum(etcd:grpc_server_handled:sum_irate)`, + "etcd_grpc_server_msg_received_rate": `sum(etcd:grpc_server_msg_received:sum_irate)`, + "etcd_grpc_server_msg_sent_rate": `sum(etcd:grpc_server_msg_sent:sum_irate)`, + "etcd_disk_wal_fsync_duration": `avg(etcd:etcd_disk_wal_fsync_duration:avg)`, + "etcd_disk_wal_fsync_duration_quantile": `avg(etcd:etcd_disk_wal_fsync_duration:histogram_quantile) by (quantile)`, + "etcd_disk_backend_commit_duration": `avg(etcd:etcd_disk_backend_commit_duration:avg)`, + "etcd_disk_backend_commit_duration_quantile": `avg(etcd:etcd_disk_backend_commit_duration:histogram_quantile) by (quantile)`, + + "apiserver_up_sum": `apiserver:up:sum`, + "apiserver_request_rate": `apiserver:apiserver_request_count:sum_irate`, + "apiserver_request_by_verb_rate": `apiserver:apiserver_request_count:sum_verb_irate`, + "apiserver_request_latencies": `apiserver:apiserver_request_latencies:avg`, + "apiserver_request_by_verb_latencies": `apiserver:apiserver_request_latencies:avg_by_verb`, + + "scheduler_up_sum": `scheduler:up:sum`, + "scheduler_schedule_attempts": `scheduler:scheduler_schedule_attempts:sum`, + "scheduler_schedule_attempt_rate": `scheduler:scheduler_schedule_attempts:sum_rate`, + "scheduler_e2e_scheduling_latency": `scheduler:scheduler_e2e_scheduling_latency:avg`, + "scheduler_e2e_scheduling_latency_quantile": `scheduler:scheduler_e2e_scheduling_latency:histogram_quantile`, + + 
"controller_manager_up_sum": `controller_manager:up:sum`, + + "coredns_up_sum": `coredns:up:sum`, + "coredns_cache_hits": `coredns:coredns_cache_hits_total:sum_irate`, + "coredns_cache_misses": `coredns:coredns_cache_misses:sum_irate`, + "coredns_dns_request_rate": `coredns:coredns_dns_request_count:sum_irate`, + "coredns_dns_request_duration": `coredns:coredns_dns_request_duration:avg`, + "coredns_dns_request_duration_quantile": `coredns:coredns_dns_request_duration:histogram_quantile`, + "coredns_dns_request_by_type_rate": `coredns:coredns_dns_request_type_count:sum_irate`, + "coredns_dns_request_by_rcode_rate": `coredns:coredns_dns_response_rcode_count:sum_irate`, + "coredns_panic_rate": `coredns:coredns_panic_count:sum_irate`, + "coredns_proxy_request_rate": `coredns:coredns_proxy_request_count:sum_irate`, + "coredns_proxy_request_duration": `coredns:coredns_proxy_request_duration:avg`, + "coredns_proxy_request_duration_quantile": `coredns:coredns_proxy_request_duration:histogram_quantile`, + + "prometheus_up_sum": `prometheus:up:sum`, + "prometheus_tsdb_head_samples_appended_rate": `prometheus:prometheus_tsdb_head_samples_appended:sum_rate`, +} diff --git a/pkg/models/metrics/metricsrule.go b/pkg/models/metrics/metricsrule.go deleted file mode 100644 index bfc876370..000000000 --- a/pkg/models/metrics/metricsrule.go +++ /dev/null @@ -1,284 +0,0 @@ -/* -Copyright 2018 The KubeSphere Authors. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package metrics - -import ( - "strings" -) - -// resources_filter = xxxx|xxxx -func MakeWorkloadPromQL(metricName, nsName, resources_filter, wkKind string) string { - - switch wkKind { - case "deployment": - wkKind = Deployment - case "daemonset": - wkKind = DaemonSet - case "statefulset": - wkKind = StatefulSet - } - - if wkKind == "" { - resources_filter = Any - } else if resources_filter == "" { - if strings.Contains(metricName, "pod") { - resources_filter = wkKind + ":" + Any - } else if strings.Contains(metricName, strings.ToLower(wkKind)) { - resources_filter = Any - } - } else { - var prefix string - - // The "workload_{deployment,statefulset,daemonset}_xxx" metric uses "deployment","statefulset" or "daemonset" label selectors - // which match exactly a workload name - // eg. kube_daemonset_status_number_unavailable{daemonset=~"^xxx$"} - if strings.Contains(metricName, "deployment") || strings.Contains(metricName, "daemonset") || strings.Contains(metricName, "statefulset") { - // to pass "resources_filter" to PromQL, we reformat it - prefix = "" - } else { - // While workload_{cpu,memory,net}_xxx metrics uses "workload" - // eg. namespace:workload_cpu_usage:sum{workload="Deployment:xxx"} - prefix = wkKind + ":" - } - - filters := strings.Split(resources_filter, "|") - // reshape it to match PromQL re2 syntax - resources_filter = "" - for i, filter := range filters { - - resources_filter += "^" + prefix + filter + "$" // eg. 
^Deployment:xxx$ - - if i != len(filters)-1 { - resources_filter += "|" - } - } - } - - var promql = RulePromQLTmplMap[metricName] - promql = strings.Replace(promql, "$2", nsName, -1) - promql = strings.Replace(promql, "$3", resources_filter, -1) - - return promql -} - -func MakeSpecificWorkloadRule(wkKind, wkName, namespace string) string { - var rule = PodInfoRule - if namespace == "" { - namespace = ".*" - } - // alertnatives values: Deployment StatefulSet ReplicaSet DaemonSet - wkKind = strings.ToLower(wkKind) - - switch wkKind { - case "deployment": - wkKind = ReplicaSet - if wkName != "" { - wkName = "~\"^" + wkName + `-(\\w)+$"` - } else { - wkName = "~\".*\"" - } - rule = strings.Replace(rule, "$1", wkKind, -1) - rule = strings.Replace(rule, "$2", wkName, -1) - rule = strings.Replace(rule, "$3", namespace, -1) - return rule - case "replicaset": - wkKind = ReplicaSet - case "statefulset": - wkKind = StatefulSet - case "daemonset": - wkKind = DaemonSet - } - - if wkName == "" { - wkName = "~\".*\"" - } else { - wkName = "\"" + wkName + "\"" - } - - rule = strings.Replace(rule, "$1", wkKind, -1) - rule = strings.Replace(rule, "$2", wkName, -1) - rule = strings.Replace(rule, "$3", namespace, -1) - return rule -} - -func MakeAllWorkspacesPromQL(metricsName, nsFilter string) string { - - var promql = RulePromQLTmplMap[metricsName] - nsFilter = "!~\"" + nsFilter + "\"" - promql = strings.Replace(promql, "$1", nsFilter, -1) - return promql -} - -func MakeSpecificWorkspacePromQL(metricsName, nsFilter string, workspace string) string { - - var promql = RulePromQLTmplMap[metricsName] - - nsFilter = "=~\"" + nsFilter + "\"" - workspace = "=~\"^(" + workspace + ")$\"" - - promql = strings.Replace(promql, "$1", nsFilter, -1) - promql = strings.Replace(promql, "$2", workspace, -1) - return promql -} - -func MakeContainerPromQL(nsName, nodeId, podName, containerName, metricName, containerFilter string) string { - var promql string - - if nsName != "" { - // get container 
metrics from namespace-pod - promql = RulePromQLTmplMap[metricName] - promql = strings.Replace(promql, "$1", nsName, -1) - } else { - // get container metrics from node-pod - promql = RulePromQLTmplMap[metricName+"_node"] - promql = strings.Replace(promql, "$1", nodeId, -1) - } - - promql = strings.Replace(promql, "$2", podName, -1) - - if containerName == "" { - - if containerFilter == "" { - containerFilter = ".*" - } - promql = strings.Replace(promql, "$3", containerFilter, -1) - } else { - promql = strings.Replace(promql, "$3", containerName, -1) - } - - return promql -} - -func MakePodPromQL(metricName, nsName, nodeID, podName, podFilter string) string { - - if podFilter == "" { - podFilter = ".*" - } - - var promql = "" - if nsName != "" { - // get pod metrics by namespace - if podName != "" { - // specific pod - promql = RulePromQLTmplMap[metricName] - promql = strings.Replace(promql, "$1", nsName, -1) - promql = strings.Replace(promql, "$2", podName, -1) - - } else { - // all pods - metricName += "_all" - promql = RulePromQLTmplMap[metricName] - - promql = strings.Replace(promql, "$1", nsName, -1) - promql = strings.Replace(promql, "$2", podFilter, -1) - } - } else if nodeID != "" { - // get pod metrics by nodeid - metricName += "_node" - promql = RulePromQLTmplMap[metricName] - promql = strings.Replace(promql, "$3", nodeID, -1) - if podName != "" { - // specific pod - promql = strings.Replace(promql, "$2", podName, -1) - } else { - promql = strings.Replace(promql, "$2", podFilter, -1) - } - } - return promql -} - -func MakePVCPromQL(metricName, nsName, pvcName, scName, pvcFilter string) string { - if pvcFilter == "" { - pvcFilter = ".*" - } - - var promql = "" - if nsName != "" { - // get pvc metrics by namespace - if pvcName != "" { - // specific pvc - promql = RulePromQLTmplMap[metricName] - promql = strings.Replace(promql, "$1", nsName, -1) - promql = strings.Replace(promql, "$2", pvcName, -1) - } else { - // all pvc in a specific namespace - metricName 
+= "_ns" - promql = RulePromQLTmplMap[metricName] - promql = strings.Replace(promql, "$1", nsName, -1) - promql = strings.Replace(promql, "$2", pvcFilter, -1) - } - } else { - if scName != "" { - // all pvc in a specific storageclass - metricName += "_sc" - promql = RulePromQLTmplMap[metricName] - promql = strings.Replace(promql, "$1", scName, -1) - } - } - return promql -} - -func MakeNamespacePromQL(nsName string, nsFilter string, metricsName string) string { - var recordingRule = RulePromQLTmplMap[metricsName] - - if nsName != "" { - nsFilter = nsName - } else { - if nsFilter == "" { - nsFilter = ".*" - } - } - recordingRule = strings.Replace(recordingRule, "$1", nsFilter, -1) - return recordingRule -} - -// cluster rule -func MakeClusterRule(metricsName string) string { - var rule = RulePromQLTmplMap[metricsName] - return rule -} - -// node rule -func MakeNodeRule(nodeID string, nodesFilter string, metricsName string) string { - var rule = RulePromQLTmplMap[metricsName] - - if nodesFilter == "" { - nodesFilter = ".*" - } - if strings.Contains(metricsName, "disk_size") || strings.Contains(metricsName, "pod") || strings.Contains(metricsName, "usage") || strings.Contains(metricsName, "inode") || strings.Contains(metricsName, "load") { - // disk size promql - if nodeID != "" { - nodesFilter = "{" + "node" + "=" + "\"" + nodeID + "\"" + "}" - } else { - nodesFilter = "{" + "node" + "=~" + "\"" + nodesFilter + "\"" + "}" - } - rule = strings.Replace(rule, "$1", nodesFilter, -1) - } else { - // cpu, memory, network, disk_iops rules - if nodeID != "" { - // specific node - rule = rule + "{" + "node" + "=" + "\"" + nodeID + "\"" + "}" - } else { - // all nodes or specific nodes filted with re2 syntax - rule = rule + "{" + "node" + "=~" + "\"" + nodesFilter + "\"" + "}" - } - } - - return rule -} - -func MakeComponentRule(metricsName string) string { - var rule = RulePromQLTmplMap[metricsName] - return rule -} diff --git a/pkg/models/metrics/metricsruleconst.go 
b/pkg/models/metrics/metricsruleconst.go deleted file mode 100644 index 812f18566..000000000 --- a/pkg/models/metrics/metricsruleconst.go +++ /dev/null @@ -1,776 +0,0 @@ -/* -Copyright 2018 The KubeSphere Authors. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package metrics - -const ( - ResultTypeVector = "vector" - ResultTypeMatrix = "matrix" - MetricStatus = "status" - MetricStatusError = "error" - MetricStatusSuccess = "success" - ResultItemMetric = "metric" - ResultItemMetricResource = "resource" - ResultItemMetricResourceName = "resource_name" - ResultItemMetricNodeIp = "node_ip" - ResultItemMetricNodeName = "node_name" - ResultItemValue = "value" - ResultItemValues = "values" - ResultSortTypeDesc = "desc" - ResultSortTypeAsc = "asc" -) - -const ( - MetricNameWorkloadCount = "workload_count" - MetricNameNamespacePodCount = "namespace_pod_count" - - MetricNameWorkspaceAllOrganizationCount = "workspace_all_organization_count" - MetricNameWorkspaceAllAccountCount = "workspace_all_account_count" - MetricNameWorkspaceAllProjectCount = "workspace_all_project_count" - MetricNameWorkspaceAllDevopsCount = "workspace_all_devops_project_count" - MetricNameClusterAllProjectCount = "cluster_namespace_count" - - MetricNameWorkspaceNamespaceCount = "workspace_namespace_count" - MetricNameWorkspaceDevopsCount = "workspace_devops_project_count" - MetricNameWorkspaceMemberCount = "workspace_member_count" - MetricNameWorkspaceRoleCount = "workspace_role_count" - MetricNameComponentOnLine = 
"component_online_count" - MetricNameComponentLine = "component_count" -) - -const ( - WorkspaceResourceKindOrganization = "organization" - WorkspaceResourceKindAccount = "account" - WorkspaceResourceKindNamespace = "namespace" - WorkspaceResourceKindDevops = "devops" - WorkspaceResourceKindMember = "member" - WorkspaceResourceKindRole = "role" -) - -const ( - MetricLevelCluster = "cluster" - MetricLevelClusterWorkspace = "cluster_workspace" - MetricLevelNode = "node" - MetricLevelWorkspace = "workspace" - MetricLevelNamespace = "namespace" - MetricLevelPod = "pod" - MetricLevelPodName = "pod_name" - MetricLevelContainer = "container" - MetricLevelContainerName = "container_name" - MetricLevelPVC = "persistentvolumeclaim" - MetricLevelWorkload = "workload" - MetricLevelComponent = "component" -) - -const ( - ReplicaSet = "ReplicaSet" - StatefulSet = "StatefulSet" - DaemonSet = "DaemonSet" - Deployment = "Deployment" - Any = ".*" -) - -const ( - NodeStatusRule = `kube_node_status_condition{condition="Ready"} > 0` - PodInfoRule = `kube_pod_info{created_by_kind="$1",created_by_name=$2,namespace="$3"}` - NamespaceLabelRule = `kube_namespace_labels` - WorkloadReplicaSetOwnerRule = `kube_pod_owner{namespace="$1", owner_name!="", owner_kind="ReplicaSet"}` - WorkspaceNamespaceLabelRule = `sum(kube_namespace_labels{label_kubesphere_io_workspace != ""}) by (label_kubesphere_io_workspace)` - ExcludedVirtualNetworkInterfaces = `interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)"` -) - -const ( - WorkspaceJoinedKey = "label_kubesphere_io_workspace" -) - -// The metrics need to include extra info out of prometheus -// eg. 
add node name info to the etcd_server_list metric -const ( - EtcdServerList = "etcd_server_list" -) - -type MetricMap map[string]string - -var ClusterMetricsNames = []string{ - "cluster_cpu_utilisation", - "cluster_cpu_usage", - "cluster_cpu_total", - "cluster_memory_utilisation", - "cluster_memory_available", - "cluster_memory_total", - "cluster_memory_usage_wo_cache", - "cluster_net_utilisation", - "cluster_net_bytes_transmitted", - "cluster_net_bytes_received", - "cluster_disk_read_iops", - "cluster_disk_write_iops", - "cluster_disk_read_throughput", - "cluster_disk_write_throughput", - "cluster_disk_size_usage", - "cluster_disk_size_utilisation", - "cluster_disk_size_capacity", - "cluster_disk_size_available", - "cluster_disk_inode_total", - "cluster_disk_inode_usage", - "cluster_disk_inode_utilisation", - - "cluster_node_online", - "cluster_node_offline", - "cluster_node_total", - - "cluster_pod_count", - "cluster_pod_quota", - "cluster_pod_utilisation", - "cluster_pod_running_count", - "cluster_pod_succeeded_count", - "cluster_pod_abnormal_count", - "cluster_ingresses_extensions_count", - "cluster_cronjob_count", - "cluster_pvc_count", - "cluster_daemonset_count", - "cluster_deployment_count", - "cluster_endpoint_count", - "cluster_hpa_count", - "cluster_job_count", - "cluster_statefulset_count", - "cluster_replicaset_count", - "cluster_service_count", - "cluster_secret_count", - "cluster_ingresses_extensions_count", - "cluster_namespace_count", - - "cluster_load1", - "cluster_load5", - "cluster_load15", - - // New in ks 2.0 - "cluster_pod_abnormal_ratio", - "cluster_node_offline_ratio", -} -var NodeMetricsNames = []string{ - "node_cpu_utilisation", - "node_cpu_total", - "node_cpu_usage", - "node_memory_utilisation", - "node_memory_usage_wo_cache", - "node_memory_available", - "node_memory_total", - - "node_net_utilisation", - "node_net_bytes_transmitted", - "node_net_bytes_received", - "node_disk_read_iops", - "node_disk_write_iops", - 
"node_disk_read_throughput", - "node_disk_write_throughput", - "node_disk_size_capacity", - "node_disk_size_available", - "node_disk_size_usage", - "node_disk_size_utilisation", - - "node_disk_inode_total", - "node_disk_inode_usage", - "node_disk_inode_utilisation", - - "node_pod_count", - "node_pod_quota", - "node_pod_utilisation", - "node_pod_running_count", - "node_pod_succeeded_count", - "node_pod_abnormal_count", - - "node_load1", - "node_load5", - "node_load15", - - // New in ks 2.0 - "node_pod_abnormal_ratio", -} -var WorkspaceMetricsNames = []string{ - "workspace_cpu_usage", - "workspace_memory_usage", - "workspace_memory_usage_wo_cache", - "workspace_net_bytes_transmitted", - "workspace_net_bytes_received", - "workspace_pod_count", - "workspace_pod_running_count", - "workspace_pod_succeeded_count", - "workspace_pod_abnormal_count", - "workspace_ingresses_extensions_count", - - "workspace_cronjob_count", - "workspace_pvc_count", - "workspace_daemonset_count", - "workspace_deployment_count", - "workspace_endpoint_count", - "workspace_hpa_count", - "workspace_job_count", - "workspace_statefulset_count", - "workspace_replicaset_count", - "workspace_service_count", - "workspace_secret_count", - - "workspace_all_project_count", - - // New in ks 2.0 - "workspace_pod_abnormal_ratio", -} -var NamespaceMetricsNames = []string{ - "namespace_cpu_usage", - "namespace_memory_usage", - "namespace_memory_usage_wo_cache", - "namespace_net_bytes_transmitted", - "namespace_net_bytes_received", - "namespace_pod_count", - "namespace_pod_running_count", - "namespace_pod_succeeded_count", - "namespace_pod_abnormal_count", - - "namespace_configmap_count_used", - "namespace_jobs_batch_count_used", - "namespace_roles_count_used", - "namespace_memory_limit_used", - "namespace_pvc_used", - "namespace_memory_request_used", - "namespace_pvc_count_used", - "namespace_cronjobs_batch_count_used", - "namespace_ingresses_extensions_count_used", - "namespace_cpu_limit_used", - 
"namespace_storage_request_used", - "namespace_deployment_count_used", - "namespace_pod_count_used", - "namespace_statefulset_count_used", - "namespace_daemonset_count_used", - "namespace_secret_count_used", - "namespace_service_count_used", - "namespace_cpu_request_used", - "namespace_service_loadbalancer_used", - - "namespace_configmap_count_hard", - "namespace_jobs_batch_count_hard", - "namespace_roles_count_hard", - "namespace_memory_limit_hard", - "namespace_pvc_hard", - "namespace_memory_request_hard", - "namespace_pvc_count_hard", - "namespace_cronjobs_batch_count_hard", - "namespace_ingresses_extensions_count_hard", - "namespace_cpu_limit_hard", - "namespace_storage_request_hard", - "namespace_deployment_count_hard", - "namespace_pod_count_hard", - "namespace_statefulset_count_hard", - "namespace_daemonset_count_hard", - "namespace_secret_count_hard", - "namespace_service_count_hard", - "namespace_cpu_request_hard", - "namespace_service_loadbalancer_hard", - - "namespace_cronjob_count", - "namespace_pvc_count", - "namespace_daemonset_count", - "namespace_deployment_count", - "namespace_endpoint_count", - "namespace_hpa_count", - "namespace_job_count", - "namespace_statefulset_count", - "namespace_replicaset_count", - "namespace_service_count", - "namespace_secret_count", - - "namespace_ingresses_extensions_count", - - // New in ks 2.0 - "namespace_pod_abnormal_ratio", - "namespace_resourcequota_used_ratio", -} - -var PodMetricsNames = []string{ - "pod_cpu_usage", - "pod_memory_usage", - "pod_memory_usage_wo_cache", - "pod_net_bytes_transmitted", - "pod_net_bytes_received", -} - -var WorkloadMetricsNames = []string{ - "workload_pod_cpu_usage", - "workload_pod_memory_usage", - "workload_pod_memory_usage_wo_cache", - "workload_pod_net_bytes_transmitted", - "workload_pod_net_bytes_received", - - "workload_deployment_replica", - "workload_deployment_replica_available", - "workload_statefulset_replica", - "workload_statefulset_replica_available", - 
"workload_daemonset_replica", - "workload_daemonset_replica_available", - - // New in ks 2.0 - "workload_deployment_unavailable_replicas_ratio", - "workload_daemonset_unavailable_replicas_ratio", - "workload_statefulset_unavailable_replicas_ratio", -} - -var ContainerMetricsNames = []string{ - "container_cpu_usage", - "container_memory_usage", - "container_memory_usage_wo_cache", - //"container_net_bytes_transmitted", - //"container_net_bytes_received", -} - -var PVCMetricsNames = []string{ - "pvc_inodes_available", - "pvc_inodes_used", - "pvc_inodes_total", - "pvc_inodes_utilisation", - "pvc_bytes_available", - "pvc_bytes_used", - "pvc_bytes_total", - "pvc_bytes_utilisation", -} - -var ComponentMetricsNames = []string{ - "etcd_server_list", - "etcd_server_total", - "etcd_server_up_total", - "etcd_server_has_leader", - "etcd_server_leader_changes", - "etcd_server_proposals_failed_rate", - "etcd_server_proposals_applied_rate", - "etcd_server_proposals_committed_rate", - "etcd_server_proposals_pending_count", - "etcd_mvcc_db_size", - "etcd_network_client_grpc_received_bytes", - "etcd_network_client_grpc_sent_bytes", - "etcd_grpc_call_rate", - "etcd_grpc_call_failed_rate", - "etcd_grpc_server_msg_received_rate", - "etcd_grpc_server_msg_sent_rate", - "etcd_disk_wal_fsync_duration", - "etcd_disk_wal_fsync_duration_quantile", - "etcd_disk_backend_commit_duration", - "etcd_disk_backend_commit_duration_quantile", - - "apiserver_up_sum", - "apiserver_request_rate", - "apiserver_request_by_verb_rate", - "apiserver_request_latencies", - "apiserver_request_by_verb_latencies", - - "scheduler_up_sum", - "scheduler_schedule_attempts", - "scheduler_schedule_attempt_rate", - "scheduler_e2e_scheduling_latency", - "scheduler_e2e_scheduling_latency_quantile", - - "controller_manager_up_sum", - - "coredns_up_sum", - "coredns_cache_hits", - "coredns_cache_misses", - "coredns_dns_request_rate", - "coredns_dns_request_duration", - "coredns_dns_request_duration_quantile", - 
"coredns_dns_request_by_type_rate", - "coredns_dns_request_by_rcode_rate", - "coredns_panic_rate", - "coredns_proxy_request_rate", - "coredns_proxy_request_duration", - "coredns_proxy_request_duration_quantile", - - "prometheus_up_sum", - "prometheus_tsdb_head_samples_appended_rate", -} - -var RulePromQLTmplMap = MetricMap{ - //cluster - "cluster_cpu_utilisation": ":node_cpu_utilisation:avg1m", - "cluster_cpu_usage": `round(:node_cpu_utilisation:avg1m * sum(node:node_num_cpu:sum), 0.001)`, - "cluster_cpu_total": "sum(node:node_num_cpu:sum)", - "cluster_memory_utilisation": ":node_memory_utilisation:", - "cluster_memory_available": "sum(node:node_memory_bytes_available:sum)", - "cluster_memory_total": "sum(node:node_memory_bytes_total:sum)", - "cluster_memory_usage_wo_cache": "sum(node:node_memory_bytes_total:sum) - sum(node:node_memory_bytes_available:sum)", - - "cluster_net_utilisation": ":node_net_utilisation:sum_irate", - "cluster_net_bytes_transmitted": "sum(node:node_net_bytes_transmitted:sum_irate)", - "cluster_net_bytes_received": "sum(node:node_net_bytes_received:sum_irate)", - "cluster_disk_read_iops": "sum(node:data_volume_iops_reads:sum)", - "cluster_disk_write_iops": "sum(node:data_volume_iops_writes:sum)", - "cluster_disk_read_throughput": "sum(node:data_volume_throughput_bytes_read:sum)", - "cluster_disk_write_throughput": "sum(node:data_volume_throughput_bytes_written:sum)", - - "cluster_disk_size_usage": `sum(max(node_filesystem_size_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"} - node_filesystem_avail_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"}) by (device, instance))`, - "cluster_disk_size_utilisation": `cluster:disk_utilization:ratio`, - "cluster_disk_size_capacity": `sum(max(node_filesystem_size_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"}) by (device, instance))`, - "cluster_disk_size_available": `sum(max(node_filesystem_avail_bytes{device=~"/dev/.*", 
device!~"/dev/loop\\d+", job="node-exporter"}) by (device, instance))`, - - "cluster_disk_inode_total": `sum(node:node_inodes_total:)`, - "cluster_disk_inode_usage": `sum(node:node_inodes_total:) - sum(node:node_inodes_free:)`, - "cluster_disk_inode_utilisation": `cluster:disk_inode_utilization:ratio`, - - "cluster_namespace_count": `count(kube_namespace_annotations)`, - - // cluster_pod_count = cluster_pod_running_count + cluster_pod_succeeded_count + cluster_pod_abnormal_count - "cluster_pod_count": `cluster:pod:sum`, - "cluster_pod_quota": `sum(max(kube_node_status_capacity_pods) by (node) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0))`, - "cluster_pod_utilisation": `cluster:pod_utilization:ratio`, - "cluster_pod_running_count": `cluster:pod_running:count`, - "cluster_pod_succeeded_count": `count(kube_pod_info unless on (pod) (kube_pod_status_phase{phase=~"Failed|Pending|Unknown|Running"} > 0) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0))`, - "cluster_pod_abnormal_count": `cluster:pod_abnormal:sum`, - - "cluster_node_online": `sum(kube_node_status_condition{condition="Ready",status="true"})`, - "cluster_node_offline": `cluster:node_offline:sum`, - "cluster_node_total": `sum(kube_node_status_condition{condition="Ready"})`, - - "cluster_configmap_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/configmaps"}) by (resource, type)`, - "cluster_jobs_batch_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/jobs.batch"}) by (resource, type)`, - "cluster_roles_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/roles.rbac.authorization.k8s.io"}) by (resource, type)`, - "cluster_memory_limit_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="limits.memory"}) by (resource, type)`, - "cluster_pvc_used": 
`sum(kube_resourcequota{resourcequota!="quota", type="used", resource="persistentvolumeclaims"}) by (resource, type)`, - "cluster_memory_request_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="requests.memory"}) by (resource, type)`, - "cluster_pvc_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/persistentvolumeclaims"}) by (resource, type)`, - "cluster_cronjobs_batch_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/cronjobs.batch"}) by (resource, type)`, - "cluster_ingresses_extensions_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/ingresses.extensions"}) by (resource, type)`, - "cluster_cpu_limit_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="limits.cpu"}) by (resource, type)`, - "cluster_storage_request_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="requests.storage"}) by (resource, type)`, - "cluster_deployment_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/deployments.apps"}) by (resource, type)`, - "cluster_pod_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/pods"}) by (resource, type)`, - "cluster_statefulset_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/statefulsets.apps"}) by (resource, type)`, - "cluster_daemonset_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/daemonsets.apps"}) by (resource, type)`, - "cluster_secret_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/secrets"}) by (resource, type)`, - "cluster_service_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/services"}) by (resource, type)`, - "cluster_cpu_request_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", 
resource="requests.cpu"}) by (resource, type)`, - "cluster_service_loadbalancer_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="services.loadbalancers"}) by (resource, type)`, - - "cluster_cronjob_count": `sum(kube_cronjob_labels)`, - "cluster_pvc_count": `sum(kube_persistentvolumeclaim_info)`, - "cluster_daemonset_count": `sum(kube_daemonset_labels)`, - "cluster_deployment_count": `sum(kube_deployment_labels)`, - "cluster_endpoint_count": `sum(kube_endpoint_labels)`, - "cluster_hpa_count": `sum(kube_hpa_labels)`, - "cluster_job_count": `sum(kube_job_labels)`, - "cluster_statefulset_count": `sum(kube_statefulset_labels)`, - "cluster_replicaset_count": `count(kube_replicaset_created)`, - "cluster_service_count": `sum(kube_service_info)`, - "cluster_secret_count": `sum(kube_secret_info)`, - "cluster_pv_count": `sum(kube_persistentvolume_labels)`, - "cluster_ingresses_extensions_count": `sum(kube_ingress_labels)`, - - "cluster_load1": `sum(node_load1{job="node-exporter"}) / sum(node:node_num_cpu:sum)`, - "cluster_load5": `sum(node_load5{job="node-exporter"}) / sum(node:node_num_cpu:sum)`, - "cluster_load15": `sum(node_load15{job="node-exporter"}) / sum(node:node_num_cpu:sum)`, - - // cluster: New added in ks 2.0 - "cluster_pod_abnormal_ratio": `cluster:pod_abnormal:ratio`, - "cluster_node_offline_ratio": `cluster:node_offline:ratio`, - - //node - "node_cpu_utilisation": "node:node_cpu_utilisation:avg1m", - "node_cpu_total": "node:node_num_cpu:sum", - "node_memory_utilisation": "node:node_memory_utilisation:", - "node_memory_available": "node:node_memory_bytes_available:sum", - "node_memory_total": "node:node_memory_bytes_total:sum", - "node_memory_usage_wo_cache": "node:node_memory_bytes_total:sum$1 - node:node_memory_bytes_available:sum$1", - - "node_net_utilisation": "node:node_net_utilisation:sum_irate", - "node_net_bytes_transmitted": "node:node_net_bytes_transmitted:sum_irate", - "node_net_bytes_received": 
"node:node_net_bytes_received:sum_irate", - "node_disk_read_iops": "node:data_volume_iops_reads:sum", - "node_disk_write_iops": "node:data_volume_iops_writes:sum", - "node_disk_read_throughput": "node:data_volume_throughput_bytes_read:sum", - "node_disk_write_throughput": "node:data_volume_throughput_bytes_written:sum", - - "node_disk_size_capacity": `sum(max(node_filesystem_size_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"} * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1) by (device, node)) by (node)`, - "node_disk_size_available": `node:disk_space_available:$1`, - "node_disk_size_usage": `sum(max((node_filesystem_size_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"} - node_filesystem_avail_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1) by (device, node)) by (node)`, - "node_disk_size_utilisation": `node:disk_space_utilization:ratio$1`, - - "node_disk_inode_total": `node:node_inodes_total:$1`, - "node_disk_inode_usage": `node:node_inodes_total:$1 - node:node_inodes_free:$1`, - "node_disk_inode_utilisation": `node:disk_inode_utilization:ratio$1`, - - "node_pod_count": `node:pod_count:sum$1`, - "node_pod_quota": `max(kube_node_status_capacity_pods$1) by (node) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0)`, - "node_pod_utilisation": `node:pod_utilization:ratio$1`, - "node_pod_running_count": `node:pod_running:count$1`, - "node_pod_succeeded_count": `node:pod_succeeded:count$1`, - "node_pod_abnormal_count": `node:pod_abnormal:count$1`, - - // without log node: unless on(node) kube_node_labels{label_role="log"} - "node_cpu_usage": `round(node:node_cpu_utilisation:avg1m$1 * node:node_num_cpu:sum$1, 0.001)`, - - "node_load1": `node:load1:ratio$1`, - "node_load5": `node:load5:ratio$1`, - "node_load15": `node:load15:ratio$1`, - - // New in ks 2.0 - 
"node_pod_abnormal_ratio": `node:pod_abnormal:ratio$1`, - - //namespace - "namespace_cpu_usage": `round(namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", namespace=~"$1"}, 0.001)`, - "namespace_memory_usage": `namespace:container_memory_usage_bytes:sum{namespace!="", namespace=~"$1"}`, - "namespace_memory_usage_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", namespace=~"$1"}`, - "namespace_net_bytes_transmitted": `sum by (namespace) (irate(container_network_transmit_bytes_total{namespace!="", namespace=~"$1", pod_name!="", ` + ExcludedVirtualNetworkInterfaces + `, job="kubelet"}[5m]))* on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_net_bytes_received": `sum by (namespace) (irate(container_network_receive_bytes_total{namespace!="", namespace=~"$1", pod_name!="", ` + ExcludedVirtualNetworkInterfaces + `, job="kubelet"}[5m])) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_pod_count": `sum(kube_pod_status_phase{phase!~"Failed|Succeeded", namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_pod_running_count": `sum(kube_pod_status_phase{phase="Running", namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_pod_succeeded_count": `sum(kube_pod_status_phase{phase="Succeeded", namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_pod_abnormal_count": `namespace:pod_abnormal:count{namespace!="", namespace=~"$1"}`, - - "namespace_roles_count_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/roles.rbac.authorization.k8s.io"}) by (namespace, resource, type) * on (namespace) 
group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_pvc_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="persistentvolumeclaims"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_storage_request_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="requests.storage"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_service_loadbalancer_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="services.loadbalancers"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - // workarounds to calculate resource quota usage - "namespace_deployment_count_used": `count(kube_deployment_created{namespace="$1"}) by (namespace) * on(namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels`, - "namespace_statefulset_count_used": `count(kube_statefulset_created{namespace="$1"}) by (namespace) * on(namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels`, - "namespace_daemonset_count_used": `count(kube_daemonset_created{namespace="$1"}) by (namespace) * on(namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels`, - "namespace_jobs_batch_count_used": `count(kube_job_info{namespace="$1"}) by (namespace) * on(namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels`, - "namespace_cronjobs_batch_count_used": `count(kube_cronjob_created{namespace="$1"}) by (namespace) * on(namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels`, - "namespace_pod_count_used": `sum(kube_pod_status_phase{phase!~"Failed|Succeeded", namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) 
group_left(label_kubesphere_io_workspace) kube_namespace_labels`, - "namespace_service_count_used": `count(kube_service_created{namespace="$1"}) by (namespace) * on(namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels`, - "namespace_ingresses_extensions_count_used": `count(kube_ingress_created{namespace="$1"}) by (namespace) * on(namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels`, - "namespace_pvc_count_used": `count(kube_persistentvolumeclaim_info{namespace="$1"}) by (namespace) * on(namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels`, - "namespace_secret_count_used": `count(kube_secret_created{namespace="$1"}) by (namespace) * on(namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels`, - "namespace_configmap_count_used": `count(kube_configmap_created{namespace="$1"}) by (namespace) * on(namespace) group_left(label_kubesphere_io_workspace) kube_namespace_labels`, - "namespace_cpu_limit_used": `round(namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", namespace="$1"}, 0.001)`, - "namespace_cpu_request_used": `round(namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", namespace="$1"}, 0.001)`, - "namespace_memory_limit_used": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", namespace=~"$1"}`, - "namespace_memory_request_used": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", namespace=~"$1"}`, - - "namespace_configmap_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/configmaps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_jobs_batch_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/jobs.batch"}) by (namespace, resource, type) * on (namespace) 
group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_roles_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/roles.rbac.authorization.k8s.io"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_memory_limit_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="limits.memory"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_pvc_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="persistentvolumeclaims"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_memory_request_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="requests.memory"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_pvc_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/persistentvolumeclaims"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_cronjobs_batch_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/cronjobs.batch"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_ingresses_extensions_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/ingresses.extensions"}) by (namespace, resource, type) * on (namespace) 
group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_cpu_limit_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="limits.cpu"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_storage_request_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="requests.storage"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_deployment_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/deployments.apps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_pod_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/pods"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_statefulset_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/statefulsets.apps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_daemonset_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/daemonsets.apps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_secret_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/secrets"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - 
"namespace_service_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/services"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_cpu_request_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="requests.cpu"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_service_loadbalancer_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="services.loadbalancers"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - - "namespace_cronjob_count": `sum(kube_cronjob_labels{namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_pvc_count": `sum(kube_persistentvolumeclaim_info{namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_daemonset_count": `sum(kube_daemonset_labels{namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_deployment_count": `sum(kube_deployment_labels{namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_endpoint_count": `sum(kube_endpoint_labels{namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_hpa_count": `sum(kube_hpa_labels{namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_job_count": 
`sum(kube_job_labels{namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_statefulset_count": `sum(kube_statefulset_labels{namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_replicaset_count": `count(kube_replicaset_created{namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_service_count": `sum(kube_service_info{namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_secret_count": `sum(kube_secret_info{namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - - "namespace_ingresses_extensions_count": `sum(kube_ingress_labels{namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - - // New in ks 2.0 - "namespace_pod_abnormal_ratio": `namespace:pod_abnormal:ratio{namespace!="", namespace=~"$1"}`, - "namespace_resourcequota_used_ratio": `namespace:resourcequota_used:ratio{namespace!="", namespace=~"$1"}`, - - // pod - "pod_cpu_usage": `round(sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name!="", pod_name="$2", image!=""}[5m])) by (namespace, pod_name), 0.001)`, - "pod_memory_usage": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name!="", pod_name="$2", image!=""}) by (namespace, pod_name)`, - "pod_memory_usage_wo_cache": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name!="", pod_name="$2", image!=""} - container_memory_cache{job="kubelet", namespace="$1", pod_name!="", pod_name="$2",image!=""}) by (namespace, pod_name)`, - "pod_net_bytes_transmitted": `sum by (namespace, 
pod_name) (irate(container_network_transmit_bytes_total{namespace="$1", pod_name!="", pod_name="$2", ` + ExcludedVirtualNetworkInterfaces + `, job="kubelet"}[5m]))`, - "pod_net_bytes_received": `sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{namespace="$1", pod_name!="", pod_name="$2", ` + ExcludedVirtualNetworkInterfaces + `, job="kubelet"}[5m]))`, - - "pod_cpu_usage_all": `round(sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name!="", pod_name=~"$2", image!=""}[5m])) by (namespace, pod_name), 0.001)`, - "pod_memory_usage_all": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name!="", pod_name=~"$2", image!=""}) by (namespace, pod_name)`, - "pod_memory_usage_wo_cache_all": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name!="", pod_name=~"$2", image!=""} - container_memory_cache{job="kubelet", namespace="$1", pod_name!="", pod_name=~"$2", image!=""}) by (namespace, pod_name)`, - "pod_net_bytes_transmitted_all": `sum by (namespace, pod_name) (irate(container_network_transmit_bytes_total{namespace="$1", pod_name!="", pod_name=~"$2", ` + ExcludedVirtualNetworkInterfaces + `, job="kubelet"}[5m]))`, - "pod_net_bytes_received_all": `sum by (namespace, pod_name) (irate(container_network_receive_bytes_total{namespace="$1", pod_name!="", pod_name=~"$2", ` + ExcludedVirtualNetworkInterfaces + `, job="kubelet"}[5m]))`, - - "pod_cpu_usage_node": `round(sum by (node, pod_name) (irate(container_cpu_usage_seconds_total{job="kubelet",pod_name!="", pod_name=~"$2", image!=""}[5m]) * on (namespace, pod_name) group_left(node) label_join(node_namespace_pod:kube_pod_info:{node="$3"}, "pod_name", "", "pod", "_name")), 0.001)`, - "pod_memory_usage_node": `sum by (node, pod_name) (container_memory_usage_bytes{job="kubelet",pod_name!="", pod_name=~"$2", image!=""} * on (namespace, pod_name) group_left(node) label_join(node_namespace_pod:kube_pod_info:{node="$3"}, "pod_name", "", "pod", 
"_name"))`, - "pod_memory_usage_wo_cache_node": `sum by (node, pod_name) ((container_memory_usage_bytes{job="kubelet",pod_name!="", pod_name=~"$2", image!=""} - container_memory_cache{job="kubelet",pod_name!="", pod_name=~"$2", image!=""}) * on (namespace, pod_name) group_left(node) label_join(node_namespace_pod:kube_pod_info:{node="$3"}, "pod_name", "", "pod", "_name"))`, - "pod_net_bytes_transmitted_node": `sum by (node, pod_name) (irate(container_network_transmit_bytes_total{pod_name!="", pod_name=~"$2", ` + ExcludedVirtualNetworkInterfaces + `, job="kubelet"}[5m]) * on (pod_name) group_left(node) label_join(node_namespace_pod:kube_pod_info:{node="$3"}, "pod_name", "", "pod", "_name"))`, - "pod_net_bytes_received_node": `sum by (node, pod_name) (irate(container_network_receive_bytes_total{pod_name!="", pod_name=~"$2", ` + ExcludedVirtualNetworkInterfaces + `, job="kubelet"}[5m]) * on (pod_name) group_left(node) label_join(node_namespace_pod:kube_pod_info:{node="$3"}, "pod_name", "", "pod", "_name"))`, - - // workload - // Join the "container_cpu_usage_seconds_total" metric with "kube_pod_owner" to calculate workload-level resource usage - // - // Note the name convention: - // For hardware resource metrics, combine pod metric name with `workload_` - // For k8s resource metrics, must specify the workload type in metric names - "workload_pod_cpu_usage": `round(namespace:workload_cpu_usage:sum{namespace="$2", workload=~"$3"}, 0.001)`, - "workload_pod_memory_usage": `namespace:workload_memory_usage:sum{namespace="$2", workload=~"$3"}`, - "workload_pod_memory_usage_wo_cache": `namespace:workload_memory_usage_wo_cache:sum{namespace="$2", workload=~"$3"}`, - "workload_pod_net_bytes_transmitted": `namespace:workload_net_bytes_transmitted:sum_irate{namespace="$2", workload=~"$3"}`, - "workload_pod_net_bytes_received": `namespace:workload_net_bytes_received:sum_irate{namespace="$2", workload=~"$3"}`, - - "workload_deployment_replica": `label_join(sum 
(label_join(label_replace(kube_deployment_spec_replicas{namespace="$2", deployment=~"$3"}, "owner_kind", "Deployment", "", ""), "workload", "", "deployment")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, - "workload_deployment_replica_available": `label_join(sum (label_join(label_replace(kube_deployment_status_replicas_available{namespace="$2", deployment=~"$3"}, "owner_kind", "Deployment", "", ""), "workload", "", "deployment")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, - "workload_statefulset_replica": `label_join(sum (label_join(label_replace(kube_statefulset_replicas{namespace="$2", statefulset=~"$3"}, "owner_kind", "StatefulSet", "", ""), "workload", "", "statefulset")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, - "workload_statefulset_replica_available": `label_join(sum (label_join(label_replace(kube_statefulset_status_replicas_current{namespace="$2", statefulset=~"$3"}, "owner_kind", "StatefulSet", "", ""), "workload", "", "statefulset")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, - "workload_daemonset_replica": `label_join(sum (label_join(label_replace(kube_daemonset_status_desired_number_scheduled{namespace="$2", daemonset=~"$3"}, "owner_kind", "DaemonSet", "", ""), "workload", "", "daemonset")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, - "workload_daemonset_replica_available": `label_join(sum (label_join(label_replace(kube_daemonset_status_number_available{namespace="$2", daemonset=~"$3"}, "owner_kind", "DaemonSet", "", ""), "workload", "", "daemonset")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, - - // New in ks 2.0 - "workload_deployment_unavailable_replicas_ratio": `namespace:deployment_unavailable_replicas:ratio{namespace="$2", deployment=~"$3"}`, - "workload_daemonset_unavailable_replicas_ratio": 
`namespace:daemonset_unavailable_replicas:ratio{namespace="$2", daemonset=~"$3"}`, - "workload_statefulset_unavailable_replicas_ratio": `namespace:statefulset_unavailable_replicas:ratio{namespace="$2", statefulset=~"$3"}`, - - // container - "container_cpu_usage": `round(sum(irate(container_cpu_usage_seconds_total{namespace="$1", pod_name="$2", container_name!="POD", container_name=~"$3"}[5m])) by (namespace, pod_name, container_name), 0.001)`, - "container_memory_usage": `sum(container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name!="POD", container_name=~"$3"}) by (namespace, pod_name, container_name)`, - "container_memory_usage_wo_cache": `container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name!="POD", container_name=~"$3"} - ignoring(id, image, endpoint, instance, job, name, service) container_memory_cache{namespace="$1", pod_name="$2", container_name!="POD", container_name=~"$3"}`, - "container_net_bytes_transmitted": `sum(irate(container_network_transmit_bytes_total{job="kubelet", namespace="$1", pod_name="$2", container_name="POD", ` + ExcludedVirtualNetworkInterfaces + `}[5m])) by (namespace, pod_name, container_name)`, - "container_net_bytes_received": `sum(irate(container_network_receive_bytes_total{job="kubelet", namespace="$1", pod_name="$2", container_name="POD", ` + ExcludedVirtualNetworkInterfaces + `}[5m])) by (namespace, pod_name, container_name)`, - - "container_cpu_usage_node": `round(sum by (node, pod_name, container_name) (irate(container_cpu_usage_seconds_total{job="kubelet", pod_name="$2", container_name!="POD", container_name!="", container_name=~"$3", image!=""}[5m]) * on (pod_name) group_left(node) label_join(node_namespace_pod:kube_pod_info:{node="$1"}, "pod_name", "", "pod", "_name")), 0.001)`, - "container_memory_usage_node": `sum by (node, pod_name, container_name) (container_memory_usage_bytes{job="kubelet", pod_name="$2", container_name!="POD", container_name!="", container_name=~"$3", image!=""} 
* on (pod_name) group_left(node) label_join(node_namespace_pod:kube_pod_info:{node="$1"}, "pod_name", "", "pod", "_name"))`, - "container_memory_usage_wo_cache_node": `sum by (node, pod_name, container_name) ((container_memory_usage_bytes{job="kubelet", pod_name="$2", container_name!="POD", container_name!="", container_name=~"$3", image!=""} - container_memory_cache{job="kubelet", pod_name="$2", container_name!="POD", container_name!="", container_name=~"$3", image!=""}) * on (pod_name) group_left(node) label_join(node_namespace_pod:kube_pod_info:{node="$1"}, "pod_name", "", "pod", "_name"))`, - "container_net_bytes_transmitted_node": `sum by (node, pod_name, container_name) (irate(container_network_transmit_bytes_total{job="kubelet", ` + ExcludedVirtualNetworkInterfaces + `, pod_name="$2", container_name="POD", container_name!="", image!=""}[5m]) * on (pod_name) group_left(node) label_join(node_namespace_pod:kube_pod_info:{node="$1"}, "pod_name", "", "pod", "_name"))`, - "container_net_bytes_received_node": `sum by (node, pod_name, container_name) (irate(container_network_receive_bytes_total{job="kubelet", ` + ExcludedVirtualNetworkInterfaces + `, pod_name="$2", container_name="POD", container_name!="", image!=""}[5m]) * on (pod_name) group_left(node) label_join(node_namespace_pod:kube_pod_info:{node="$1"}, "pod_name", "", "pod", "_name"))`, - - // workspace - "workspace_cpu_usage": `round(sum(namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", namespace$1, label_kubesphere_io_workspace$2}), 0.001)`, - "workspace_memory_usage": `sum(namespace:container_memory_usage_bytes:sum{namespace!="", namespace$1, label_kubesphere_io_workspace$2})`, - "workspace_memory_usage_wo_cache": `sum(namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", namespace$1, label_kubesphere_io_workspace$2})`, - "workspace_net_bytes_transmitted": `sum(sum by (namespace) (irate(container_network_transmit_bytes_total{namespace!="", namespace$1, pod_name!="", ` + 
ExcludedVirtualNetworkInterfaces + `, job="kubelet"}[5m])))`, - "workspace_net_bytes_received": `sum(sum by (namespace) (irate(container_network_receive_bytes_total{namespace!="", namespace$1, pod_name!="", ` + ExcludedVirtualNetworkInterfaces + `, job="kubelet"}[5m])))`, - "workspace_pod_count": `sum(kube_pod_status_phase{phase!~"Failed|Succeeded", namespace!="", namespace$1} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - "workspace_pod_running_count": `sum(kube_pod_status_phase{phase="Running", namespace!="", namespace$1} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - "workspace_pod_succeeded_count": `sum(kube_pod_status_phase{phase="Succeeded", namespace!="", namespace$1} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - "workspace_pod_abnormal_count": `count((kube_pod_info{node!="", namespace$1} unless on (pod, namespace) (kube_pod_status_phase{job="kube-state-metrics", phase="Succeeded"}>0) unless on (pod, namespace) ((kube_pod_status_ready{job="kube-state-metrics", condition="true"}>0) and on (pod, namespace) (kube_pod_status_phase{job="kube-state-metrics", phase="Running"}>0)) unless on (pod, namespace) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", reason="ContainerCreating"}>0)) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - - "workspace_configmap_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="count/configmaps"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_jobs_batch_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, 
resource="count/jobs.batch"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_roles_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="count/roles.rbac.authorization.k8s.io"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_memory_limit_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="limits.memory"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_pvc_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="persistentvolumeclaims"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_memory_request_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="requests.memory"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_pvc_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="count/persistentvolumeclaims"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_cronjobs_batch_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="count/cronjobs.batch"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_ingresses_extensions_count_used": 
`sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="count/ingresses.extensions"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_cpu_limit_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="limits.cpu"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_storage_request_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="requests.storage"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_deployment_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="count/deployments.apps"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_pod_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="count/pods"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_statefulset_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="count/statefulsets.apps"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_daemonset_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="count/daemonsets.apps"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by 
(resource, type)`, - "workspace_secret_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="count/secrets"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_service_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="count/services"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_cpu_request_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="requests.cpu"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - "workspace_service_loadbalancer_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace$1, resource="services.loadbalancers"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2})) by (resource, type)`, - - "workspace_ingresses_extensions_count": `sum(kube_ingress_labels{namespace!="", namespace$1} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - - "workspace_cronjob_count": `sum(kube_cronjob_labels{namespace!="", namespace$1} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - "workspace_pvc_count": `sum(kube_persistentvolumeclaim_info{namespace!="", namespace$1} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - "workspace_daemonset_count": `sum(kube_daemonset_labels{namespace!="", namespace$1} * on (namespace) 
group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - "workspace_deployment_count": `sum(kube_deployment_labels{namespace!="", namespace$1} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - "workspace_endpoint_count": `sum(kube_endpoint_labels{namespace!="", namespace$1} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - "workspace_hpa_count": `sum(kube_hpa_labels{namespace!="", namespace$1} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - "workspace_job_count": `sum(kube_job_labels{namespace!="", namespace$1} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - "workspace_statefulset_count": `sum(kube_statefulset_labels{namespace!="", namespace$1} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - "workspace_replicaset_count": `count(kube_replicaset_created{namespace!="", namespace$1} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - "workspace_service_count": `sum(kube_service_info{namespace!="", namespace$1} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - "workspace_secret_count": `sum(kube_secret_info{namespace!="", namespace$1} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - - "workspace_all_project_count": `count(kube_namespace_annotations)`, - - // New in ks 2.0 - "workspace_pod_abnormal_ratio": `count((kube_pod_info{node!="", namespace$1} unless on (pod, namespace) (kube_pod_status_phase{job="kube-state-metrics", phase="Succeeded"}>0) unless on (pod, 
namespace) ((kube_pod_status_ready{job="kube-state-metrics", condition="true"}>0) and on (pod, namespace) (kube_pod_status_phase{job="kube-state-metrics", phase="Running"}>0)) unless on (pod, namespace) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", reason="ContainerCreating"}>0)) / sum(kube_pod_status_phase{phase!~"Succeeded", namespace!="", namespace$1}) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels{label_kubesphere_io_workspace$2}))`, - - // PVC - "pvc_inodes_available": `max (kubelet_volume_stats_inodes_free{namespace="$1",persistentvolumeclaim="$2"})by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_inodes_used": `max (kubelet_volume_stats_inodes_used{namespace="$1", persistentvolumeclaim="$2"})by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_inodes_total": `max (kubelet_volume_stats_inodes{namespace="$1", persistentvolumeclaim="$2"})by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_inodes_utilisation": `max (kubelet_volume_stats_inodes_used{namespace="$1", persistentvolumeclaim="$2"}/kubelet_volume_stats_inodes{namespace="$1", persistentvolumeclaim="$2"})by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_bytes_available": `max (kubelet_volume_stats_available_bytes{namespace="$1", persistentvolumeclaim="$2"})by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_bytes_used": `max (kubelet_volume_stats_used_bytes{namespace="$1", persistentvolumeclaim="$2"})by(namespace, persistentvolumeclaim)*on(namespace, 
persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_bytes_total": `max (kubelet_volume_stats_capacity_bytes{namespace="$1", persistentvolumeclaim="$2"})by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_bytes_utilisation": `max (kubelet_volume_stats_used_bytes{namespace="$1", persistentvolumeclaim="$2"}/kubelet_volume_stats_capacity_bytes{namespace="$1", persistentvolumeclaim="$2"})by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_inodes_available_ns": `max (kubelet_volume_stats_inodes_free{namespace="$1",persistentvolumeclaim=~"$2"})by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_inodes_used_ns": `max (kubelet_volume_stats_inodes_used{namespace="$1",persistentvolumeclaim=~"$2"})by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_inodes_total_ns": `max (kubelet_volume_stats_inodes{namespace="$1",persistentvolumeclaim=~"$2"})by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_inodes_utilisation_ns": `max (kubelet_volume_stats_inodes_used{namespace="$1", persistentvolumeclaim=~"$2"}/kubelet_volume_stats_inodes{namespace="$1", persistentvolumeclaim=~"$2"})by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_bytes_available_ns": `max (kubelet_volume_stats_available_bytes{namespace="$1",persistentvolumeclaim=~"$2"})by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_bytes_used_ns": `max 
(kubelet_volume_stats_used_bytes{namespace="$1",persistentvolumeclaim=~"$2"})by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_bytes_total_ns": `max (kubelet_volume_stats_capacity_bytes{namespace="$1",persistentvolumeclaim=~"$2"})by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_bytes_utilisation_ns": `max (kubelet_volume_stats_used_bytes{namespace="$1", persistentvolumeclaim=~"$2"}/kubelet_volume_stats_capacity_bytes{namespace="$1", persistentvolumeclaim=~"$2"})by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info`, - "pvc_inodes_available_sc": `max (kubelet_volume_stats_inodes_free)by(namespace,persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info{storageclass="$1"}`, - "pvc_inodes_used_sc": `max (kubelet_volume_stats_inodes_used)by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info{storageclass="$1"}`, - "pvc_inodes_total_sc": `max (kubelet_volume_stats_inodes)by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info{storageclass="$1"}`, - "pvc_inodes_utilisation_sc": `max (kubelet_volume_stats_inodes_used/kubelet_volume_stats_inodes)by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info{storageclass="$1"}`, - "pvc_bytes_available_sc": `max (kubelet_volume_stats_available_bytes)by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info{storageclass="$1"}`, - "pvc_bytes_used_sc": `max (kubelet_volume_stats_used_bytes)by(namespace, persistentvolumeclaim)*on(namespace, 
persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info{storageclass="$1"}`, - "pvc_bytes_total_sc": `max (kubelet_volume_stats_capacity_bytes)by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info{storageclass="$1"}`, - "pvc_bytes_utilisation_sc": `max (kubelet_volume_stats_used_bytes/kubelet_volume_stats_capacity_bytes)by(namespace, persistentvolumeclaim)*on(namespace, persistentvolumeclaim)group_left(storageclass)kube_persistentvolumeclaim_info{storageclass="$1"}`, - - // component - "etcd_server_list": `label_replace(up{job="etcd"}, "node_ip", "$1", "instance", "(.*):.*")`, - "etcd_server_total": `count(up{job="etcd"})`, - "etcd_server_up_total": `etcd:up:sum`, - "etcd_server_has_leader": `label_replace(etcd_server_has_leader, "node_ip", "$1", "instance", "(.*):.*")`, - "etcd_server_leader_changes": `label_replace(etcd:etcd_server_leader_changes_seen:sum_changes, "node_ip", "$1", "node", "(.*)")`, - "etcd_server_proposals_failed_rate": `avg(etcd:etcd_server_proposals_failed:sum_irate)`, - "etcd_server_proposals_applied_rate": `avg(etcd:etcd_server_proposals_applied:sum_irate)`, - "etcd_server_proposals_committed_rate": `avg(etcd:etcd_server_proposals_committed:sum_irate)`, - "etcd_server_proposals_pending_count": `avg(etcd:etcd_server_proposals_pending:sum)`, - "etcd_mvcc_db_size": `avg(etcd:etcd_debugging_mvcc_db_total_size:sum)`, - "etcd_network_client_grpc_received_bytes": `sum(etcd:etcd_network_client_grpc_received_bytes:sum_irate)`, - "etcd_network_client_grpc_sent_bytes": `sum(etcd:etcd_network_client_grpc_sent_bytes:sum_irate)`, - "etcd_grpc_call_rate": `sum(etcd:grpc_server_started:sum_irate)`, - "etcd_grpc_call_failed_rate": `sum(etcd:grpc_server_handled:sum_irate)`, - "etcd_grpc_server_msg_received_rate": `sum(etcd:grpc_server_msg_received:sum_irate)`, - "etcd_grpc_server_msg_sent_rate": `sum(etcd:grpc_server_msg_sent:sum_irate)`, - 
"etcd_disk_wal_fsync_duration": `avg(etcd:etcd_disk_wal_fsync_duration:avg)`, - "etcd_disk_wal_fsync_duration_quantile": `avg(etcd:etcd_disk_wal_fsync_duration:histogram_quantile) by (quantile)`, - "etcd_disk_backend_commit_duration": `avg(etcd:etcd_disk_backend_commit_duration:avg)`, - "etcd_disk_backend_commit_duration_quantile": `avg(etcd:etcd_disk_backend_commit_duration:histogram_quantile) by (quantile)`, - - "apiserver_up_sum": `apiserver:up:sum`, - "apiserver_request_rate": `apiserver:apiserver_request_count:sum_irate`, - "apiserver_request_by_verb_rate": `apiserver:apiserver_request_count:sum_verb_irate`, - "apiserver_request_latencies": `apiserver:apiserver_request_latencies:avg`, - "apiserver_request_by_verb_latencies": `apiserver:apiserver_request_latencies:avg_by_verb`, - - "scheduler_up_sum": `scheduler:up:sum`, - "scheduler_schedule_attempts": `scheduler:scheduler_schedule_attempts:sum`, - "scheduler_schedule_attempt_rate": `scheduler:scheduler_schedule_attempts:sum_rate`, - "scheduler_e2e_scheduling_latency": `scheduler:scheduler_e2e_scheduling_latency:avg`, - "scheduler_e2e_scheduling_latency_quantile": `scheduler:scheduler_e2e_scheduling_latency:histogram_quantile`, - - "controller_manager_up_sum": `controller_manager:up:sum`, - - "coredns_up_sum": `coredns:up:sum`, - "coredns_cache_hits": `coredns:coredns_cache_hits_total:sum_irate`, - "coredns_cache_misses": `coredns:coredns_cache_misses:sum_irate`, - "coredns_dns_request_rate": `coredns:coredns_dns_request_count:sum_irate`, - "coredns_dns_request_duration": `coredns:coredns_dns_request_duration:avg`, - "coredns_dns_request_duration_quantile": `coredns:coredns_dns_request_duration:histogram_quantile`, - "coredns_dns_request_by_type_rate": `coredns:coredns_dns_request_type_count:sum_irate`, - "coredns_dns_request_by_rcode_rate": `coredns:coredns_dns_response_rcode_count:sum_irate`, - "coredns_panic_rate": `coredns:coredns_panic_count:sum_irate`, - "coredns_proxy_request_rate": 
`coredns:coredns_proxy_request_count:sum_irate`, - "coredns_proxy_request_duration": `coredns:coredns_proxy_request_duration:avg`, - "coredns_proxy_request_duration_quantile": `coredns:coredns_proxy_request_duration:histogram_quantile`, - - "prometheus_up_sum": `prometheus:up:sum`, - "prometheus_tsdb_head_samples_appended_rate": `prometheus:prometheus_tsdb_head_samples_appended:sum_rate`, -} diff --git a/pkg/models/metrics/namespaces.go b/pkg/models/metrics/namespaces.go index 1c2ecac57..6099596c1 100644 --- a/pkg/models/metrics/namespaces.go +++ b/pkg/models/metrics/namespaces.go @@ -32,29 +32,25 @@ func GetNamespacesWithMetrics(namespaces []*v1.Namespace) []*v1.Namespace { nsFilter := "^(" + strings.Join(nsNameList, "|") + ")$" var timeRelateParams = make(url.Values) - params := MonitoringRequestParams{ + params := RequestParams{ ResourcesFilter: nsFilter, - Params: timeRelateParams, - QueryType: DefaultQueryType, + QueryParams: timeRelateParams, + QueryType: Query, MetricsFilter: "namespace_cpu_usage|namespace_memory_usage_wo_cache|namespace_pod_count", } - rawMetrics := GetNamespaceLevelMetrics(¶ms) + rawMetrics := GetNamespaceMetrics(params) for _, result := range rawMetrics.Results { for _, data := range result.Data.Result { - metricDescMap, ok := data[ResultItemMetric].(map[string]interface{}) - if ok { - if ns, exist := metricDescMap[ResultItemMetricResourceName]; exist { - timeAndValue, ok := data[ResultItemValue].([]interface{}) - if ok && len(timeAndValue) == 2 { - for i := 0; i < len(namespaces); i++ { - if namespaces[i].Name == ns { - if namespaces[i].Annotations == nil { - namespaces[i].Annotations = make(map[string]string, 0) - } - namespaces[i].Annotations[result.MetricName] = timeAndValue[1].(string) + if ns, exist := data.Metric["namespace"]; exist { + if len(data.Value) == 2 { + for i := 0; i < len(namespaces); i++ { + if namespaces[i].Name == ns { + if namespaces[i].Annotations == nil { + namespaces[i].Annotations = make(map[string]string, 0) } 
+ namespaces[i].Annotations[result.MetricName] = data.Value[1].(string) } } } diff --git a/pkg/models/metrics/types.go b/pkg/models/metrics/types.go index 480a4ebd0..8d7fd0102 100644 --- a/pkg/models/metrics/types.go +++ b/pkg/models/metrics/types.go @@ -1,33 +1,59 @@ +/* + + Copyright 2019 The KubeSphere Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +*/ + package metrics -import "net/url" - -const ( - DefaultQueryStep = "10m" - DefaultQueryTimeout = "10s" - RangeQueryType = "query_range?" - DefaultQueryType = "query?" +import ( + "kubesphere.io/kubesphere/pkg/simple/client/prometheus" + "net/url" ) -type MonitoringRequestParams struct { - Params url.Values +type RequestParams struct { + QueryParams url.Values QueryType string - SortMetricName string + SortMetric string SortType string PageNum string LimitNum string - Tp string + Type string MetricsFilter string ResourcesFilter string - MetricsName string + NodeName string + WorkspaceName string + NamespaceName string + WorkloadKind string WorkloadName string - NodeId string - WsName string - NsName string PodName string + ContainerName string PVCName string StorageClassName string - ContainerName string - WorkloadKind string ComponentName string } + +type APIResponse struct { + MetricName string `json:"metric_name,omitempty" description:"metric name, eg. scheduler_up_sum"` + prometheus.APIResponse +} + +type Response struct { + MetricsLevel string `json:"metrics_level" description:"metric level, eg. 
cluster"` + Results []APIResponse `json:"results" description:"actual array of results"` + CurrentPage int `json:"page,omitempty" description:"current page returned"` + TotalPage int `json:"total_page,omitempty" description:"total number of pages"` + TotalItem int `json:"total_item,omitempty" description:"page size"` +} diff --git a/pkg/models/metrics/util.go b/pkg/models/metrics/util.go index 686201582..3c4eef6a7 100644 --- a/pkg/models/metrics/util.go +++ b/pkg/models/metrics/util.go @@ -19,23 +19,33 @@ package metrics import ( - "k8s.io/klog" + "k8s.io/apimachinery/pkg/labels" + "kubesphere.io/kubesphere/pkg/informers" + "kubesphere.io/kubesphere/pkg/simple/client/prometheus" "math" "sort" "strconv" - "unicode" "runtime/debug" + + "github.com/golang/glog" ) const ( DefaultPageLimit = 5 DefaultPage = 1 + + ResultTypeVector = "vector" + ResultTypeMatrix = "matrix" + MetricStatusSuccess = "success" + ResultItemMetricResourceName = "resource_name" + ResultSortTypeDesc = "desc" + ResultSortTypeAsc = "asc" ) type FormatedMetricDataWrapper struct { - fmtMetricData FormatedMetricData - by func(p, q *map[string]interface{}) bool + fmtMetricData prometheus.QueryResult + by func(p, q *prometheus.QueryValue) bool } func (wrapper FormatedMetricDataWrapper) Len() int { @@ -51,10 +61,10 @@ func (wrapper FormatedMetricDataWrapper) Swap(i, j int) { } // sorted metric by ascending or descending order -func Sort(sortMetricName string, sortType string, rawMetrics *FormatedLevelMetric) (*FormatedLevelMetric, int) { +func (rawMetrics *Response) SortBy(sortMetricName string, sortType string) (*Response, int) { defer func() { if err := recover(); err != nil { - klog.Errorln(err) + glog.Errorln(err) debug.PrintStack() } }() @@ -82,31 +92,31 @@ func Sort(sortMetricName string, sortType string, rawMetrics *FormatedLevelMetri if metricItem.MetricName == sortMetricName { if sortType == ResultSortTypeAsc { // asc - sort.Sort(FormatedMetricDataWrapper{metricItem.Data, func(p, q 
*map[string]interface{}) bool { - value1 := (*p)[ResultItemValue].([]interface{}) - value2 := (*q)[ResultItemValue].([]interface{}) + sort.Sort(FormatedMetricDataWrapper{metricItem.Data, func(p, q *prometheus.QueryValue) bool { + value1 := p.Value + value2 := q.Value v1, _ := strconv.ParseFloat(value1[len(value1)-1].(string), 64) v2, _ := strconv.ParseFloat(value2[len(value2)-1].(string), 64) if v1 == v2 { - resourceName1 := (*p)[ResultItemMetric].(map[string]interface{})[ResultItemMetricResourceName] - resourceName2 := (*q)[ResultItemMetric].(map[string]interface{})[ResultItemMetricResourceName] - return resourceName1.(string) < resourceName2.(string) + resourceName1 := p.Metric[ResultItemMetricResourceName] + resourceName2 := q.Metric[ResultItemMetricResourceName] + return resourceName1 < resourceName2 } return v1 < v2 }}) } else { // desc - sort.Sort(FormatedMetricDataWrapper{metricItem.Data, func(p, q *map[string]interface{}) bool { - value1 := (*p)[ResultItemValue].([]interface{}) - value2 := (*q)[ResultItemValue].([]interface{}) + sort.Sort(FormatedMetricDataWrapper{metricItem.Data, func(p, q *prometheus.QueryValue) bool { + value1 := p.Value + value2 := q.Value v1, _ := strconv.ParseFloat(value1[len(value1)-1].(string), 64) v2, _ := strconv.ParseFloat(value2[len(value2)-1].(string), 64) if v1 == v2 { - resourceName1 := (*p)[ResultItemMetric].(map[string]interface{})[ResultItemMetricResourceName] - resourceName2 := (*q)[ResultItemMetric].(map[string]interface{})[ResultItemMetricResourceName] - return resourceName1.(string) > resourceName2.(string) + resourceName1 := p.Metric[ResultItemMetricResourceName] + resourceName2 := q.Metric[ResultItemMetricResourceName] + return resourceName1 > resourceName2 } return v1 > v2 @@ -116,10 +126,10 @@ func Sort(sortMetricName string, sortType string, rawMetrics *FormatedLevelMetri for _, r := range metricItem.Data.Result { // record the ordering of resource_name to indexMap // example: 
{"metric":{ResultItemMetricResourceName: "Deployment:xxx"},"value":[1541142931.731,"3"]} - resourceName, exist := r[ResultItemMetric].(map[string]interface{})[ResultItemMetricResourceName] + resourceName, exist := r.Metric[ResultItemMetricResourceName] if exist { - if _, exist := indexMap[resourceName.(string)]; !exist { - indexMap[resourceName.(string)] = i + if _, exist := indexMap[resourceName]; !exist { + indexMap[resourceName] = i i = i + 1 } } @@ -128,9 +138,9 @@ func Sort(sortMetricName string, sortType string, rawMetrics *FormatedLevelMetri // iterator all metric to find max metricItems length for _, r := range metricItem.Data.Result { - k, ok := r[ResultItemMetric].(map[string]interface{})[ResultItemMetricResourceName] + k, ok := r.Metric[ResultItemMetricResourceName] if ok { - currentResourceMap[k.(string)] = 1 + currentResourceMap[k] = 1 } } @@ -154,12 +164,12 @@ func Sort(sortMetricName string, sortType string, rawMetrics *FormatedLevelMetri for i := 0; i < len(rawMetrics.Results); i++ { re := rawMetrics.Results[i] if re.Data.ResultType == ResultTypeVector && re.Status == MetricStatusSuccess { - sortedMetric := make([]map[string]interface{}, len(indexMap)) + sortedMetric := make([]prometheus.QueryValue, len(indexMap)) for j := 0; j < len(re.Data.Result); j++ { r := re.Data.Result[j] - k, exist := r[ResultItemMetric].(map[string]interface{})[ResultItemMetricResourceName] + k, exist := r.Metric[ResultItemMetricResourceName] if exist { - index, exist := indexMap[k.(string)] + index, exist := indexMap[k] if exist { sortedMetric[index] = r } @@ -173,7 +183,7 @@ func Sort(sortMetricName string, sortType string, rawMetrics *FormatedLevelMetri return rawMetrics, len(indexMap) } -func Page(pageNum string, limitNum string, fmtLevelMetric *FormatedLevelMetric, maxLength int) interface{} { +func (fmtLevelMetric *Response) Page(pageNum string, limitNum string, maxLength int) *Response { if maxLength <= 0 { return fmtLevelMetric } @@ -190,7 +200,7 @@ func 
Page(pageNum string, limitNum string, fmtLevelMetric *FormatedLevelMetric, if pageNum != "" { p, err := strconv.Atoi(pageNum) if err != nil { - klog.Errorln(err) + glog.Errorln(err) } else { if p > 0 { page = p @@ -206,7 +216,7 @@ func Page(pageNum string, limitNum string, fmtLevelMetric *FormatedLevelMetric, if limitNum != "" { l, err := strconv.Atoi(limitNum) if err != nil { - klog.Errorln(err) + glog.Errorln(err) } else { if l > 0 { limit = l @@ -245,72 +255,40 @@ func Page(pageNum string, limitNum string, fmtLevelMetric *FormatedLevelMetric, return fmtLevelMetric } -// maybe this function is time consuming -// The metric param is the result from Prometheus HTTP query -func ReformatJson(metric string, metricsName string, needAddParams map[string]string, needDelParams ...string) *FormatedMetric { - var formatMetric FormatedMetric - - err := jsonIter.Unmarshal([]byte(metric), &formatMetric) - +func getNodeAddressAndRole(nodeName string) (string, string) { + nodeLister := informers.SharedInformerFactory().Core().V1().Nodes().Lister() + node, err := nodeLister.Get(nodeName) if err != nil { - klog.Errorln("Unmarshal metric json failed", err.Error(), metric) + return "", "" } - if formatMetric.MetricName == "" { - if metricsName != "" { - formatMetric.MetricName = metricsName - } - } - // retrive metrics success - if formatMetric.Status == MetricStatusSuccess { - result := formatMetric.Data.Result - for _, res := range result { - metric, exist := res[ResultItemMetric] - // Prometheus query result format: .data.result[].metric - // metricMap is the value of .data.result[].metric - metricMap, sure := metric.(map[string]interface{}) - if exist && sure { - delete(metricMap, "__name__") - } - if len(needDelParams) > 0 { - for _, p := range needDelParams { - delete(metricMap, p) - } - } - if needAddParams != nil && len(needAddParams) > 0 { - for n := range needAddParams { - if v, ok := metricMap[n]; ok { - delete(metricMap, n) - metricMap[ResultItemMetricResourceName] = v - 
} else { - metricMap[ResultItemMetricResourceName] = needAddParams[n] - } - } - } + var addr string + for _, address := range node.Status.Addresses { + if address.Type == "InternalIP" { + addr = address.Address + break } } - return &formatMetric + role := "node" + _, exists := node.Labels["node-role.kubernetes.io/master"] + if exists { + role = "master" + } + return addr, role } -func ReformatNodeStatusField(nodeMetric *FormatedMetric) *FormatedMetric { - metricCount := len(nodeMetric.Data.Result) - for i := 0; i < metricCount; i++ { - metric, exist := nodeMetric.Data.Result[i][ResultItemMetric] - if exist { - status, exist := metric.(map[string]interface{})[MetricStatus] - if exist { - status = UpperFirstLetter(status.(string)) - metric.(map[string]interface{})[MetricStatus] = status +func getNodeName(nodeIp string) string { + nodeLister := informers.SharedInformerFactory().Core().V1().Nodes().Lister() + nodes, _ := nodeLister.List(labels.Everything()) + + for _, node := range nodes { + for _, address := range node.Status.Addresses { + if address.Type == "InternalIP" && address.Address == nodeIp { + return node.Name } } } - return nodeMetric -} -func UpperFirstLetter(str string) string { - for i, ch := range str { - return string(unicode.ToUpper(ch)) + str[i+1:] - } return "" } diff --git a/pkg/models/workspaces/workspaces.go b/pkg/models/workspaces/workspaces.go index c6d085e30..ebf9ce50c 100644 --- a/pkg/models/workspaces/workspaces.go +++ b/pkg/models/workspaces/workspaces.go @@ -168,11 +168,11 @@ func DeleteWorkspaceRoleBinding(workspace, username string, role string) error { return err } -func GetDevOpsProjects(workspaceName string) ([]string, error) { +func GetDevOpsProjectsCount(workspaceName string) (int, error) { dbconn, err := clientset.ClientSets().MySQL() if err != nil { - return nil, err + return 0, err } query := dbconn.Select(devops.DevOpsProjectIdColumn). 
@@ -183,9 +183,9 @@ func GetDevOpsProjects(workspaceName string) ([]string, error) { devOpsProjects := make([]string, 0) if _, err := query.Load(&devOpsProjects); err != nil { - return nil, err + return 0, err } - return devOpsProjects, nil + return len(devOpsProjects), nil } func WorkspaceUserCount(workspace string) (int, error) { @@ -196,24 +196,24 @@ func WorkspaceUserCount(workspace string) (int, error) { return count, nil } -func GetOrgRoles(name string) ([]string, error) { - return constants.WorkSpaceRoles, nil +func GetOrgRolesCount(name string) (int, error) { + return len(constants.WorkSpaceRoles), nil } -func WorkspaceNamespaces(workspaceName string) ([]string, error) { +func WorkspaceNamespaceCount(workspaceName string) (int, error) { ns, err := Namespaces(workspaceName) namespaces := make([]string, 0) if err != nil { - return namespaces, err + return 0, err } for i := 0; i < len(ns); i++ { namespaces = append(namespaces, ns[i].Name) } - return namespaces, nil + return len(namespaces), nil } func WorkspaceCount() (int, error) { diff --git a/pkg/simple/client/prometheus/prometheus.go b/pkg/simple/client/prometheus/prometheus.go index 126f55136..7cff474a5 100644 --- a/pkg/simple/client/prometheus/prometheus.go +++ b/pkg/simple/client/prometheus/prometheus.go @@ -18,12 +18,36 @@ package prometheus import ( + "fmt" + jsoniter "github.com/json-iterator/go" "io/ioutil" "k8s.io/klog" "net/http" "time" ) +// Prometheus query api response +type APIResponse struct { + Status string `json:"status" description:"result status, one of error, success"` + Data QueryResult `json:"data" description:"actual metric result"` + ErrorType string `json:"errorType,omitempty"` + Error string `json:"error,omitempty"` + Warnings []string `json:"warnings,omitempty"` +} + +// QueryResult includes result data from a query. 
+type QueryResult struct { + ResultType string `json:"resultType" description:"result type, one of matrix, vector"` + Result []QueryValue `json:"result" description:"metric data including labels, time series and values"` +} + +// Time Series +type QueryValue struct { + Metric map[string]string `json:"metric,omitempty" description:"time series labels"` + Value []interface{} `json:"value,omitempty" description:"time series, values of vector type"` + Values [][]interface{} `json:"values,omitempty" description:"time series, values of matrix type"` +} + type PrometheusClient struct { client *http.Client endpoint string @@ -40,28 +64,40 @@ func NewPrometheusClient(options *PrometheusOptions) (*PrometheusClient, error) }, nil } -func (c *PrometheusClient) SendMonitoringRequest(queryType string, params string) string { - return c.sendMonitoringRequest(c.endpoint, queryType, params) +func (c *PrometheusClient) QueryToK8SPrometheus(queryType string, params string) (apiResponse APIResponse) { + return c.query(c.endpoint, queryType, params) } -func (c *PrometheusClient) SendSecondaryMonitoringRequest(queryType string, params string) string { - return c.sendMonitoringRequest(c.secondaryEndpoint, queryType, params) +func (c *PrometheusClient) QueryToK8SSystemPrometheus(queryType string, params string) (apiResponse APIResponse) { + return c.query(c.secondaryEndpoint, queryType, params) } -func (c *PrometheusClient) sendMonitoringRequest(endpoint string, queryType string, params string) string { - epurl := endpoint + queryType + params - response, err := c.client.Get(epurl) +var jsonIter = jsoniter.ConfigCompatibleWithStandardLibrary + +func (c *PrometheusClient) query(endpoint string, queryType string, params string) (apiResponse APIResponse) { + url := fmt.Sprintf("%s/api/v1/%s?%s", endpoint, queryType, params) + + response, err := c.client.Get(url) if err != nil { klog.Error(err) - } else { - defer response.Body.Close() - - contents, err := ioutil.ReadAll(response.Body) - - if 
err != nil { - klog.Error(err) - } - return string(contents) + apiResponse.Status = "error" + return apiResponse } - return "" + defer response.Body.Close() + + body, err := ioutil.ReadAll(response.Body) + if err != nil { + klog.Error(err) + apiResponse.Status = "error" + return apiResponse + } + + err = jsonIter.Unmarshal(body, &apiResponse) + if err != nil { + klog.Errorf("fail to unmarshal prometheus query result: %s", err.Error()) + apiResponse.Status = "error" + return apiResponse + } + + return apiResponse }