diff --git a/pkg/apis/monitoring/v1alpha2/register.go b/pkg/apis/monitoring/v1alpha2/register.go index d0bb1dfcf..ac939ffe6 100644 --- a/pkg/apis/monitoring/v1alpha2/register.go +++ b/pkg/apis/monitoring/v1alpha2/register.go @@ -172,6 +172,8 @@ func addWebService(c *restful.Container) error { Consumes(restful.MIME_JSON, restful.MIME_XML). Produces(restful.MIME_JSON) + // Only use this api to monitor status of pods under the {workload} + // To monitor a specific workload, try the next two apis with "resources_filter" ws.Route(ws.GET("/namespaces/{namespace}/workloads/{workload_kind}/{workload}").To(monitoring.MonitorWorkload). Doc("monitor specific workload level metrics"). Param(ws.PathParameter("namespace", "namespace").DataType("string").Required(true).DefaultValue("kube-system")). @@ -188,6 +190,21 @@ func addWebService(c *restful.Container) error { Consumes(restful.MIME_JSON, restful.MIME_XML). Produces(restful.MIME_JSON) + ws.Route(ws.GET("/namespaces/{namespace}/workloads/{workload_kind}").To(monitoring.MonitorWorkload). + Doc("monitor specific workload kind level metrics"). + Param(ws.PathParameter("namespace", "namespace").DataType("string").Required(true).DefaultValue("kube-system")). + Param(ws.PathParameter("workload_kind", "workload kind").DataType("string").Required(true).DefaultValue("daemonset")). + Param(ws.QueryParameter("metrics_filter", "metrics name cpu memory...").DataType("string").Required(false)). + Param(ws.QueryParameter("resources_filter", "pod re2 expression filter").DataType("string").Required(false).DefaultValue("openpitrix.*")). + Param(ws.QueryParameter("sort_metric", "sort metric").DataType("string").Required(false)). + Param(ws.QueryParameter("sort_type", "ascending descending order").DataType("string").Required(false)). + Param(ws.QueryParameter("page", "page number").DataType("string").Required(false).DefaultValue("1")). 
+ Param(ws.QueryParameter("limit", "max metric items in a page").DataType("string").Required(false).DefaultValue("4")). + Param(ws.QueryParameter("type", "rank, statistic").DataType("string").Required(false).DefaultValue("rank")). + Metadata(restfulspec.KeyOpenAPITags, tags)). + Consumes(restful.MIME_JSON, restful.MIME_XML). + Produces(restful.MIME_JSON) + ws.Route(ws.GET("/namespaces/{namespace}/workloads").To(monitoring.MonitorWorkload). Doc("monitor all workload level metrics"). Param(ws.PathParameter("namespace", "namespace").DataType("string").Required(true).DefaultValue("kube-system")). diff --git a/pkg/apiserver/monitoring/monitoring.go b/pkg/apiserver/monitoring/monitoring.go index 2d5d2e8f5..ca9f97294 100644 --- a/pkg/apiserver/monitoring/monitoring.go +++ b/pkg/apiserver/monitoring/monitoring.go @@ -20,11 +20,11 @@ package monitoring import ( "github.com/emicklei/go-restful" "kubesphere.io/kubesphere/pkg/models/metrics" - prom "kubesphere.io/kubesphere/pkg/simple/client/prometheus" + "kubesphere.io/kubesphere/pkg/simple/client/prometheus" ) func MonitorPod(request *restful.Request, response *restful.Response) { - requestParams := prom.ParseMonitoringRequestParams(request) + requestParams := prometheus.ParseMonitoringRequestParams(request) podName := requestParams.PodName metricName := requestParams.MetricsName if podName != "" { @@ -32,7 +32,7 @@ func MonitorPod(request *restful.Request, response *restful.Response) { queryType, params, nullRule := metrics.AssemblePodMetricRequestInfo(requestParams, metricName) var res *metrics.FormatedMetric if !nullRule { - metricsStr := prom.SendMonitoringRequest(queryType, params) + metricsStr := prometheus.SendMonitoringRequest(queryType, params) res = metrics.ReformatJson(metricsStr, metricName, map[string]string{"pod_name": ""}) } response.WriteAsJson(res) @@ -49,7 +49,7 @@ func MonitorPod(request *restful.Request, response *restful.Response) { } func MonitorContainer(request *restful.Request, response 
*restful.Response) { - requestParams := prom.ParseMonitoringRequestParams(request) + requestParams := prometheus.ParseMonitoringRequestParams(request) metricName := requestParams.MetricsName if requestParams.MetricsFilter != "" { rawMetrics := metrics.MonitorAllMetrics(requestParams, metrics.MetricLevelContainer) @@ -68,23 +68,12 @@ func MonitorContainer(request *restful.Request, response *restful.Response) { } func MonitorWorkload(request *restful.Request, response *restful.Response) { - requestParams := prom.ParseMonitoringRequestParams(request) + requestParams := prometheus.ParseMonitoringRequestParams(request) rawMetrics := metrics.MonitorAllMetrics(requestParams, metrics.MetricLevelWorkload) - var sortedMetrics *metrics.FormatedLevelMetric - var maxMetricCount int - - wlKind := requestParams.WorkloadKind - // sorting - if wlKind == "" { - - sortedMetrics, maxMetricCount = metrics.Sort(requestParams.SortMetricName, requestParams.SortType, rawMetrics) - } else { - - sortedMetrics, maxMetricCount = metrics.Sort(requestParams.SortMetricName, requestParams.SortType, rawMetrics) - } + sortedMetrics, maxMetricCount := metrics.Sort(requestParams.SortMetricName, requestParams.SortType, rawMetrics) // paging pagedMetrics := metrics.Page(requestParams.PageNum, requestParams.LimitNum, sortedMetrics, maxMetricCount) @@ -95,7 +84,7 @@ func MonitorWorkload(request *restful.Request, response *restful.Response) { func MonitorAllWorkspaces(request *restful.Request, response *restful.Response) { - requestParams := prom.ParseMonitoringRequestParams(request) + requestParams := prometheus.ParseMonitoringRequestParams(request) tp := requestParams.Tp if tp == "statistics" { @@ -105,27 +94,31 @@ func MonitorAllWorkspaces(request *restful.Request, response *restful.Response) } else if tp == "rank" { rawMetrics := metrics.MonitorAllWorkspaces(requestParams) + // sorting sortedMetrics, maxMetricCount := metrics.Sort(requestParams.SortMetricName, requestParams.SortType, rawMetrics) + // 
paging pagedMetrics := metrics.Page(requestParams.PageNum, requestParams.LimitNum, sortedMetrics, maxMetricCount) response.WriteAsJson(pagedMetrics) } else { - res := metrics.MonitorAllMetrics(requestParams, metrics.MetricLevelWorkspace) - response.WriteAsJson(res) + rawMetrics := metrics.MonitorAllWorkspaces(requestParams) + response.WriteAsJson(rawMetrics) } } func MonitorOneWorkspace(request *restful.Request, response *restful.Response) { - requestParams := prom.ParseMonitoringRequestParams(request) + requestParams := prometheus.ParseMonitoringRequestParams(request) tp := requestParams.Tp if tp == "rank" { // multiple rawMetrics := metrics.MonitorAllMetrics(requestParams, metrics.MetricLevelWorkspace) + // sorting sortedMetrics, maxMetricCount := metrics.Sort(requestParams.SortMetricName, requestParams.SortType, rawMetrics) + // paging pagedMetrics := metrics.Page(requestParams.PageNum, requestParams.LimitNum, sortedMetrics, maxMetricCount) response.WriteAsJson(pagedMetrics) @@ -143,34 +136,25 @@ func MonitorOneWorkspace(request *restful.Request, response *restful.Response) { } func MonitorNamespace(request *restful.Request, response *restful.Response) { - requestParams := prom.ParseMonitoringRequestParams(request) - metricName := requestParams.MetricsName - nsName := requestParams.NsName - if nsName != "" { - // single - queryType, params := metrics.AssembleNamespaceMetricRequestInfo(requestParams, metricName) - metricsStr := prom.SendMonitoringRequest(queryType, params) - res := metrics.ReformatJson(metricsStr, metricName, map[string]string{"namespace": ""}) - response.WriteAsJson(res) - } else { - // multiple - rawMetrics := metrics.MonitorAllMetrics(requestParams, metrics.MetricLevelNamespace) - // sorting - sortedMetrics, maxMetricCount := metrics.Sort(requestParams.SortMetricName, requestParams.SortType, rawMetrics) - // paging - pagedMetrics := metrics.Page(requestParams.PageNum, requestParams.LimitNum, sortedMetrics, maxMetricCount) - 
response.WriteAsJson(pagedMetrics) - } + requestParams := prometheus.ParseMonitoringRequestParams(request) + // multiple + rawMetrics := metrics.MonitorAllMetrics(requestParams, metrics.MetricLevelNamespace) + + // sorting + sortedMetrics, maxMetricCount := metrics.Sort(requestParams.SortMetricName, requestParams.SortType, rawMetrics) + // paging + pagedMetrics := metrics.Page(requestParams.PageNum, requestParams.LimitNum, sortedMetrics, maxMetricCount) + response.WriteAsJson(pagedMetrics) } func MonitorCluster(request *restful.Request, response *restful.Response) { - requestParams := prom.ParseMonitoringRequestParams(request) + requestParams := prometheus.ParseMonitoringRequestParams(request) metricName := requestParams.MetricsName if metricName != "" { // single queryType, params := metrics.AssembleClusterMetricRequestInfo(requestParams, metricName) - metricsStr := prom.SendMonitoringRequest(queryType, params) + metricsStr := prometheus.SendMonitoringRequest(queryType, params) res := metrics.ReformatJson(metricsStr, metricName, map[string]string{"cluster": "local"}) response.WriteAsJson(res) @@ -182,14 +166,17 @@ func MonitorCluster(request *restful.Request, response *restful.Response) { } func MonitorNode(request *restful.Request, response *restful.Response) { - requestParams := prom.ParseMonitoringRequestParams(request) + requestParams := prometheus.ParseMonitoringRequestParams(request) metricName := requestParams.MetricsName if metricName != "" { // single queryType, params := metrics.AssembleNodeMetricRequestInfo(requestParams, metricName) - metricsStr := prom.SendMonitoringRequest(queryType, params) + metricsStr := prometheus.SendMonitoringRequest(queryType, params) res := metrics.ReformatJson(metricsStr, metricName, map[string]string{"node": ""}) + // The raw node-exporter result doesn't include ip address information + // Thereby, append node ip address to .data.result[].metric + nodeAddress := metrics.GetNodeAddressInfo() metrics.AddNodeAddressMetric(res, 
nodeAddress) diff --git a/pkg/models/metrics/metrics.go b/pkg/models/metrics/metrics.go index 8b8efa0f6..600a7894e 100644 --- a/pkg/models/metrics/metrics.go +++ b/pkg/models/metrics/metrics.go @@ -19,17 +19,16 @@ limitations under the License. package metrics import ( + "github.com/golang/glog" "kubesphere.io/kubesphere/pkg/informers" "net/url" "regexp" + "runtime/debug" + "sort" "strings" "sync" "time" - "github.com/golang/glog" - "runtime/debug" - "sort" - "github.com/json-iterator/go" "k8s.io/api/core/v1" @@ -119,6 +118,7 @@ func getAllWorkspaceNames(formatedMetric *FormatedMetric) map[string]int { var wsMap = make(map[string]int) for i := 0; i < len(formatedMetric.Data.Result); i++ { + // metricDesc needs clear naming metricDesc := formatedMetric.Data.Result[i][ResultItemMetric] metricDescMap, ensure := metricDesc.(map[string]interface{}) if ensure { @@ -256,7 +256,8 @@ func AssembleAllWorkloadMetricRequestInfo(monitoringRequest *client.MonitoringRe paramValues := monitoringRequest.Params - rule := MakeWorkloadPromQL(metricName, monitoringRequest.NsName, monitoringRequest.ResourcesFilter) + rule := MakeWorkloadPromQL(metricName, monitoringRequest.NsName, monitoringRequest.ResourcesFilter, monitoringRequest.WorkloadKind) + params := makeRequestParamString(rule, paramValues) return queryType, params } @@ -293,7 +294,7 @@ func AddNodeAddressMetric(nodeMetric *FormatedMetric, nodeAddress *map[string][] metricDesc := nodeMetric.Data.Result[i][ResultItemMetric] metricDescMap, ensure := metricDesc.(map[string]interface{}) if ensure { - if nodeId, exist := metricDescMap["node"]; exist { + if nodeId, exist := metricDescMap["resource_name"]; exist { addr, exist := (*nodeAddress)[nodeId.(string)] if exist { metricDescMap["address"] = addr @@ -330,6 +331,17 @@ func AssembleNamespaceMetricRequestInfo(monitoringRequest *client.MonitoringRequ return queryType, params } +func AssembleNamespaceMetricRequestInfoByNamesapce(monitoringRequest *client.MonitoringRequestParams, 
namespace string, metricName string) (string, string) { + queryType := monitoringRequest.QueryType + + paramValues := monitoringRequest.Params + rule := MakeNamespacePromQL(namespace, monitoringRequest.ResourcesFilter, metricName) + + params := makeRequestParamString(rule, paramValues) + + return queryType, params +} + func AssembleSpecificWorkspaceMetricRequestInfo(monitoringRequest *client.MonitoringRequestParams, namespaceList []string, metricName string) (string, string) { nsFilter := "^(" + strings.Join(namespaceList, "|") + ")$" @@ -414,6 +426,7 @@ func MonitorAllWorkspaces(monitoringRequest *client.MonitoringRequestParams) *Fo wsMap := getAllWorkspaces() for ws := range wsMap { + // Only execute Prometheus queries for specific metrics on specific workspaces bol, err := regexp.MatchString(monitoringRequest.ResourcesFilter, ws) if err == nil && bol { // a workspace @@ -461,8 +474,9 @@ func collectWorkspaceMetric(monitoringRequest *client.MonitoringRequestParams, w var ch = make(chan *FormatedMetric, ChannelMaxCapacity) namespaceArray, err := workspaces.WorkspaceNamespaces(ws) if err != nil { - glog.Errorln(err.Error()) + glog.Errorln(err) } + // add by namespace for _, metricName := range filterMetricsName { wg.Add(1) @@ -470,7 +484,7 @@ func collectWorkspaceMetric(monitoringRequest *client.MonitoringRequestParams, w queryType, params := AssembleSpecificWorkspaceMetricRequestInfo(monitoringRequest, namespaceArray, metricName) metricsStr := client.SendMonitoringRequest(queryType, params) - ch <- ReformatJson(metricsStr, metricName, map[string]string{"namespace": ""}) + ch <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": ws}) // It's adding "resource_name" field wg.Done() }(metricName) @@ -482,7 +496,7 @@ func collectWorkspaceMetric(monitoringRequest *client.MonitoringRequestParams, w var metricsArray []FormatedMetric for oneMetric := range ch { if oneMetric != nil { - // add "workspace" filed to oneMetric `metric` field + // add 
"workspace" to oneMetric "metric" field for i := 0; i < len(oneMetric.Data.Result); i++ { tmap, sure := oneMetric.Data.Result[i][ResultItemMetric].(map[string]interface{}) if sure { @@ -510,8 +524,8 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour case MetricLevelCluster: { for _, metricName := range ClusterMetricsNames { - bol, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && bol { + matched, err := regexp.MatchString(metricsFilter, metricName) + if err == nil && matched { wg.Add(1) go func(metricName string) { queryType, params := AssembleClusterMetricRequestInfo(monitoringRequest, metricName) @@ -525,8 +539,8 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour case MetricLevelNode: { for _, metricName := range NodeMetricsNames { - bol, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && bol { + matched, err := regexp.MatchString(metricsFilter, metricName) + if err == nil && matched { wg.Add(1) go func(metricName string) { queryType, params := AssembleNodeMetricRequestInfo(monitoringRequest, metricName) @@ -553,36 +567,70 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour continue } - bol, err := regexp.MatchString(metricsFilter, metricName) - ns := "^(" + strings.Join(namespaceArray, "|") + ")$" - monitoringRequest.ResourcesFilter = ns - if err == nil && bol { - wg.Add(1) - go func(metricName string) { - queryType, params := AssembleNamespaceMetricRequestInfo(monitoringRequest, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) - ch <- ReformatJson(metricsStr, metricName, map[string]string{"workspace": "workspace"}) - wg.Done() - }(metricName) + matched, err := regexp.MatchString(metricsFilter, metricName) + if err != nil || !matched { + continue } + + wg.Add(1) + go func(metricName string) { + + var chForOneMetric = make(chan *FormatedMetric, ChannelMaxCapacity) + var wgForOneMetric 
sync.WaitGroup + + for _, ns := range namespaceArray { + wgForOneMetric.Add(1) + go func(metricName string, namespace string) { + + queryType, params := AssembleNamespaceMetricRequestInfoByNamesapce(monitoringRequest, namespace, metricName) + metricsStr := client.SendMonitoringRequest(queryType, params) + chForOneMetric <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": namespace}) + wgForOneMetric.Done() + }(metricName, ns) + } + + wgForOneMetric.Wait() + close(chForOneMetric) + + // ranking is for vector type result only + aggregatedResult := FormatedMetric{MetricName: metricName, Status: "success", Data: FormatedMetricData{Result: []map[string]interface{}{}, ResultType: ResultTypeVector}} + + for oneMetric := range chForOneMetric { + + if oneMetric != nil { + + // wrapper layer 1: append .data.result[0] + if len(oneMetric.Data.Result) > 0 { + aggregatedResult.Data.Result = append(aggregatedResult.Data.Result, oneMetric.Data.Result[0]) + } + } + } + + ch <- &aggregatedResult + wg.Done() + }(metricName) + } } else { + + workspace := monitoringRequest.WsName + for _, metricName := range WorkspaceMetricsNames { if metricName == MetricNameWorkspaceAllProjectCount { continue } - bol, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && bol { + matched, err := regexp.MatchString(metricsFilter, metricName) + if err == nil && matched { wg.Add(1) - go func(metricName string) { + go func(metricName string, workspace string) { queryType, params := AssembleSpecificWorkspaceMetricRequestInfo(monitoringRequest, namespaceArray, metricName) metricsStr := client.SendMonitoringRequest(queryType, params) - ch <- ReformatJson(metricsStr, metricName, map[string]string{"workspace": "workspace"}) + ch <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": workspace}) wg.Done() - }(metricName) + }(metricName, workspace) } } } @@ -590,8 +638,8 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour 
// sum all workspaces for _, metricName := range WorkspaceMetricsNames { - bol, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && bol { + matched, err := regexp.MatchString(metricsFilter, metricName) + if err == nil && matched { wg.Add(1) @@ -599,6 +647,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour queryType, params := AssembleAllWorkspaceMetricRequestInfo(monitoringRequest, nil, metricName) metricsStr := client.SendMonitoringRequest(queryType, params) ch <- ReformatJson(metricsStr, metricName, map[string]string{"workspace": "workspaces"}) + wg.Done() }(metricName) } @@ -608,13 +657,17 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour case MetricLevelNamespace: { for _, metricName := range NamespaceMetricsNames { - bol, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && bol { + matched, err := regexp.MatchString(metricsFilter, metricName) + if err == nil && matched { wg.Add(1) go func(metricName string) { + queryType, params := AssembleNamespaceMetricRequestInfo(monitoringRequest, metricName) metricsStr := client.SendMonitoringRequest(queryType, params) - ch <- ReformatJson(metricsStr, metricName, map[string]string{"namespace": ""}) + + rawResult := ReformatJson(metricsStr, metricName, map[string]string{"namespace": ""}) + ch <- rawResult + wg.Done() }(metricName) } @@ -624,13 +677,15 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour { if monitoringRequest.WorkloadName == "" { for _, metricName := range WorkloadMetricsNames { - bol, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && bol { + matched, err := regexp.MatchString(metricsFilter, metricName) + if err == nil && matched { wg.Add(1) go func(metricName string) { queryType, params := AssembleAllWorkloadMetricRequestInfo(monitoringRequest, metricName) metricsStr := client.SendMonitoringRequest(queryType, params) - ch <- 
ReformatJson(metricsStr, metricName, map[string]string{"workload": ""}) + reformattedResult := ReformatJson(metricsStr, metricName, map[string]string{"workload": ""}) + // no need to append a null result + ch <- reformattedResult wg.Done() }(metricName) } @@ -658,8 +713,8 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour case MetricLevelPod: { for _, metricName := range PodMetricsNames { - bol, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && bol { + matched, err := regexp.MatchString(metricsFilter, metricName) + if err == nil && matched { wg.Add(1) go func(metricName string) { queryType, params, nullRule := AssemblePodMetricRequestInfo(monitoringRequest, metricName) @@ -677,8 +732,8 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour case MetricLevelContainer: { for _, metricName := range ContainerMetricsNames { - bol, err := regexp.MatchString(metricsFilter, metricName) - if err == nil && bol { + matched, err := regexp.MatchString(metricsFilter, metricName) + if err == nil && matched { wg.Add(1) go func(metricName string) { queryType, params := AssembleContainerMetricRequestInfo(monitoringRequest, metricName) @@ -702,6 +757,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour } } + // wrapper layer 2: return &FormatedLevelMetric{ MetricsLevel: resourceType, Results: metricsArray, diff --git a/pkg/models/metrics/metricsrule.go b/pkg/models/metrics/metricsrule.go index b8cf9fdf5..0bd947576 100644 --- a/pkg/models/metrics/metricsrule.go +++ b/pkg/models/metrics/metricsrule.go @@ -17,14 +17,54 @@ import ( "strings" ) -func MakeWorkloadPromQL(metricName, nsName, wlFilter string) string { - if wlFilter == "" { - wlFilter = ".*" +// resources_filter = xxxx|xxxx +func MakeWorkloadPromQL(metricName, nsName, resources_filter, wkKind string) string { + + switch wkKind { + case "deployment": + wkKind = Deployment + case "daemonset": + wkKind = DaemonSet + 
case "statefulset": + wkKind = StatefulSet + default: + wkKind = "(.*)" + } + + if resources_filter == "" { + resources_filter = ".*" + } else { + var prefix string + + // The "workload_{deployment,statefulset,daemonset}_xxx" metric uses "deployment","statefulset" or "daemonset" label selectors + // which match exactly a workload name + // eg. kube_daemonset_status_number_unavailable{daemonset=~"^xxx$"} + if strings.Contains(metricName, "deployment") || strings.Contains(metricName, "daemonset") || strings.Contains(metricName, "statefulset") { + // to pass "resources_filter" to PromQL, we reformat it + prefix = "" + } else { + // While workload_{cpu,memory,net}_xxx metrics uses "workload" + // eg. namespace:workload_cpu_usage:sum{workload="Deployment:xxx"} + prefix = wkKind + ":" + } + + filters := strings.Split(resources_filter, "|") + // reshape it to match PromQL re2 syntax + resources_filter = "" + for i, filter := range filters { + + resources_filter += "^" + prefix + filter + "$" // eg. 
^Deployment:xxx$ + + if i != len(filters)-1 { + resources_filter += "|" + } + } + } var promql = RulePromQLTmplMap[metricName] promql = strings.Replace(promql, "$2", nsName, -1) - promql = strings.Replace(promql, "$3", wlFilter, -1) + promql = strings.Replace(promql, "$3", resources_filter, -1) + return promql } diff --git a/pkg/models/metrics/metricsruleconst.go b/pkg/models/metrics/metricsruleconst.go index 72f8adc52..26833bac1 100644 --- a/pkg/models/metrics/metricsruleconst.go +++ b/pkg/models/metrics/metricsruleconst.go @@ -134,12 +134,16 @@ var ClusterMetricsNames = []string{ "cluster_replicaset_count", "cluster_service_count", "cluster_secret_count", - + "cluster_ingresses_extensions_count", "cluster_namespace_count", "cluster_load1", "cluster_load5", "cluster_load15", + + // New in ks 2.0 + "cluster_pod_abnormal_ratio", + "cluster_node_offline_ratio", } var NodeMetricsNames = []string{ "node_cpu_utilisation", @@ -176,6 +180,9 @@ var NodeMetricsNames = []string{ "node_load1", "node_load5", "node_load15", + + // New in ks 2.0 + "node_pod_abnormal_ratio", } var WorkspaceMetricsNames = []string{ "workspace_cpu_usage", @@ -202,6 +209,9 @@ var WorkspaceMetricsNames = []string{ "workspace_secret_count", "workspace_all_project_count", + + // New in ks 2.0 + "workspace_pod_abnormal_ratio", } var NamespaceMetricsNames = []string{ "namespace_cpu_usage", @@ -267,6 +277,10 @@ var NamespaceMetricsNames = []string{ "namespace_secret_count", "namespace_ingresses_extensions_count", + + // New in ks 2.0 + "namespace_pod_abnormal_ratio", + "namespace_resourcequota_used_ratio", } var PodMetricsNames = []string{ @@ -290,6 +304,11 @@ var WorkloadMetricsNames = []string{ "workload_statefulset_replica_available", "workload_daemonset_replica", "workload_daemonset_replica_available", + + // New in ks 2.0 + "workload_deployment_unavailable_replicas_ratio", + "workload_daemonset_unavailable_replicas_ratio", + "workload_statefulset_unavailable_replicas_ratio", } var 
ContainerMetricsNames = []string{ @@ -319,30 +338,28 @@ var RulePromQLTmplMap = MetricMap{ "cluster_disk_write_throughput": "sum(node:data_volume_throughput_bytes_written:sum)", "cluster_disk_size_usage": `sum(max((node_filesystem_size_bytes{device=~"/dev/.+", job="node-exporter"} - node_filesystem_avail_bytes{device=~"/dev/.+", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:) by (node))`, - "cluster_disk_size_utilisation": `1 - sum(max(node_filesystem_avail_bytes{device=~"/dev/.+", job="node-exporter"} * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:) by (node)) / sum(max(node_filesystem_size_bytes{device=~"/dev/.+", job="node-exporter"} * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:) by (node))`, + "cluster_disk_size_utilisation": `cluster:disk_utilization:ratio`, "cluster_disk_size_capacity": `sum(max(node_filesystem_size_bytes{device=~"/dev/.+", job="node-exporter"} * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:) by (node))`, "cluster_disk_size_available": `sum(max(node_filesystem_avail_bytes{device=~"/dev/.+", job="node-exporter"} * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:) by (node))`, "cluster_disk_inode_total": `sum(node:node_inodes_total:)`, "cluster_disk_inode_usage": `sum(node:node_inodes_total:) - sum(node:node_inodes_free:)`, - "cluster_disk_inode_utilisation": `1 - sum(node:node_inodes_free:) / sum(node:node_inodes_total:)`, + "cluster_disk_inode_utilisation": `cluster:disk_inode_utilization:ratio`, "cluster_namespace_count": `count(kube_namespace_annotations)`, // cluster_pod_count = cluster_pod_running_count + cluster_pod_succeeded_count + cluster_pod_abnormal_count - "cluster_pod_count": `sum((kube_pod_status_scheduled{condition="true"} > 0) * on (namespace, pod) group_left(node) (sum by (node, namespace, pod) (kube_pod_info)) unless on (node) 
(kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0))`, + "cluster_pod_count": `cluster:pod:sum`, "cluster_pod_quota": `sum(kube_node_status_capacity_pods unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0))`, - "cluster_pod_utilisation": `sum(kube_pod_info unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0)) / sum(kube_node_status_capacity_pods unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0))`, + "cluster_pod_utilisation": `cluster:pod_utilization:ratio`, "cluster_pod_running_count": `count(kube_pod_info unless on (pod) (kube_pod_status_phase{phase=~"Failed|Pending|Unknown|Succeeded"} > 0) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0))`, "cluster_pod_succeeded_count": `count(kube_pod_info unless on (pod) (kube_pod_status_phase{phase=~"Failed|Pending|Unknown|Running"} > 0) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0))`, - "cluster_pod_abnormal_count": `count(kube_pod_info unless on (pod) (kube_pod_status_phase{phase=~"Succeeded|Running"} > 0) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0))`, + "cluster_pod_abnormal_count": `cluster:pod_abnormal:sum`, "cluster_node_online": `sum(kube_node_status_condition{condition="Ready",status="true"})`, - "cluster_node_offline": `sum(kube_node_status_condition{condition="Ready",status=~"unknown|false"})`, + "cluster_node_offline": `cluster:node_offline:sum`, "cluster_node_total": `sum(kube_node_status_condition{condition="Ready"})`, - "cluster_ingresses_extensions_count": `sum(kube_ingress_labels)`, - "cluster_configmap_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/configmaps"}) by (resource, type)`, "cluster_jobs_batch_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", 
resource="count/jobs.batch"}) by (resource, type)`, "cluster_roles_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/roles.rbac.authorization.k8s.io"}) by (resource, type)`, @@ -363,23 +380,28 @@ var RulePromQLTmplMap = MetricMap{ "cluster_cpu_request_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="requests.cpu"}) by (resource, type)`, "cluster_service_loadbalancer_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="services.loadbalancers"}) by (resource, type)`, - "cluster_cronjob_count": `sum(kube_cronjob_labels)`, - "cluster_pvc_count": `sum(kube_persistentvolumeclaim_info)`, - "cluster_daemonset_count": `sum(kube_daemonset_labels)`, - "cluster_deployment_count": `sum(kube_deployment_labels)`, - "cluster_endpoint_count": `sum(kube_endpoint_labels)`, - "cluster_hpa_count": `sum(kube_hpa_labels)`, - "cluster_job_count": `sum(kube_job_labels)`, - "cluster_statefulset_count": `sum(kube_statefulset_labels)`, - "cluster_replicaset_count": `count(kube_replicaset_created)`, - "cluster_service_count": `sum(kube_service_info)`, - "cluster_secret_count": `sum(kube_secret_info)`, - "cluster_pv_count": `sum(kube_persistentvolume_labels)`, + "cluster_cronjob_count": `sum(kube_cronjob_labels)`, + "cluster_pvc_count": `sum(kube_persistentvolumeclaim_info)`, + "cluster_daemonset_count": `sum(kube_daemonset_labels)`, + "cluster_deployment_count": `sum(kube_deployment_labels)`, + "cluster_endpoint_count": `sum(kube_endpoint_labels)`, + "cluster_hpa_count": `sum(kube_hpa_labels)`, + "cluster_job_count": `sum(kube_job_labels)`, + "cluster_statefulset_count": `sum(kube_statefulset_labels)`, + "cluster_replicaset_count": `count(kube_replicaset_created)`, + "cluster_service_count": `sum(kube_service_info)`, + "cluster_secret_count": `sum(kube_secret_info)`, + "cluster_pv_count": `sum(kube_persistentvolume_labels)`, + "cluster_ingresses_extensions_count": `sum(kube_ingress_labels)`, 
"cluster_load1": `sum(node_load1{job="node-exporter"}) / sum(node:node_num_cpu:sum)`, "cluster_load5": `sum(node_load5{job="node-exporter"}) / sum(node:node_num_cpu:sum)`, "cluster_load15": `sum(node_load15{job="node-exporter"}) / sum(node:node_num_cpu:sum)`, + // cluster: New added in ks 2.0 + "cluster_pod_abnormal_ratio": `cluster:pod_abnormal:ratio`, + "cluster_node_offline_ratio": `cluster:node_offline:ratio`, + //node "node_cpu_utilisation": "node:node_cpu_utilisation:avg1m", "node_cpu_total": "node:node_num_cpu:sum", @@ -397,27 +419,30 @@ var RulePromQLTmplMap = MetricMap{ "node_disk_write_throughput": "node:data_volume_throughput_bytes_written:sum", "node_disk_size_capacity": `max(node_filesystem_size_bytes{device=~"/dev/.+", job="node-exporter"} * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1) by (node)`, - "node_disk_size_available": `max(node_filesystem_avail_bytes{device=~"/dev/.+", job="node-exporter"} * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1) by (node)`, + "node_disk_size_available": `node:disk_space_available:$1`, "node_disk_size_usage": `max((node_filesystem_size_bytes{device=~"/dev/.+", job="node-exporter"} - node_filesystem_avail_bytes{device=~"/dev/.+", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1) by (node)`, - "node_disk_size_utilisation": `max(((node_filesystem_size_bytes{device=~"/dev/.+", job="node-exporter"} - node_filesystem_avail_bytes{device=~"/dev/.+", job="node-exporter"}) / node_filesystem_size_bytes{device=~"/dev/.+", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1) by (node)`, + "node_disk_size_utilisation": `node:disk_space_utilization:ratio$1`, "node_disk_inode_total": `node:node_inodes_total:$1`, "node_disk_inode_usage": `node:node_inodes_total:$1 - node:node_inodes_free:$1`, - "node_disk_inode_utilisation": `(1 - (node:node_inodes_free:$1 / node:node_inodes_total:$1))`, 
+ "node_disk_inode_utilisation": `node:disk_inode_utilization:ratio$1`, - "node_pod_count": `sum by (node) ((kube_pod_status_scheduled{condition="true"} > 0) * on (namespace, pod) group_left(node) kube_pod_info$1 unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0))`, + "node_pod_count": `node:pod_count:sum$1`, "node_pod_quota": `sum(kube_node_status_capacity_pods$1) by (node) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0)`, - "node_pod_utilisation": `(sum(kube_pod_info$1) by (node) / sum(kube_node_status_capacity_pods$1) by (node)) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0)`, - "node_pod_running_count": `count(kube_pod_info$1 unless on (pod) (kube_pod_status_phase{phase=~"Failed|Pending|Unknown|Succeeded"} > 0)) by (node) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0)`, - "node_pod_succeeded_count": `count(kube_pod_info$1 unless on (pod) (kube_pod_status_phase{phase=~"Failed|Pending|Unknown|Running"} > 0)) by (node) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0)`, - "node_pod_abnormal_count": `count(kube_pod_info$1 unless on (pod) (kube_pod_status_phase{phase=~"Succeeded|Running"} > 0)) by (node) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0)`, + "node_pod_utilisation": `node:pod_utilization:ratio$1`, + "node_pod_running_count": `node:pod_running:count$1`, + "node_pod_succeeded_count": `node:pod_succeeded:count$1`, + "node_pod_abnormal_count": `node:pod_abnormal:count$1`, // without log node: unless on(node) kube_node_labels{label_role="log"} "node_cpu_usage": `node:node_cpu_utilisation:avg1m$1 * node:node_num_cpu:sum$1`, - "node_load1": `sum by (node) (node_load1{job="node-exporter"} * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1) / node:node_num_cpu:sum`, - 
"node_load5": `sum by (node) (node_load5{job="node-exporter"} * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1) / node:node_num_cpu:sum`, - "node_load15": `sum by (node) (node_load15{job="node-exporter"} * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1) / node:node_num_cpu:sum`, + "node_load1": `node:load1:ratio$1`, + "node_load5": `node:load5:ratio$1`, + "node_load15": `node:load15:ratio$1`, + + // New in ks 2.0 + "node_pod_abnormal_ratio": `node:pod_abnormal:ratio$1`, //namespace "namespace_cpu_usage": `namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", namespace=~"$1"} * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, @@ -430,45 +455,45 @@ var RulePromQLTmplMap = MetricMap{ "namespace_pod_succeeded_count": `sum(kube_pod_status_phase{phase="Succeeded", namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, "namespace_pod_abnormal_count": `sum(kube_pod_status_phase{phase=~"Failed|Pending|Unknown", namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_configmap_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/configmaps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_jobs_batch_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/jobs.batch"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_roles_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/roles.rbac.authorization.k8s.io"}) by (namespace, resource, type) * on (namespace) 
group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_memory_limit_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="limits.memory"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_pvc_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="persistentvolumeclaims"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_memory_request_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="requests.memory"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_pvc_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/persistentvolumeclaims"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_cronjobs_batch_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/cronjobs.batch"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_ingresses_extensions_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/ingresses.extensions"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_cpu_limit_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="limits.cpu"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - 
"namespace_storage_request_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="requests.storage"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_deployment_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/deployments.apps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_pod_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/pods"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_statefulset_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/statefulsets.apps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_daemonset_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/daemonsets.apps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_secret_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/secrets"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_service_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/services"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_cpu_request_used": `sum(kube_resourcequota{resourcequota!="quota", 
type="used", namespace!="", namespace=~"$1", resource="requests.cpu"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_service_loadbalancer_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="services.loadbalancers"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_configmap_count_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/configmaps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_jobs_batch_count_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/jobs.batch"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_roles_count_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/roles.rbac.authorization.k8s.io"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_memory_limit_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="limits.memory"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_pvc_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="persistentvolumeclaims"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_memory_request_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", 
resource="requests.memory"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_pvc_count_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/persistentvolumeclaims"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_cronjobs_batch_count_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/cronjobs.batch"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_ingresses_extensions_count_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/ingresses.extensions"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_cpu_limit_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="limits.cpu"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_storage_request_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="requests.storage"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_deployment_count_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/deployments.apps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_pod_count_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/pods"}) by (namespace, resource, type) * 
on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_statefulset_count_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/statefulsets.apps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_daemonset_count_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/daemonsets.apps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_secret_count_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/secrets"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_service_count_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="count/services"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_cpu_request_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="requests.cpu"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_service_loadbalancer_used": `max(kube_resourcequota{resourcequota!="quota", type="used", namespace!="", namespace=~"$1", resource="services.loadbalancers"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_configmap_count_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/configmaps"}) by (namespace, resource, type) * on (namespace) 
group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_jobs_batch_count_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/jobs.batch"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_roles_count_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/roles.rbac.authorization.k8s.io"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_memory_limit_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="limits.memory"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_pvc_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="persistentvolumeclaims"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_memory_request_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="requests.memory"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_pvc_count_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/persistentvolumeclaims"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_cronjobs_batch_count_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/cronjobs.batch"}) by (namespace, resource, type) * on (namespace) 
group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_ingresses_extensions_count_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/ingresses.extensions"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_cpu_limit_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="limits.cpu"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_storage_request_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="requests.storage"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_deployment_count_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/deployments.apps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_pod_count_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/pods"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_statefulset_count_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/statefulsets.apps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_daemonset_count_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/daemonsets.apps"}) by (namespace, resource, type) * on (namespace) 
group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_secret_count_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/secrets"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_service_count_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/services"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_cpu_request_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="requests.cpu"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, - "namespace_service_loadbalancer_hard": `sum(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="services.loadbalancers"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_configmap_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/configmaps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_jobs_batch_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/jobs.batch"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_roles_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/roles.rbac.authorization.k8s.io"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + 
"namespace_memory_limit_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="limits.memory"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_pvc_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="persistentvolumeclaims"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_memory_request_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="requests.memory"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_pvc_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/persistentvolumeclaims"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_cronjobs_batch_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/cronjobs.batch"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_ingresses_extensions_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/ingresses.extensions"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_cpu_limit_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="limits.cpu"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_storage_request_hard": 
`min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="requests.storage"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_deployment_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/deployments.apps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_pod_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/pods"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_statefulset_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/statefulsets.apps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_daemonset_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/daemonsets.apps"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_secret_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/secrets"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_service_count_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="count/services"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_cpu_request_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", 
namespace=~"$1", resource="requests.cpu"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + "namespace_service_loadbalancer_hard": `min(kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", namespace=~"$1", resource="services.loadbalancers"}) by (namespace, resource, type) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, "namespace_cronjob_count": `sum(kube_cronjob_labels{namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, "namespace_pvc_count": `sum(kube_persistentvolumeclaim_info{namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, @@ -484,6 +509,10 @@ var RulePromQLTmplMap = MetricMap{ "namespace_ingresses_extensions_count": `sum(kube_ingress_labels{namespace!="", namespace=~"$1"}) by (namespace) * on (namespace) group_left(label_kubesphere_io_workspace)(kube_namespace_labels)`, + // New in ks 2.0 + "namespace_pod_abnormal_ratio": `namespace:pod_abnormal:ratio{namespace!="", namespace=~"$1"}`, + "namespace_resourcequota_used_ratio": `namespace:resourcequota_used:ratio{namespace!="", namespace=~"$1"}`, + // pod "pod_cpu_usage": `sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name!="", pod_name="$2", image!=""}[5m])) by (namespace, pod_name)`, "pod_memory_usage": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name!="", pod_name="$2", image!=""}) by (namespace, pod_name)`, @@ -504,11 +533,12 @@ var RulePromQLTmplMap = MetricMap{ "pod_net_bytes_received_node": `sum by (node, pod_name) (irate(container_network_receive_bytes_total{pod_name!="", pod_name=~"$2", ` + ExcludedVirtualNetworkInterfaces + `, job="kubelet"}[5m]) * on (pod_name) group_left(node) label_join(node_namespace_pod:kube_pod_info:{node="$3"}, "pod_name", "", "pod", 
"_name"))`, // workload - "workload_pod_cpu_usage": `label_join(sum(label_replace(label_replace(label_replace(label_join(label_join(label_replace(sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$2", pod_name!="", pod_name=~"$3", image!=""}[5m])) by (namespace, pod_name) * on (pod_name) group_left(owner_kind) label_join(label_replace(kube_pod_owner{namespace="$2", pod=~".*"}, "owner_kind", "POD", "owner_kind", ""), "pod_name", "", "pod", "_name") , "postfix", "-POD", "owner_kind", "POD"), "pod_name", "", "pod_name", "postfix"), "dist", "-", "owner_kind", "pod_name"), "pod_name", "$1", "dist", "ReplicaSet-(.+)-(.+)"), "workload", "$1", "pod_name", "(.+)-(.+)"), "owner_kind", "Deployment", "owner_kind", "ReplicaSet.*")) by (namespace, workload, owner_kind), "workload", ":", "owner_kind", "workload")`, - "workload_pod_memory_usage": `label_join(sum(label_replace(label_replace(label_replace(label_join(label_join(label_replace(sum(container_memory_usage_bytes{job="kubelet", namespace="$2", pod_name!="", pod_name=~"$3", image!=""}) by (namespace, pod_name) * on (pod_name) group_left(owner_kind) label_join(label_replace(kube_pod_owner{namespace="$2", pod=~".*"}, "owner_kind", "POD", "owner_kind", ""), "pod_name", "", "pod", "_name") , "postfix", "-POD", "owner_kind", "POD"), "pod_name", "", "pod_name", "postfix"), "dist", "-", "owner_kind", "pod_name"), "pod_name", "$1", "dist", "ReplicaSet-(.+)-(.+)"), "workload", "$1", "pod_name", "(.+)-(.+)"), "owner_kind", "Deployment", "owner_kind", "ReplicaSet.*")) by (namespace, workload, owner_kind), "workload", ":", "owner_kind", "workload")`, - "workload_pod_memory_usage_wo_cache": `label_join(sum(label_replace(label_replace(label_replace(label_join(label_join(label_replace(sum(container_memory_usage_bytes{job="kubelet", namespace="$2", pod_name!="", pod_name=~"$3", image!=""} - container_memory_cache{job="kubelet", namespace="$2", pod_name!="", pod_name=~"$3", image!=""}) by (namespace, pod_name) * on 
(pod_name) group_left(owner_kind) label_join(label_replace(kube_pod_owner{namespace="$2", pod=~".*"}, "owner_kind", "POD", "owner_kind", ""), "pod_name", "", "pod", "_name") , "postfix", "-POD", "owner_kind", "POD"), "pod_name", "", "pod_name", "postfix"), "dist", "-", "owner_kind", "pod_name"), "pod_name", "$1", "dist", "ReplicaSet-(.+)-(.+)"), "workload", "$1", "pod_name", "(.+)-(.+)"), "owner_kind", "Deployment", "owner_kind", "ReplicaSet.*")) by (namespace, workload, owner_kind), "workload", ":", "owner_kind", "workload")`, - "workload_pod_net_bytes_transmitted": `label_join(sum(label_replace(label_replace(label_replace(label_join(label_join(label_replace(sum(irate(container_network_transmit_bytes_total{namespace="$2", pod_name!="", pod_name=~"$3", ` + ExcludedVirtualNetworkInterfaces + `, job="kubelet"}[5m])) by (namespace, pod_name) * on (pod_name) group_left(owner_kind) label_join(label_replace(kube_pod_owner{namespace="$2", pod=~".*"}, "owner_kind", "POD", "owner_kind", ""), "pod_name", "", "pod", "_name") , "postfix", "-POD", "owner_kind", "POD"), "pod_name", "", "pod_name", "postfix"), "dist", "-", "owner_kind", "pod_name"), "pod_name", "$1", "dist", "ReplicaSet-(.+)-(.+)"), "workload", "$1", "pod_name", "(.+)-(.+)"), "owner_kind", "Deployment", "owner_kind", "ReplicaSet.*")) by (namespace, workload, owner_kind), "workload", ":", "owner_kind", "workload")`, - "workload_pod_net_bytes_received": `label_join(sum(label_replace(label_replace(label_replace(label_join(label_join(label_replace(sum(irate(container_network_receive_bytes_total{namespace="$2", pod_name!="", pod_name=~"$3", ` + ExcludedVirtualNetworkInterfaces + `, job="kubelet"}[5m])) by (namespace, pod_name) * on (pod_name) group_left(owner_kind) label_join(label_replace(kube_pod_owner{namespace="$2", pod=~".*"}, "owner_kind", "POD", "owner_kind", ""), "pod_name", "", "pod", "_name") , "postfix", "-POD", "owner_kind", "POD"), "pod_name", "", "pod_name", "postfix"), "dist", "-", "owner_kind", 
"pod_name"), "pod_name", "$1", "dist", "ReplicaSet-(.+)-(.+)"), "workload", "$1", "pod_name", "(.+)-(.+)"), "owner_kind", "Deployment", "owner_kind", "ReplicaSet.*")) by (namespace, workload, owner_kind), "workload", ":", "owner_kind", "workload")`, + // Join the "container_cpu_usage_seconds_total" metric with "kube_pod_owner" to calculate workload-level resource usage + "workload_pod_cpu_usage": `namespace:workload_cpu_usage:sum{namespace="$2", workload=~"$3"}`, + "workload_pod_memory_usage": `namespace:workload_memory_usage:sum{namespace="$2", workload=~"$3"}`, + "workload_pod_memory_usage_wo_cache": `namespace:workload_memory_usage_wo_cache:sum{namespace="$2", workload=~"$3"}`, + "workload_pod_net_bytes_transmitted": `namespace:workload_net_bytes_transmitted:sum_irate{namespace="$2", workload=~"$3"}`, + "workload_pod_net_bytes_received": `namespace:workload_net_bytes_received:sum_irate{namespace="$2", workload=~"$3"}`, "workload_deployment_replica": `label_join(sum (label_join(label_replace(kube_deployment_spec_replicas{namespace="$2", deployment=~"$3"}, "owner_kind", "Deployment", "", ""), "workload", "", "deployment")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, "workload_deployment_replica_available": `label_join(sum (label_join(label_replace(kube_deployment_status_replicas_available{namespace="$2", deployment=~"$3"}, "owner_kind", "Deployment", "", ""), "workload", "", "deployment")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, @@ -517,6 +547,11 @@ var RulePromQLTmplMap = MetricMap{ "workload_daemonset_replica": `label_join(sum (label_join(label_replace(kube_daemonset_status_desired_number_scheduled{namespace="$2", daemonset=~"$3"}, "owner_kind", "DaemonSet", "", ""), "workload", "", "daemonset")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, "workload_daemonset_replica_available": `label_join(sum 
(label_join(label_replace(kube_daemonset_status_number_available{namespace="$2", daemonset=~"$3"}, "owner_kind", "DaemonSet", "", ""), "workload", "", "daemonset")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`, + // New in ks 2.0 + "workload_deployment_unavailable_replicas_ratio": `namespace:deployment_unavailable_replicas:ratio{namespace="$2", deployment=~"$3"}`, + "workload_daemonset_unavailable_replicas_ratio": `namespace:daemonset_unavailable_replicas:ratio{namespace="$2", daemonset=~"$3"}`, + "workload_statefulset_unavailable_replicas_ratio": `namespace:statefulset_unavailable_replicas:ratio{namespace="$2", statefulset=~"$3"}`, + // container "container_cpu_usage": `sum(irate(container_cpu_usage_seconds_total{namespace="$1", pod_name="$2", container_name!="POD", container_name=~"$3"}[5m])) by (namespace, pod_name, container_name)`, "container_memory_usage": `sum(container_memory_usage_bytes{namespace="$1", pod_name="$2", container_name!="POD", container_name=~"$3"}) by (namespace, pod_name, container_name)`, @@ -576,4 +611,7 @@ var RulePromQLTmplMap = MetricMap{ "workspace_secret_count": `sum(kube_secret_info{namespace!="", namespace$1})`, "workspace_all_project_count": `count(kube_namespace_annotations)`, + + // New in ks 2.0 + "workspace_pod_abnormal_ratio": `sum(kube_pod_status_phase{phase=~"Failed|Pending|Unknown", namespace!="", namespace$1}) / sum(kube_pod_status_phase{phase!~"Succeeded", namespace!="", namespace$1})`, } diff --git a/pkg/models/metrics/util.go b/pkg/models/metrics/util.go index cdf440cd7..c4ca053e5 100644 --- a/pkg/models/metrics/util.go +++ b/pkg/models/metrics/util.go @@ -50,7 +50,7 @@ func (wrapper FormatedMetricDataWrapper) Swap(i, j int) { } // sorted metric by ascending or descending order -func Sort(sortMetricName string, sortType string, fmtLevelMetric *FormatedLevelMetric) (*FormatedLevelMetric, int) { +func Sort(sortMetricName string, sortType string, rawMetrics *FormatedLevelMetric) 
(*FormatedLevelMetric, int) { defer func() { if err := recover(); err != nil { glog.Errorln(err) @@ -59,7 +59,7 @@ func Sort(sortMetricName string, sortType string, fmtLevelMetric *FormatedLevelM }() if sortMetricName == "" { - return fmtLevelMetric, -1 + return rawMetrics, -1 } // default sort type is descending order @@ -69,14 +69,18 @@ func Sort(sortMetricName string, sortType string, fmtLevelMetric *FormatedLevelM var currentResourceMap = make(map[string]int) - // indexMap store sorted index for each node/namespace/pod + // {: } var indexMap = make(map[string]int) i := 0 - for _, metricItem := range fmtLevelMetric.Results { + + // each metricItem is the result for a specific metric name + // so we find the metricItem with sortMetricName, and sort it + for _, metricItem := range rawMetrics.Results { + // only vector type result can be sorted if metricItem.Data.ResultType == ResultTypeVector && metricItem.Status == MetricStatusSuccess { if metricItem.MetricName == sortMetricName { if sortType == ResultSortTypeAsc { - // desc + // asc sort.Sort(FormatedMetricDataWrapper{metricItem.Data, func(p, q *map[string]interface{}) bool { value1 := (*p)[ResultItemValue].([]interface{}) value2 := (*q)[ResultItemValue].([]interface{}) @@ -109,16 +113,14 @@ func Sort(sortMetricName string, sortType string, fmtLevelMetric *FormatedLevelM } for _, r := range metricItem.Data.Result { - // for some reasons, 'metric' may not contain `resourceType` field - // example: {"metric":{},"value":[1541142931.731,"3"]} - k, exist := r[ResultItemMetric].(map[string]interface{})["resource_name"] - key := k.(string) + // record the ordering of resource_name to indexMap + // example: {"metric":{"resource_name": "Deployment:xxx"},"value":[1541142931.731,"3"]} + resourceName, exist := r[ResultItemMetric].(map[string]interface{})["resource_name"] if exist { - if _, exist := indexMap[key]; !exist { - indexMap[key] = i + if _, exist := indexMap[resourceName.(string)]; !exist { + 
indexMap[resourceName.(string)] = i i = i + 1 } - } } } @@ -148,8 +150,8 @@ func Sort(sortMetricName string, sortType string, fmtLevelMetric *FormatedLevelM } // sort other metric - for i := 0; i < len(fmtLevelMetric.Results); i++ { - re := fmtLevelMetric.Results[i] + for i := 0; i < len(rawMetrics.Results); i++ { + re := rawMetrics.Results[i] if re.Data.ResultType == ResultTypeVector && re.Status == MetricStatusSuccess { sortedMetric := make([]map[string]interface{}, len(indexMap)) for j := 0; j < len(re.Data.Result); j++ { @@ -163,11 +165,11 @@ func Sort(sortMetricName string, sortType string, fmtLevelMetric *FormatedLevelM } } - fmtLevelMetric.Results[i].Data.Result = sortedMetric + rawMetrics.Results[i].Data.Result = sortedMetric } } - return fmtLevelMetric, len(indexMap) + return rawMetrics, len(indexMap) } func Page(pageNum string, limitNum string, fmtLevelMetric *FormatedLevelMetric, maxLength int) interface{} { @@ -251,6 +253,7 @@ func Page(pageNum string, limitNum string, fmtLevelMetric *FormatedLevelMetric, } // maybe this function is time consuming +// The metric param is the result from Prometheus HTTP query func ReformatJson(metric string, metricsName string, needAddParams map[string]string, needDelParams ...string) *FormatedMetric { var formatMetric FormatedMetric @@ -268,6 +271,8 @@ func ReformatJson(metric string, metricsName string, needAddParams map[string]st result := formatMetric.Data.Result for _, res := range result { metric, exist := res[ResultItemMetric] + // Prometheus query result format: .data.result[].metric + // metricMap is the value of .data.result[].metric metricMap, sure := metric.(map[string]interface{}) if exist && sure { delete(metricMap, "__name__") diff --git a/pkg/simple/client/prometheus/prometheusclient.go b/pkg/simple/client/prometheus/prometheusclient.go index de931b2f3..9324ff6d2 100644 --- a/pkg/simple/client/prometheus/prometheusclient.go +++ b/pkg/simple/client/prometheus/prometheusclient.go @@ -31,21 +31,21 @@ import 
( ) const ( - DefaultScheme = "http" - DefaultPrometheusPort = "9090" - PrometheusApiPath = "/api/v1/" - DefaultQueryStep = "10m" - DefaultQueryTimeout = "10s" - RangeQueryType = "query_range?" - DefaultQueryType = "query?" - PrometheusAPIServerEnv = "PROMETHEUS_API_SERVER" + DefaultQueryStep = "10m" + DefaultQueryTimeout = "10s" + RangeQueryType = "query_range?" + DefaultQueryType = "query?" ) -var PrometheusAPIServer = "prometheus-k8s.kubesphere-monitoring-system.svc" -var PrometheusAPIEndpoint string +// Kubesphere sets up two Prometheus servers to balance monitoring workloads +var ( + PrometheusEndpoint string // For monitoring node, namespace, pod ... level resources + SecondaryPrometheusEndpoint string // For monitoring components including etcd, apiserver, coredns, etc. +) func init() { - flag.StringVar(&PrometheusAPIEndpoint, "prometheus-endpoint", "http://prometheus-k8s.kubesphere-monitoring-system.svc:9090/api/v1", "") + flag.StringVar(&PrometheusEndpoint, "prometheus-endpoint", "http://prometheus-k8s.kubesphere-monitoring-system.svc:9090/api/v1/", "") + flag.StringVar(&SecondaryPrometheusEndpoint, "secondary-prometheus-endpoint", "http://prometheus-k8s-system.kubesphere-monitoring-system.svc:9090/api/v1/", "") } type MonitoringRequestParams struct { @@ -71,7 +71,7 @@ type MonitoringRequestParams struct { var client = &http.Client{} func SendMonitoringRequest(queryType string, params string) string { - epurl := PrometheusAPIEndpoint + queryType + params + epurl := PrometheusEndpoint + queryType + params response, err := client.Get(epurl) if err != nil { glog.Error(err) @@ -157,7 +157,6 @@ func ParseMonitoringRequestParams(request *restful.Request) *MonitoringRequestPa requestParams.Params = u return &requestParams } else { - //u.Set("time", strconv.FormatInt(int64(time.Now().Unix()), 10)) u.Set("timeout", timeout) requestParams.QueryType = DefaultQueryType requestParams.Params = u