From df31cab3433c14b93fa21d7b8c550cbd9068567e Mon Sep 17 00:00:00 2001 From: Carman Zhang Date: Fri, 16 Nov 2018 17:41:17 +0800 Subject: [PATCH] add load average metrics, change prometheus apiserver svc --- pkg/client/prometheusclient.go | 2 +- pkg/models/metrics/metricsrule.go | 2 +- pkg/models/metrics/metricsruleconst.go | 24 ++++++++++++++++++++++-- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/pkg/client/prometheusclient.go b/pkg/client/prometheusclient.go index 5320ad44b..c879db3c6 100644 --- a/pkg/client/prometheusclient.go +++ b/pkg/client/prometheusclient.go @@ -37,7 +37,7 @@ const ( PrometheusAPIServerEnv = "PROMETHEUS_API_SERVER" ) -var PrometheusAPIServer = "prometheus-k8s.monitoring.svc" +var PrometheusAPIServer = "prometheus-k8s.kubesphere-monitoring-system.svc" var PrometheusEndpointUrl string func init() { diff --git a/pkg/models/metrics/metricsrule.go b/pkg/models/metrics/metricsrule.go index f0ff0811e..12a8dd063 100755 --- a/pkg/models/metrics/metricsrule.go +++ b/pkg/models/metrics/metricsrule.go @@ -152,7 +152,7 @@ func MakeNodeRule(nodeID string, nodesFilter string, metricsName string) string if nodesFilter == "" { nodesFilter = ".*" } - if strings.Contains(metricsName, "disk_size") || strings.Contains(metricsName, "pod") || strings.Contains(metricsName, "usage") || strings.Contains(metricsName, "inode") { + if strings.Contains(metricsName, "disk_size") || strings.Contains(metricsName, "pod") || strings.Contains(metricsName, "usage") || strings.Contains(metricsName, "inode") || strings.Contains(metricsName, "load") { // disk size promql if nodeID != "" { nodesFilter = "{" + "node" + "=" + "\"" + nodeID + "\"" + "}" diff --git a/pkg/models/metrics/metricsruleconst.go b/pkg/models/metrics/metricsruleconst.go index 1a49413e2..6ba2883ba 100644 --- a/pkg/models/metrics/metricsruleconst.go +++ b/pkg/models/metrics/metricsruleconst.go @@ -132,6 +132,10 @@ var ClusterMetricsNames = []string{ "cluster_namespace_count", 
"workspace_all_project_count", + + "cluster_load1", + "cluster_load5", + "cluster_load15", } var NodeMetricsNames = []string{ "node_cpu_utilisation", @@ -163,6 +167,10 @@ var NodeMetricsNames = []string{ "node_pod_running_count", "node_pod_succeeded_count", "node_pod_abnormal_count", + + "node_load1", + "node_load5", + "node_load15", } var WorkspaceMetricsNames = []string{ "workspace_cpu_usage", @@ -250,6 +258,8 @@ var NamespaceMetricsNames = []string{ "namespace_replicaset_count", "namespace_service_count", "namespace_secret_count", + + "namespace_ingresses_extensions_count", } var PodMetricsNames = []string{ @@ -308,7 +318,7 @@ var RulePromQLTmplMap = MetricMap{ "cluster_node_offline": `sum(kube_node_status_condition{condition="Ready",status=~"unknown|false"})`, "cluster_node_total": `sum(kube_node_status_condition{condition="Ready"})`, - "cluster_ingresses_extensions_count": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/ingresses.extensions"}) by (resource, type)`, + "cluster_ingresses_extensions_count": `sum(kube_ingress_labels)`, "cluster_configmap_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/configmaps"}) by (resource, type)`, "cluster_jobs_batch_count_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", resource="count/jobs.batch"}) by (resource, type)`, @@ -343,6 +353,10 @@ var RulePromQLTmplMap = MetricMap{ "cluster_secret_count": `sum(kube_secret_info)`, "cluster_pv_count": `sum(kube_persistentvolume_labels)`, + "cluster_load1": `sum(node_load1{job="node-exporter"})`, + "cluster_load5": `sum(node_load5{job="node-exporter"})`, + "cluster_load15": `sum(node_load15{job="node-exporter"})`, + //node "node_cpu_utilisation": "node:node_cpu_utilisation:avg1m", "node_cpu_total": "node:node_num_cpu:sum", @@ -385,6 +399,10 @@ var RulePromQLTmplMap = MetricMap{ "node_cpu_usage": `node:node_cpu_utilisation:avg1m$1 * node:node_num_cpu:sum$1`, "node_memory_bytes_usage": 
"node:node_memory_bytes_total:sum$1 - node:node_memory_bytes_available:sum$1", + "node_load1": `sum by (node) (node_load1{job="node-exporter"} * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`, + "node_load5": `sum by (node) (node_load5{job="node-exporter"} * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`, + "node_load15": `sum by (node) (node_load15{job="node-exporter"} * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:$1)`, + //namespace "namespace_cpu_usage": `namespace:container_cpu_usage_seconds_total:sum_rate{namespace=~"$1"}`, "namespace_memory_usage": `namespace:container_memory_usage_bytes:sum{namespace=~"$1"}`, @@ -448,6 +466,8 @@ var RulePromQLTmplMap = MetricMap{ "namespace_service_count": `sum(kube_service_info{namespace=~"$1"}) by (namespace)`, "namespace_secret_count": `sum(kube_secret_info{namespace=~"$1"}) by (namespace)`, + "namespace_ingresses_extensions_count": `sum(kube_ingress_labels{namespace=~"$1"}) by (namespace)`, + // pod "pod_cpu_usage": `sum(irate(container_cpu_usage_seconds_total{job="kubelet", namespace="$1", pod_name="$2", image!=""}[5m])) by (namespace, pod_name)`, "pod_memory_usage": `sum(container_memory_usage_bytes{job="kubelet", namespace="$1", pod_name="$2", image!=""}) by (namespace, pod_name)`, @@ -506,7 +526,7 @@ var RulePromQLTmplMap = MetricMap{ "workspace_cpu_request_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace=~"$1", resource="requests.cpu"}) by (resource, type)`, "workspace_service_loadbalancer_used": `sum(kube_resourcequota{resourcequota!="quota", type="used", namespace=~"$1", resource="services.loadbalancers"}) by (resource, type)`, - "workspace_ingresses_extensions_count": `sum(kube_resourcequota{type="used", namespace=~"$1", resource="count/ingresses.extensions"}) by (resource, type)`, + "workspace_ingresses_extensions_count": `sum(kube_ingress_labels{namespace=~"$1"})`, "workspace_cronjob_count": 
`sum(kube_cronjob_labels{namespace=~"$1"})`, "workspace_pvc_count": `sum(kube_persistentvolumeclaim_info{namespace=~"$1"})`,