add component monitoring

Signed-off-by: huanggze <loganhuang@yunify.com>
This commit is contained in:
huanggze
2019-04-10 11:21:29 +08:00
committed by zryfish
parent 17e09190be
commit 3a3e49547b
6 changed files with 174 additions and 19 deletions

View File

@@ -135,7 +135,7 @@ func getAllWorkspaces() map[string]int {
paramValues := make(url.Values)
paramValues.Set("query", WorkspaceNamespaceLabelRule)
params := paramValues.Encode()
res := client.SendMonitoringRequest(client.DefaultQueryType, params)
res := client.SendMonitoringRequest(client.PrometheusEndpoint, client.DefaultQueryType, params)
metric := ReformatJson(res, "", map[string]string{"workspace": "workspace"})
@@ -240,7 +240,7 @@ func AssembleSpecificWorkloadMetricRequestInfo(monitoringRequest *client.Monitor
paramValues := monitoringRequest.Params
params := makeRequestParamString(rule, paramValues)
res := client.SendMonitoringRequest(client.DefaultQueryType, params)
res := client.SendMonitoringRequest(client.PrometheusEndpoint, client.DefaultQueryType, params)
podNamesFilter := getPodNameRegexInWorkload(res, podsFilter)
@@ -306,7 +306,7 @@ func AddNodeAddressMetric(nodeMetric *FormatedMetric, nodeAddress *map[string][]
// MonitorContainer fetches one container-level metric. It builds the query
// type and parameters with AssembleContainerMetricRequestInfo, sends them via
// client.SendMonitoringRequest, and passes the raw response through
// ReformatJson together with a "container_name" label mapping.
// NOTE(review): the two consecutive SendMonitoringRequest lines below appear to
// be the before/after versions of the same statement in this change — confirm
// against the actual file.
func MonitorContainer(monitoringRequest *client.MonitoringRequestParams, metricName string) *FormatedMetric {
queryType, params := AssembleContainerMetricRequestInfo(monitoringRequest, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
res := ReformatJson(metricsStr, metricName, map[string]string{"container_name": ""})
return res
}
@@ -480,7 +480,7 @@ func collectWorkspaceMetric(monitoringRequest *client.MonitoringRequestParams, w
go func(metricName string) {
queryType, params := AssembleSpecificWorkspaceMetricRequestInfo(monitoringRequest, namespaceArray, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": ws}) // It's adding "resource_name" field
wg.Done()
@@ -526,7 +526,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
wg.Add(1)
go func(metricName string) {
queryType, params := AssembleClusterMetricRequestInfo(monitoringRequest, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"cluster": "local"})
wg.Done()
}(metricName)
@@ -541,7 +541,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
wg.Add(1)
go func(metricName string) {
queryType, params := AssembleNodeMetricRequestInfo(monitoringRequest, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"node": ""})
wg.Done()
}(metricName)
@@ -580,7 +580,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
go func(metricName string, namespace string) {
queryType, params := AssembleNamespaceMetricRequestInfoByNamesapce(monitoringRequest, namespace, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
chForOneMetric <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": namespace})
wgForOneMetric.Done()
}(metricName, ns)
@@ -624,7 +624,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
wg.Add(1)
go func(metricName string, workspace string) {
queryType, params := AssembleSpecificWorkspaceMetricRequestInfo(monitoringRequest, namespaceArray, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": workspace})
wg.Done()
}(metricName, workspace)
@@ -642,7 +642,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
go func(metricName string) {
queryType, params := AssembleAllWorkspaceMetricRequestInfo(monitoringRequest, nil, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"workspace": "workspaces"})
wg.Done()
@@ -660,7 +660,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
go func(metricName string) {
queryType, params := AssembleNamespaceMetricRequestInfo(monitoringRequest, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
rawResult := ReformatJson(metricsStr, metricName, map[string]string{"namespace": ""})
ch <- rawResult
@@ -679,7 +679,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
wg.Add(1)
go func(metricName string) {
queryType, params := AssembleAllWorkloadMetricRequestInfo(monitoringRequest, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
reformattedResult := ReformatJson(metricsStr, metricName, map[string]string{"workload": ""})
// no need to append a null result
ch <- reformattedResult
@@ -696,7 +696,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
metricName = strings.TrimLeft(metricName, "workload_")
queryType, params, nullRule := AssembleSpecificWorkloadMetricRequestInfo(monitoringRequest, metricName)
if !nullRule {
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
fmtMetrics := ReformatJson(metricsStr, metricName, map[string]string{"pod_name": ""})
unifyMetricHistoryTimeRange(fmtMetrics)
ch <- fmtMetrics
@@ -716,7 +716,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
go func(metricName string) {
queryType, params, nullRule := AssemblePodMetricRequestInfo(monitoringRequest, metricName)
if !nullRule {
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"pod_name": ""})
} else {
ch <- nil
@@ -734,13 +734,28 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
wg.Add(1)
go func(metricName string) {
queryType, params := AssembleContainerMetricRequestInfo(monitoringRequest, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"container_name": ""})
wg.Done()
}(metricName)
}
}
}
case MetricLevelComponent:
{
for _, metricName := range ComponentMetricsNames {
matched, err := regexp.MatchString(metricsFilter, metricName)
if err == nil && matched {
wg.Add(1)
go func(metricName string) {
queryType, params := AssembleComponentRequestInfo(monitoringRequest, metricName)
metricsStr := client.SendMonitoringRequest(client.SecondaryPrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": monitoringRequest.ComponentName})
wg.Done()
}(metricName)
}
}
}
}
wg.Wait()
@@ -928,3 +943,12 @@ func AssembleNodeMetricRequestInfo(monitoringRequest *client.MonitoringRequestPa
return queryType, params
}
// AssembleComponentRequestInfo prepares the two values needed to issue a
// component-level metric request.
//
// It returns the request's QueryType unchanged, followed by the parameter
// string that makeRequestParamString produces from the rule MakeComponentRule
// yields for metricName and the request's Params.
func AssembleComponentRequestInfo(monitoringRequest *client.MonitoringRequestParams, metricName string) (string, string) {
	componentRule := MakeComponentRule(metricName)
	requestParams := makeRequestParamString(componentRule, monitoringRequest.Params)
	return monitoringRequest.QueryType, requestParams
}

View File

@@ -238,3 +238,8 @@ func MakeNodeRule(nodeID string, nodesFilter string, metricsName string) string
return rule
}
// MakeComponentRule returns the entry stored in RulePromQLTmplMap under
// metricsName. A name with no entry yields the map's zero value (an empty
// string); no error is reported.
func MakeComponentRule(metricsName string) string {
	return RulePromQLTmplMap[metricsName]
}

View File

@@ -65,6 +65,7 @@ const (
MetricLevelContainer = "container"
MetricLevelContainerName = "container_name"
MetricLevelWorkload = "workload"
MetricLevelComponent = "component"
)
const (
@@ -319,6 +320,58 @@ var ContainerMetricsNames = []string{
//"container_net_bytes_received",
}
// ComponentMetricsNames lists the metric names that are iterated for the
// component monitoring level. Each name is also a key of RulePromQLTmplMap,
// which is where the corresponding query rule is looked up.
var ComponentMetricsNames = []string{
// etcd
"etcd_server_deployed_sum",
"etcd_server_up_sum",
"etcd_server_has_leader",
"etcd_server_leader_changes",
"etcd_server_proposals_failed_rate",
"etcd_server_proposals_applied_rate",
"etcd_server_proposals_committed_rate",
"etcd_server_proposals_pending_count",
"etcd_mvcc_db_size",
"etcd_network_client_grpc_received_bytes",
"etcd_network_client_grpc_sent_bytes",
"etcd_grpc_call_rate",
"etcd_grpc_call_failed_rate",
"etcd_grpc_server_msg_received_rate",
"etcd_grpc_server_msg_sent_rate",
"etcd_disk_wal_fsync_duration",
"etcd_disk_wal_fsync_duration_quantile",
"etcd_disk_backend_commit_duration",
"etcd_disk_backend_commit_duration_quantile",
// apiserver
"apiserver_up_sum",
"apiserver_request_rate",
"apiserver_request_by_verb_rate",
"apiserver_request_latencies",
"apiserver_request_by_verb_latencies",
// scheduler
"scheduler_up_sum",
"scheduler_schedule_attempts",
"scheduler_schedule_attempt_rate",
"scheduler_e2e_scheduling_latency",
"scheduler_e2e_scheduling_latency_quantile",
// controller manager
"controller_manager_up_sum",
// coredns
"coredns_up_sum",
"coredns_cache_hits",
"coredns_cache_misses",
"coredns_dns_request_rate",
"coredns_dns_request_duration",
"coredns_dns_request_duration_quantile",
"coredns_dns_request_by_type_rate",
"coredns_dns_request_by_rcode_rate",
"coredns_panic_rate",
"coredns_proxy_request_rate",
"coredns_proxy_request_duration",
"coredns_proxy_request_duration_quantile",
// prometheus
"prometheus_up_sum",
"prometheus_tsdb_head_samples_appended_rate",
}
var RulePromQLTmplMap = MetricMap{
//cluster
"cluster_cpu_utilisation": ":node_cpu_utilisation:avg1m",
@@ -614,4 +667,55 @@ var RulePromQLTmplMap = MetricMap{
// New in ks 2.0
"workspace_pod_abnormal_ratio": `sum(kube_pod_status_phase{phase=~"Failed|Pending|Unknown", namespace!="", namespace$1}) / sum(kube_pod_status_phase{phase!~"Succeeded", namespace!="", namespace$1})`,
// component
"etcd_server_deployed_sum": `count(up{job="etcd"})`,
"etcd_server_up_sum": `etcd:up:sum`,
"etcd_server_has_leader": `etcd_server_has_leader`,
"etcd_server_leader_changes": `etcd:etcd_server_leader_changes_seen:sum_changes`,
"etcd_server_proposals_failed_rate": `etcd:etcd_server_proposals_failed:sum_irate`,
"etcd_server_proposals_applied_rate": `etcd:etcd_server_proposals_applied:sum_irate`,
"etcd_server_proposals_committed_rate": `etcd:etcd_server_proposals_committed:sum_irate`,
"etcd_server_proposals_pending_count": `etcd:etcd_server_proposals_pending:sum`,
"etcd_mvcc_db_size": `etcd:etcd_debugging_mvcc_db_total_size:sum`,
"etcd_network_client_grpc_received_bytes": `etcd:etcd_network_client_grpc_received_bytes:sum_irate`,
"etcd_network_client_grpc_sent_bytes": `etcd:etcd_network_client_grpc_sent_bytes:sum_irate`,
"etcd_grpc_call_rate": `etcd:grpc_server_started:sum_irate`,
"etcd_grpc_call_failed_rate": `etcd:grpc_server_handled:sum_irate`,
"etcd_grpc_server_msg_received_rate": `etcd:grpc_server_msg_received:sum_irate`,
"etcd_grpc_server_msg_sent_rate": `etcd:grpc_server_msg_sent:sum_irate`,
"etcd_disk_wal_fsync_duration": `etcd:etcd_disk_wal_fsync_duration:avg`,
"etcd_disk_wal_fsync_duration_quantile": `etcd:etcd_disk_wal_fsync_duration:histogram_quantile`,
"etcd_disk_backend_commit_duration": `etcd:etcd_disk_backend_commit_duration:avg`,
"etcd_disk_backend_commit_duration_quantile": `etcd:etcd_disk_backend_commit_duration:histogram_quantile`,
"apiserver_up_sum": `apiserver:up:sum`,
"apiserver_request_rate": `apiserver:apiserver_request_count:sum_irate`,
"apiserver_request_by_verb_rate": `apiserver:apiserver_request_count:sum_verb_irate`,
"apiserver_request_latencies": `apiserver:apiserver_request_latencies:avg`,
"apiserver_request_by_verb_latencies": `apiserver:apiserver_request_latencies:avg_by_verb`,
"scheduler_up_sum": `scheduler:up:sum`,
"scheduler_schedule_attempts": `scheduler:scheduler_schedule_attempts:sum`,
"scheduler_schedule_attempt_rate": `scheduler:scheduler_schedule_attempts:sum_rate`,
"scheduler_e2e_scheduling_latency": `scheduler:scheduler_e2e_scheduling_latency:avg`,
"scheduler_e2e_scheduling_latency_quantile": `scheduler:scheduler_e2e_scheduling_latency:histogram_quantile`,
"controller_manager_up_sum": `controller_manager:up:sum`,
"coredns_up_sum": `coredns:up:sum`,
"coredns_cache_hits": `coredns:coredns_cache_hits_total:sum_irate`,
"coredns_cache_misses": `coredns:coredns_cache_misses:sum_irate`,
"coredns_dns_request_rate": `coredns:coredns_dns_request_count:sum_irate`,
"coredns_dns_request_duration": `coredns:coredns_dns_request_duration:avg`,
"coredns_dns_request_duration_quantile": `coredns:coredns_dns_request_duration:histogram_quantile`,
"coredns_dns_request_by_type_rate": `coredns:coredns_dns_request_type_count:sum_irate`,
"coredns_dns_request_by_rcode_rate": `coredns:coredns_dns_response_rcode_count:sum_irate`,
"coredns_panic_rate": `coredns:coredns_panic_count:sum_irate`,
"coredns_proxy_request_rate": `coredns:coredns_proxy_request_count:sum_irate`,
"coredns_proxy_request_duration": `coredns:coredns_proxy_request_duration:avg`,
"coredns_proxy_request_duration_quantile": `coredns:coredns_proxy_request_duration:histogram_quantile`,
"prometheus_up_sum": `prometheus:up:sum`,
"prometheus_tsdb_head_samples_appended_rate": `prometheus:prometheus_tsdb_head_samples_appended:sum_rate`,
}