From d640c5324a9c8e8b059514a40f7a5f9c5e9c5cf3 Mon Sep 17 00:00:00 2001 From: junot <49136171+junotx@users.noreply.github.com> Date: Wed, 7 Sep 2022 17:04:41 +0800 Subject: [PATCH] alerting v2beta1 tunes (#5200) Signed-off-by: junot Signed-off-by: junot --- pkg/api/alerting/v2beta1/types.go | 9 ++++ pkg/controller/alerting/util.go | 31 +++++++++---- pkg/models/alerting/rulegroup.go | 75 ++++++++++++++++++++++++++++--- 3 files changed, 100 insertions(+), 15 deletions(-) diff --git a/pkg/api/alerting/v2beta1/types.go b/pkg/api/alerting/v2beta1/types.go index 10adb383a..6bce6ab01 100644 --- a/pkg/api/alerting/v2beta1/types.go +++ b/pkg/api/alerting/v2beta1/types.go @@ -68,6 +68,14 @@ type RuleGroupStatus struct { EvaluationTime *float64 `json:"evaluationTime,omitempty" description:"time spent on rule group evaluation in seconds"` LastEvaluation *time.Time `json:"lastEvaluation,omitempty" description:"time of last evaluation"` RulesStatus []RuleStatus `json:"rulesStatus,omitempty" description:"status of rules in one RuleGroup"` + RulesStats RulesStats `json:"rulesStats,omitempty" description:"statistics of rules in one RuleGroup"` +} + +type RulesStats struct { + Inactive int `json:"inactive" description:"count of rules in the inactive state"` + Pending int `json:"pending" description:"count of rules in the pending state"` + Firing int `json:"firing" description:"count of rules in the firing state"` + Disabled int `json:"disabled" description:"count of disabled rules"` } type RuleStatus struct { @@ -77,6 +85,7 @@ type RuleStatus struct { LastError string `json:"lastError,omitempty" description:"error of the last evaluation"` EvaluationTime *float64 `json:"evaluationTime,omitempty" description:"time spent on the expression evaluation in seconds"` LastEvaluation *time.Time `json:"lastEvaluation,omitempty" description:"time of last evaluation"` + ActiveAt *time.Time `json:"activeAt,omitempty" description:"time when this rule became active"` Alerts []*Alert `json:"alerts,omitempty" description:"alerts"` } diff --git a/pkg/controller/alerting/util.go b/pkg/controller/alerting/util.go index 63b6eaea3..55fb6c686 100644 --- a/pkg/controller/alerting/util.go +++ b/pkg/controller/alerting/util.go @@ -42,11 +42,14 @@ const ( RuleLevelCluster RuleLevel = "cluster" RuleLevelGlobal RuleLevel = "global" - // label keys in rule.labels - RuleLabelKeyRuleLevel = "rule_level" - RuleLabelKeyCluster = "cluster" - RuleLabelKeyNamespace = "namespace" - RuleLabelKeySeverity = "severity" + // for rule.labels + RuleLabelKeyRuleLevel = "rule_level" + RuleLabelKeyRuleGroup = "rule_group" + RuleLabelKeyCluster = "cluster" + RuleLabelKeyNamespace = "namespace" + RuleLabelKeySeverity = "severity" + RuleLabelKeyAlertType = "alerttype" + RuleLabelValueAlertTypeMetric = "metric" // label keys in RuleGroup/ClusterRuleGroup/GlobalRuleGroup.metadata.labels SourceGroupResourceLabelKeyEnable = "alerting.kubesphere.io/enable" @@ -132,7 +135,7 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList, commonEnforceFuncs ...enforceRuleFunc) ([]*promresourcesv1.RuleGroup, error) { var rulegroups []*promresourcesv1.RuleGroup - convertRule := func(rule *alertingv2beta1.Rule, enforceFuncs ...enforceRuleFunc) (*promresourcesv1.Rule, error) { + convertRule := func(rule *alertingv2beta1.Rule, groupName string, enforceFuncs ...enforceRuleFunc) (*promresourcesv1.Rule, error) { if rule.Disable { // ignoring disabled rule return nil, nil } @@ -156,6 +159,15 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList, } enforceFuncs = append(enforceFuncs, commonEnforceFuncs...) + // enforce rule group label and alert type label + enforceFuncs = append(enforceFuncs, func(rule *promresourcesv1.Rule) error { + if rule.Labels == nil { + rule.Labels = make(map[string]string) + } + rule.Labels[RuleLabelKeyRuleGroup] = groupName + rule.Labels[RuleLabelKeyAlertType] = RuleLabelValueAlertTypeMetric + return nil + }) for _, f := range enforceFuncs { if f == nil { @@ -175,7 +187,7 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList, for _, group := range list.Items { var prules []promresourcesv1.Rule for _, rule := range group.Spec.Rules { - prule, err := convertRule(&rule.Rule) + prule, err := convertRule(&rule.Rule, group.Name) if err != nil { log.WithValues("rulegroup", group.Namespace+"/"+group.Name).Error(err, "failed to convert") continue @@ -195,7 +207,7 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList, for _, group := range list.Items { var prules []promresourcesv1.Rule for _, rule := range group.Spec.Rules { - prule, err := convertRule(&rule.Rule) + prule, err := convertRule(&rule.Rule, group.Name) if err != nil { log.WithValues("clusterrulegroup", group.Name).Error(err, "failed to convert") continue @@ -216,7 +228,8 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList, var prules []promresourcesv1.Rule for _, rule := range group.Spec.Rules { - prule, err := convertRule(&rule.Rule, createEnforceRuleFuncs(ParseGlobalRuleEnforceMatchers(&rule), nil)...) + prule, err := convertRule(&rule.Rule, group.Name, + createEnforceRuleFuncs(ParseGlobalRuleEnforceMatchers(&rule), nil)...) if err != nil { log.WithValues("globalrulegroup", group.Name).Error(err, "failed to convert") continue diff --git a/pkg/models/alerting/rulegroup.go b/pkg/models/alerting/rulegroup.go index 8453dbca4..6dfcc66e6 100644 --- a/pkg/models/alerting/rulegroup.go +++ b/pkg/models/alerting/rulegroup.go @@ -16,6 +16,7 @@ package alerting import ( "context" + "time" promlabels "github.com/prometheus/prometheus/pkg/labels" promrules "github.com/prometheus/prometheus/rules" @@ -129,7 +130,7 @@ func (o *ruleGroupOperator) ListRuleGroups(ctx context.Context, namespace string return nil, err } - return resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool { + listResult := resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool { hit, great := o.compareRuleGroupStatus( &(left.(*kapialertingv2beta1.RuleGroup).Status), &(right.(*kapialertingv2beta1.RuleGroup).Status), field) if hit { @@ -143,7 +144,32 @@ func (o *ruleGroupOperator) ListRuleGroups(ctx context.Context, namespace string return selected } return resources.DefaultObjectMetaFilter(obj.(*kapialertingv2beta1.RuleGroup).ObjectMeta, filter) - }), nil + }) + + for i := range listResult.Items { + item := listResult.Items[i].(*kapialertingv2beta1.RuleGroup) + for j, ruleStatus := range item.Status.RulesStatus { + updateRulesStats(&item.Status.RulesStats, item.Spec.Rules[j].Disable, ruleStatus.State) + } + listResult.Items[i] = item + } + + return listResult, nil +} + +func updateRulesStats(rulesStats *kapialertingv2beta1.RulesStats, ruleDisable bool, ruleState string) { + if ruleDisable { + rulesStats.Disabled++ + return + } + switch ruleState { + case stateInactiveString: + rulesStats.Inactive++ + case statePendingString: + rulesStats.Pending++ + case stateFiringString: + rulesStats.Firing++ + } } // compareRuleGroupStatus compare rulegroup status. @@ -299,6 +325,10 @@ func (o *ruleGroupOperator) GetRuleGroup(ctx context.Context, namespace, name st } } + for j, ruleStatus := range ret.Status.RulesStatus { + updateRulesStats(&ret.Status.RulesStats, ret.Spec.Rules[j].Disable, ruleStatus.State) + } + return ret, nil } @@ -366,7 +396,7 @@ func (o *ruleGroupOperator) ListClusterRuleGroups(ctx context.Context, return nil, err } - return resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool { + listResult := resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool { hit, great := o.compareRuleGroupStatus( &(left.(*kapialertingv2beta1.ClusterRuleGroup).Status), &(right.(*kapialertingv2beta1.ClusterRuleGroup).Status), field) if hit { @@ -380,7 +410,17 @@ func (o *ruleGroupOperator) ListClusterRuleGroups(ctx context.Context, return selected } return resources.DefaultObjectMetaFilter(obj.(*kapialertingv2beta1.ClusterRuleGroup).ObjectMeta, filter) - }), nil + }) + + for i := range listResult.Items { + item := listResult.Items[i].(*kapialertingv2beta1.ClusterRuleGroup) + for j, ruleStatus := range item.Status.RulesStatus { + updateRulesStats(&item.Status.RulesStats, item.Spec.Rules[j].Disable, ruleStatus.State) + } + listResult.Items[i] = item + } + + return listResult, nil } func (o *ruleGroupOperator) ListClusterAlerts(ctx context.Context, @@ -456,6 +496,10 @@ func (o *ruleGroupOperator) GetClusterRuleGroup(ctx context.Context, name string } } + for j, ruleStatus := range ret.Status.RulesStatus { + updateRulesStats(&ret.Status.RulesStats, ret.Spec.Rules[j].Disable, ruleStatus.State) + } + return ret, nil } @@ -546,7 +590,7 @@ func (o *ruleGroupOperator) ListGlobalRuleGroups(ctx context.Context, return nil, err } - return resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool { + listResult := resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool { hit, great := o.compareRuleGroupStatus( &(left.(*kapialertingv2beta1.GlobalRuleGroup).Status), &(right.(*kapialertingv2beta1.GlobalRuleGroup).Status), field) if hit { @@ -563,7 +607,17 @@ func (o *ruleGroupOperator) ListGlobalRuleGroups(ctx context.Context, return selected } return resources.DefaultObjectMetaFilter(obj.(*kapialertingv2beta1.GlobalRuleGroup).ObjectMeta, filter) - }), nil + }) + + for i := range listResult.Items { + item := listResult.Items[i].(*kapialertingv2beta1.GlobalRuleGroup) + for j, ruleStatus := range item.Status.RulesStatus { + updateRulesStats(&item.Status.RulesStats, item.Spec.Rules[j].Disable, ruleStatus.State) + } + listResult.Items[i] = item + } + + return listResult, nil } func (o *ruleGroupOperator) ListGlobalAlerts(ctx context.Context, @@ -661,6 +715,10 @@ func (o *ruleGroupOperator) GetGlobalRuleGroup(ctx context.Context, name string) } } + for j, ruleStatus := range ret.Status.RulesStatus { + updateRulesStats(&ret.Status.RulesStats, ret.Spec.Rules[j].Disable, ruleStatus.State) + } + return ret, nil } @@ -677,6 +735,7 @@ func copyRuleGroupStatus(source *alerting.RuleGroup, target *kapialertingv2beta1 if ruleState := parseAlertState(rule.State); ruleState > groupState { groupState = ruleState } + var ruleActiveAt *time.Time alerts := []*kapialertingv2beta1.Alert{} for _, alert := range rule.Alerts { alerts = append(alerts, &kapialertingv2beta1.Alert{ @@ -686,6 +745,9 @@ func copyRuleGroupStatus(source *alerting.RuleGroup, target *kapialertingv2beta1 State: alert.State, Value: alert.Value, }) + if alert.ActiveAt != nil && (ruleActiveAt == nil || alert.ActiveAt.Before(*ruleActiveAt)) { + ruleActiveAt = alert.ActiveAt + } } ruleStatus := kapialertingv2beta1.RuleStatus{ State: rule.State, @@ -693,6 +755,7 @@ func copyRuleGroupStatus(source *alerting.RuleGroup, target *kapialertingv2beta1 LastError: rule.LastError, EvaluationTime: rule.EvaluationTime, LastEvaluation: rule.LastEvaluation, + ActiveAt: ruleActiveAt, Alerts: alerts, } if len(rule.Labels) > 0 {