alerting v2beta1 tunes (#5200)

Signed-off-by: junot <junotxiang@kubesphere.io>

Signed-off-by: junot <junotxiang@kubesphere.io>
This commit is contained in:
junot
2022-09-07 17:04:41 +08:00
committed by GitHub
parent d3cf418c6e
commit d640c5324a
3 changed files with 100 additions and 15 deletions

View File

@@ -68,6 +68,14 @@ type RuleGroupStatus struct {
EvaluationTime *float64 `json:"evaluationTime,omitempty" description:"time spent on rule group evaluation in seconds"`
LastEvaluation *time.Time `json:"lastEvaluation,omitempty" description:"time of last evaluation"`
RulesStatus []RuleStatus `json:"rulesStatus,omitempty" description:"status of rules in one RuleGroup"`
RulesStats RulesStats `json:"rulesStats,omitempty" description:"statistics of rules in one RuleGroup"`
}
// RulesStats holds per-state rule counts for one rule group: how many of its
// rules are inactive, pending, firing, or disabled. None of the fields use
// omitempty, so zero counts are still written out when the struct is
// marshalled to JSON.
type RulesStats struct {
Inactive int `json:"inactive" description:"count of rules in the inactive state"`
Pending int `json:"pending" description:"count of rules in the pending state"`
Firing int `json:"firing" description:"count of rules in the firing state"`
Disabled int `json:"disabled" description:"count of disabled rules"`
}
type RuleStatus struct {
@@ -77,6 +85,7 @@ type RuleStatus struct {
LastError string `json:"lastError,omitempty" description:"error of the last evaluation"`
EvaluationTime *float64 `json:"evaluationTime,omitempty" description:"time spent on the expression evaluation in seconds"`
LastEvaluation *time.Time `json:"lastEvaluation,omitempty" description:"time of last evaluation"`
ActiveAt *time.Time `json:"activeAt,omitempty" description:"time when this rule became active"`
Alerts []*Alert `json:"alerts,omitempty" description:"alerts"`
}

View File

@@ -42,11 +42,14 @@ const (
RuleLevelCluster RuleLevel = "cluster"
RuleLevelGlobal RuleLevel = "global"
// label keys in rule.labels
RuleLabelKeyRuleLevel = "rule_level"
RuleLabelKeyCluster = "cluster"
RuleLabelKeyNamespace = "namespace"
RuleLabelKeySeverity = "severity"
// label keys and values used in rule.labels
RuleLabelKeyRuleLevel = "rule_level"
RuleLabelKeyRuleGroup = "rule_group"
RuleLabelKeyCluster = "cluster"
RuleLabelKeyNamespace = "namespace"
RuleLabelKeySeverity = "severity"
RuleLabelKeyAlertType = "alerttype"
RuleLabelValueAlertTypeMetric = "metric"
// label keys in RuleGroup/ClusterRuleGroup/GlobalRuleGroup.metadata.labels
SourceGroupResourceLabelKeyEnable = "alerting.kubesphere.io/enable"
@@ -132,7 +135,7 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList,
commonEnforceFuncs ...enforceRuleFunc) ([]*promresourcesv1.RuleGroup, error) {
var rulegroups []*promresourcesv1.RuleGroup
convertRule := func(rule *alertingv2beta1.Rule, enforceFuncs ...enforceRuleFunc) (*promresourcesv1.Rule, error) {
convertRule := func(rule *alertingv2beta1.Rule, groupName string, enforceFuncs ...enforceRuleFunc) (*promresourcesv1.Rule, error) {
if rule.Disable { // ignoring disabled rule
return nil, nil
}
@@ -156,6 +159,15 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList,
}
enforceFuncs = append(enforceFuncs, commonEnforceFuncs...)
// enforce rule group label and alert type label
enforceFuncs = append(enforceFuncs, func(rule *promresourcesv1.Rule) error {
if rule.Labels == nil {
rule.Labels = make(map[string]string)
}
rule.Labels[RuleLabelKeyRuleGroup] = groupName
rule.Labels[RuleLabelKeyAlertType] = RuleLabelValueAlertTypeMetric
return nil
})
for _, f := range enforceFuncs {
if f == nil {
@@ -175,7 +187,7 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList,
for _, group := range list.Items {
var prules []promresourcesv1.Rule
for _, rule := range group.Spec.Rules {
prule, err := convertRule(&rule.Rule)
prule, err := convertRule(&rule.Rule, group.Name)
if err != nil {
log.WithValues("rulegroup", group.Namespace+"/"+group.Name).Error(err, "failed to convert")
continue
@@ -195,7 +207,7 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList,
for _, group := range list.Items {
var prules []promresourcesv1.Rule
for _, rule := range group.Spec.Rules {
prule, err := convertRule(&rule.Rule)
prule, err := convertRule(&rule.Rule, group.Name)
if err != nil {
log.WithValues("clusterrulegroup", group.Name).Error(err, "failed to convert")
continue
@@ -216,7 +228,8 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList,
var prules []promresourcesv1.Rule
for _, rule := range group.Spec.Rules {
prule, err := convertRule(&rule.Rule, createEnforceRuleFuncs(ParseGlobalRuleEnforceMatchers(&rule), nil)...)
prule, err := convertRule(&rule.Rule, group.Name,
createEnforceRuleFuncs(ParseGlobalRuleEnforceMatchers(&rule), nil)...)
if err != nil {
log.WithValues("globalrulegroup", group.Name).Error(err, "failed to convert")
continue

View File

@@ -16,6 +16,7 @@ package alerting
import (
"context"
"time"
promlabels "github.com/prometheus/prometheus/pkg/labels"
promrules "github.com/prometheus/prometheus/rules"
@@ -129,7 +130,7 @@ func (o *ruleGroupOperator) ListRuleGroups(ctx context.Context, namespace string
return nil, err
}
return resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
listResult := resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
hit, great := o.compareRuleGroupStatus(
&(left.(*kapialertingv2beta1.RuleGroup).Status), &(right.(*kapialertingv2beta1.RuleGroup).Status), field)
if hit {
@@ -143,7 +144,32 @@ func (o *ruleGroupOperator) ListRuleGroups(ctx context.Context, namespace string
return selected
}
return resources.DefaultObjectMetaFilter(obj.(*kapialertingv2beta1.RuleGroup).ObjectMeta, filter)
}), nil
})
for i := range listResult.Items {
item := listResult.Items[i].(*kapialertingv2beta1.RuleGroup)
for j, ruleStatus := range item.Status.RulesStatus {
updateRulesStats(&item.Status.RulesStats, item.Spec.Rules[j].Disable, ruleStatus.State)
}
listResult.Items[i] = item
}
return listResult, nil
}
// updateRulesStats accounts for a single rule in rulesStats.
//
// A disabled rule is counted only as Disabled and its state is ignored.
// Otherwise the counter matching ruleState (inactive, pending or firing) is
// incremented; a state that matches none of them leaves rulesStats unchanged.
func updateRulesStats(rulesStats *kapialertingv2beta1.RulesStats, ruleDisable bool, ruleState string) {
	switch {
	case ruleDisable:
		// Checked first so that a disabled rule never reaches the state cases.
		rulesStats.Disabled++
	case ruleState == stateInactiveString:
		rulesStats.Inactive++
	case ruleState == statePendingString:
		rulesStats.Pending++
	case ruleState == stateFiringString:
		rulesStats.Firing++
	}
}
// compareRuleGroupStatus compares rulegroup status.
@@ -299,6 +325,10 @@ func (o *ruleGroupOperator) GetRuleGroup(ctx context.Context, namespace, name st
}
}
for j, ruleStatus := range ret.Status.RulesStatus {
updateRulesStats(&ret.Status.RulesStats, ret.Spec.Rules[j].Disable, ruleStatus.State)
}
return ret, nil
}
@@ -366,7 +396,7 @@ func (o *ruleGroupOperator) ListClusterRuleGroups(ctx context.Context,
return nil, err
}
return resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
listResult := resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
hit, great := o.compareRuleGroupStatus(
&(left.(*kapialertingv2beta1.ClusterRuleGroup).Status), &(right.(*kapialertingv2beta1.ClusterRuleGroup).Status), field)
if hit {
@@ -380,7 +410,17 @@ func (o *ruleGroupOperator) ListClusterRuleGroups(ctx context.Context,
return selected
}
return resources.DefaultObjectMetaFilter(obj.(*kapialertingv2beta1.ClusterRuleGroup).ObjectMeta, filter)
}), nil
})
for i := range listResult.Items {
item := listResult.Items[i].(*kapialertingv2beta1.ClusterRuleGroup)
for j, ruleStatus := range item.Status.RulesStatus {
updateRulesStats(&item.Status.RulesStats, item.Spec.Rules[j].Disable, ruleStatus.State)
}
listResult.Items[i] = item
}
return listResult, nil
}
func (o *ruleGroupOperator) ListClusterAlerts(ctx context.Context,
@@ -456,6 +496,10 @@ func (o *ruleGroupOperator) GetClusterRuleGroup(ctx context.Context, name string
}
}
for j, ruleStatus := range ret.Status.RulesStatus {
updateRulesStats(&ret.Status.RulesStats, ret.Spec.Rules[j].Disable, ruleStatus.State)
}
return ret, nil
}
@@ -546,7 +590,7 @@ func (o *ruleGroupOperator) ListGlobalRuleGroups(ctx context.Context,
return nil, err
}
return resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
listResult := resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
hit, great := o.compareRuleGroupStatus(
&(left.(*kapialertingv2beta1.GlobalRuleGroup).Status), &(right.(*kapialertingv2beta1.GlobalRuleGroup).Status), field)
if hit {
@@ -563,7 +607,17 @@ func (o *ruleGroupOperator) ListGlobalRuleGroups(ctx context.Context,
return selected
}
return resources.DefaultObjectMetaFilter(obj.(*kapialertingv2beta1.GlobalRuleGroup).ObjectMeta, filter)
}), nil
})
for i := range listResult.Items {
item := listResult.Items[i].(*kapialertingv2beta1.GlobalRuleGroup)
for j, ruleStatus := range item.Status.RulesStatus {
updateRulesStats(&item.Status.RulesStats, item.Spec.Rules[j].Disable, ruleStatus.State)
}
listResult.Items[i] = item
}
return listResult, nil
}
func (o *ruleGroupOperator) ListGlobalAlerts(ctx context.Context,
@@ -661,6 +715,10 @@ func (o *ruleGroupOperator) GetGlobalRuleGroup(ctx context.Context, name string)
}
}
for j, ruleStatus := range ret.Status.RulesStatus {
updateRulesStats(&ret.Status.RulesStats, ret.Spec.Rules[j].Disable, ruleStatus.State)
}
return ret, nil
}
@@ -677,6 +735,7 @@ func copyRuleGroupStatus(source *alerting.RuleGroup, target *kapialertingv2beta1
if ruleState := parseAlertState(rule.State); ruleState > groupState {
groupState = ruleState
}
var ruleActiveAt *time.Time
alerts := []*kapialertingv2beta1.Alert{}
for _, alert := range rule.Alerts {
alerts = append(alerts, &kapialertingv2beta1.Alert{
@@ -686,6 +745,9 @@ func copyRuleGroupStatus(source *alerting.RuleGroup, target *kapialertingv2beta1
State: alert.State,
Value: alert.Value,
})
if alert.ActiveAt != nil && (ruleActiveAt == nil || alert.ActiveAt.Before(*ruleActiveAt)) {
ruleActiveAt = alert.ActiveAt
}
}
ruleStatus := kapialertingv2beta1.RuleStatus{
State: rule.State,
@@ -693,6 +755,7 @@ func copyRuleGroupStatus(source *alerting.RuleGroup, target *kapialertingv2beta1
LastError: rule.LastError,
EvaluationTime: rule.EvaluationTime,
LastEvaluation: rule.LastEvaluation,
ActiveAt: ruleActiveAt,
Alerts: alerts,
}
if len(rule.Labels) > 0 {