alerting v2beta1 tunes (#5200)
Signed-off-by: junot <junotxiang@kubesphere.io> Signed-off-by: junot <junotxiang@kubesphere.io>
This commit is contained in:
@@ -68,6 +68,14 @@ type RuleGroupStatus struct {
|
||||
EvaluationTime *float64 `json:"evaluationTime,omitempty" description:"time spent on rule group evaluation in seconds"`
|
||||
LastEvaluation *time.Time `json:"lastEvaluation,omitempty" description:"time of last evaluation"`
|
||||
RulesStatus []RuleStatus `json:"rulesStatus,omitempty" description:"status of rules in one RuleGroup"`
|
||||
RulesStats RulesStats `json:"rulesStats,omitempty" description:"statistics of rules in one RuleGroup"`
|
||||
}
|
||||
|
||||
// RulesStats holds per-state counts of the rules in one RuleGroup.
// The fields are serialized without omitempty, so zero counts are still
// present in the JSON output.
type RulesStats struct {
	// Inactive is the count of rules in the inactive state.
	Inactive int `json:"inactive" description:"count of rules in the inactive state"`
	// Pending is the count of rules in the pending state.
	Pending int `json:"pending" description:"count of rules in the pending state"`
	// Firing is the count of rules in the firing state.
	Firing int `json:"firing" description:"count of rules in the firing state"`
	// Disabled is the count of disabled rules.
	Disabled int `json:"disabled" description:"count of disabled rules"`
}
|
||||
|
||||
type RuleStatus struct {
|
||||
@@ -77,6 +85,7 @@ type RuleStatus struct {
|
||||
LastError string `json:"lastError,omitempty" description:"error of the last evaluation"`
|
||||
EvaluationTime *float64 `json:"evaluationTime,omitempty" description:"time spent on the expression evaluation in seconds"`
|
||||
LastEvaluation *time.Time `json:"lastEvaluation,omitempty" description:"time of last evaluation"`
|
||||
ActiveAt *time.Time `json:"activeAt,omitempty" description:"time when this rule became active"`
|
||||
|
||||
Alerts []*Alert `json:"alerts,omitempty" description:"alerts"`
|
||||
}
|
||||
|
||||
@@ -42,11 +42,14 @@ const (
|
||||
RuleLevelCluster RuleLevel = "cluster"
|
||||
RuleLevelGlobal RuleLevel = "global"
|
||||
|
||||
// label keys in rule.labels
|
||||
RuleLabelKeyRuleLevel = "rule_level"
|
||||
RuleLabelKeyCluster = "cluster"
|
||||
RuleLabelKeyNamespace = "namespace"
|
||||
RuleLabelKeySeverity = "severity"
|
||||
// for rule.labels
|
||||
RuleLabelKeyRuleLevel = "rule_level"
|
||||
RuleLabelKeyRuleGroup = "rule_group"
|
||||
RuleLabelKeyCluster = "cluster"
|
||||
RuleLabelKeyNamespace = "namespace"
|
||||
RuleLabelKeySeverity = "severity"
|
||||
RuleLabelKeyAlertType = "alerttype"
|
||||
RuleLabelValueAlertTypeMetric = "metric"
|
||||
|
||||
// label keys in RuleGroup/ClusterRuleGroup/GlobalRuleGroup.metadata.labels
|
||||
SourceGroupResourceLabelKeyEnable = "alerting.kubesphere.io/enable"
|
||||
@@ -132,7 +135,7 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList,
|
||||
commonEnforceFuncs ...enforceRuleFunc) ([]*promresourcesv1.RuleGroup, error) {
|
||||
var rulegroups []*promresourcesv1.RuleGroup
|
||||
|
||||
convertRule := func(rule *alertingv2beta1.Rule, enforceFuncs ...enforceRuleFunc) (*promresourcesv1.Rule, error) {
|
||||
convertRule := func(rule *alertingv2beta1.Rule, groupName string, enforceFuncs ...enforceRuleFunc) (*promresourcesv1.Rule, error) {
|
||||
if rule.Disable { // ignoring disabled rule
|
||||
return nil, nil
|
||||
}
|
||||
@@ -156,6 +159,15 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList,
|
||||
}
|
||||
|
||||
enforceFuncs = append(enforceFuncs, commonEnforceFuncs...)
|
||||
// enforce rule group label and alert type label
|
||||
enforceFuncs = append(enforceFuncs, func(rule *promresourcesv1.Rule) error {
|
||||
if rule.Labels == nil {
|
||||
rule.Labels = make(map[string]string)
|
||||
}
|
||||
rule.Labels[RuleLabelKeyRuleGroup] = groupName
|
||||
rule.Labels[RuleLabelKeyAlertType] = RuleLabelValueAlertTypeMetric
|
||||
return nil
|
||||
})
|
||||
|
||||
for _, f := range enforceFuncs {
|
||||
if f == nil {
|
||||
@@ -175,7 +187,7 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList,
|
||||
for _, group := range list.Items {
|
||||
var prules []promresourcesv1.Rule
|
||||
for _, rule := range group.Spec.Rules {
|
||||
prule, err := convertRule(&rule.Rule)
|
||||
prule, err := convertRule(&rule.Rule, group.Name)
|
||||
if err != nil {
|
||||
log.WithValues("rulegroup", group.Namespace+"/"+group.Name).Error(err, "failed to convert")
|
||||
continue
|
||||
@@ -195,7 +207,7 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList,
|
||||
for _, group := range list.Items {
|
||||
var prules []promresourcesv1.Rule
|
||||
for _, rule := range group.Spec.Rules {
|
||||
prule, err := convertRule(&rule.Rule)
|
||||
prule, err := convertRule(&rule.Rule, group.Name)
|
||||
if err != nil {
|
||||
log.WithValues("clusterrulegroup", group.Name).Error(err, "failed to convert")
|
||||
continue
|
||||
@@ -216,7 +228,8 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList,
|
||||
var prules []promresourcesv1.Rule
|
||||
for _, rule := range group.Spec.Rules {
|
||||
|
||||
prule, err := convertRule(&rule.Rule, createEnforceRuleFuncs(ParseGlobalRuleEnforceMatchers(&rule), nil)...)
|
||||
prule, err := convertRule(&rule.Rule, group.Name,
|
||||
createEnforceRuleFuncs(ParseGlobalRuleEnforceMatchers(&rule), nil)...)
|
||||
if err != nil {
|
||||
log.WithValues("globalrulegroup", group.Name).Error(err, "failed to convert")
|
||||
continue
|
||||
|
||||
@@ -16,6 +16,7 @@ package alerting
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
promlabels "github.com/prometheus/prometheus/pkg/labels"
|
||||
promrules "github.com/prometheus/prometheus/rules"
|
||||
@@ -129,7 +130,7 @@ func (o *ruleGroupOperator) ListRuleGroups(ctx context.Context, namespace string
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
|
||||
listResult := resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
|
||||
hit, great := o.compareRuleGroupStatus(
|
||||
&(left.(*kapialertingv2beta1.RuleGroup).Status), &(right.(*kapialertingv2beta1.RuleGroup).Status), field)
|
||||
if hit {
|
||||
@@ -143,7 +144,32 @@ func (o *ruleGroupOperator) ListRuleGroups(ctx context.Context, namespace string
|
||||
return selected
|
||||
}
|
||||
return resources.DefaultObjectMetaFilter(obj.(*kapialertingv2beta1.RuleGroup).ObjectMeta, filter)
|
||||
}), nil
|
||||
})
|
||||
|
||||
for i := range listResult.Items {
|
||||
item := listResult.Items[i].(*kapialertingv2beta1.RuleGroup)
|
||||
for j, ruleStatus := range item.Status.RulesStatus {
|
||||
updateRulesStats(&item.Status.RulesStats, item.Spec.Rules[j].Disable, ruleStatus.State)
|
||||
}
|
||||
listResult.Items[i] = item
|
||||
}
|
||||
|
||||
return listResult, nil
|
||||
}
|
||||
|
||||
func updateRulesStats(rulesStats *kapialertingv2beta1.RulesStats, ruleDisable bool, ruleState string) {
|
||||
if ruleDisable {
|
||||
rulesStats.Disabled++
|
||||
return
|
||||
}
|
||||
switch ruleState {
|
||||
case stateInactiveString:
|
||||
rulesStats.Inactive++
|
||||
case statePendingString:
|
||||
rulesStats.Pending++
|
||||
case stateFiringString:
|
||||
rulesStats.Firing++
|
||||
}
|
||||
}
|
||||
|
||||
// compareRuleGroupStatus compares rulegroup status.
|
||||
@@ -299,6 +325,10 @@ func (o *ruleGroupOperator) GetRuleGroup(ctx context.Context, namespace, name st
|
||||
}
|
||||
}
|
||||
|
||||
for j, ruleStatus := range ret.Status.RulesStatus {
|
||||
updateRulesStats(&ret.Status.RulesStats, ret.Spec.Rules[j].Disable, ruleStatus.State)
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
@@ -366,7 +396,7 @@ func (o *ruleGroupOperator) ListClusterRuleGroups(ctx context.Context,
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
|
||||
listResult := resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
|
||||
hit, great := o.compareRuleGroupStatus(
|
||||
&(left.(*kapialertingv2beta1.ClusterRuleGroup).Status), &(right.(*kapialertingv2beta1.ClusterRuleGroup).Status), field)
|
||||
if hit {
|
||||
@@ -380,7 +410,17 @@ func (o *ruleGroupOperator) ListClusterRuleGroups(ctx context.Context,
|
||||
return selected
|
||||
}
|
||||
return resources.DefaultObjectMetaFilter(obj.(*kapialertingv2beta1.ClusterRuleGroup).ObjectMeta, filter)
|
||||
}), nil
|
||||
})
|
||||
|
||||
for i := range listResult.Items {
|
||||
item := listResult.Items[i].(*kapialertingv2beta1.ClusterRuleGroup)
|
||||
for j, ruleStatus := range item.Status.RulesStatus {
|
||||
updateRulesStats(&item.Status.RulesStats, item.Spec.Rules[j].Disable, ruleStatus.State)
|
||||
}
|
||||
listResult.Items[i] = item
|
||||
}
|
||||
|
||||
return listResult, nil
|
||||
}
|
||||
|
||||
func (o *ruleGroupOperator) ListClusterAlerts(ctx context.Context,
|
||||
@@ -456,6 +496,10 @@ func (o *ruleGroupOperator) GetClusterRuleGroup(ctx context.Context, name string
|
||||
}
|
||||
}
|
||||
|
||||
for j, ruleStatus := range ret.Status.RulesStatus {
|
||||
updateRulesStats(&ret.Status.RulesStats, ret.Spec.Rules[j].Disable, ruleStatus.State)
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
@@ -546,7 +590,7 @@ func (o *ruleGroupOperator) ListGlobalRuleGroups(ctx context.Context,
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
|
||||
listResult := resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
|
||||
hit, great := o.compareRuleGroupStatus(
|
||||
&(left.(*kapialertingv2beta1.GlobalRuleGroup).Status), &(right.(*kapialertingv2beta1.GlobalRuleGroup).Status), field)
|
||||
if hit {
|
||||
@@ -563,7 +607,17 @@ func (o *ruleGroupOperator) ListGlobalRuleGroups(ctx context.Context,
|
||||
return selected
|
||||
}
|
||||
return resources.DefaultObjectMetaFilter(obj.(*kapialertingv2beta1.GlobalRuleGroup).ObjectMeta, filter)
|
||||
}), nil
|
||||
})
|
||||
|
||||
for i := range listResult.Items {
|
||||
item := listResult.Items[i].(*kapialertingv2beta1.GlobalRuleGroup)
|
||||
for j, ruleStatus := range item.Status.RulesStatus {
|
||||
updateRulesStats(&item.Status.RulesStats, item.Spec.Rules[j].Disable, ruleStatus.State)
|
||||
}
|
||||
listResult.Items[i] = item
|
||||
}
|
||||
|
||||
return listResult, nil
|
||||
}
|
||||
|
||||
func (o *ruleGroupOperator) ListGlobalAlerts(ctx context.Context,
|
||||
@@ -661,6 +715,10 @@ func (o *ruleGroupOperator) GetGlobalRuleGroup(ctx context.Context, name string)
|
||||
}
|
||||
}
|
||||
|
||||
for j, ruleStatus := range ret.Status.RulesStatus {
|
||||
updateRulesStats(&ret.Status.RulesStats, ret.Spec.Rules[j].Disable, ruleStatus.State)
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
@@ -677,6 +735,7 @@ func copyRuleGroupStatus(source *alerting.RuleGroup, target *kapialertingv2beta1
|
||||
if ruleState := parseAlertState(rule.State); ruleState > groupState {
|
||||
groupState = ruleState
|
||||
}
|
||||
var ruleActiveAt *time.Time
|
||||
alerts := []*kapialertingv2beta1.Alert{}
|
||||
for _, alert := range rule.Alerts {
|
||||
alerts = append(alerts, &kapialertingv2beta1.Alert{
|
||||
@@ -686,6 +745,9 @@ func copyRuleGroupStatus(source *alerting.RuleGroup, target *kapialertingv2beta1
|
||||
State: alert.State,
|
||||
Value: alert.Value,
|
||||
})
|
||||
if alert.ActiveAt != nil && (ruleActiveAt == nil || alert.ActiveAt.Before(*ruleActiveAt)) {
|
||||
ruleActiveAt = alert.ActiveAt
|
||||
}
|
||||
}
|
||||
ruleStatus := kapialertingv2beta1.RuleStatus{
|
||||
State: rule.State,
|
||||
@@ -693,6 +755,7 @@ func copyRuleGroupStatus(source *alerting.RuleGroup, target *kapialertingv2beta1
|
||||
LastError: rule.LastError,
|
||||
EvaluationTime: rule.EvaluationTime,
|
||||
LastEvaluation: rule.LastEvaluation,
|
||||
ActiveAt: ruleActiveAt,
|
||||
Alerts: alerts,
|
||||
}
|
||||
if len(rule.Labels) > 0 {
|
||||
|
||||
Reference in New Issue
Block a user