custom alerting

Signed-off-by: junotx <junotx@126.com>
This commit is contained in:
junotx
2020-11-24 17:56:26 +08:00
parent 242193ddb0
commit 371c9b187d
342 changed files with 64021 additions and 1934 deletions

View File

@@ -0,0 +1,632 @@
package customalerting
import (
"context"
"sort"
"strings"
promresourcesv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
prominformersv1 "github.com/prometheus-operator/prometheus-operator/pkg/client/informers/externalversions/monitoring/v1"
promresourcesclient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned"
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/intstr"
coreinformersv1 "k8s.io/client-go/informers/core/v1"
"kubesphere.io/kubesphere/pkg/api/customalerting/v1alpha1"
"kubesphere.io/kubesphere/pkg/constants"
"kubesphere.io/kubesphere/pkg/informers"
"kubesphere.io/kubesphere/pkg/models/customalerting/rules"
"kubesphere.io/kubesphere/pkg/simple/client/customalerting"
)
// Constants used by the custom alerting operator.
const (
// rulerNamespace is the namespace in which the prometheus and thanos ruler resources are looked up.
// It is also used as the rule namespace when a caller passes an empty namespace (cluster level).
rulerNamespace = constants.KubeSphereMonitoringNamespace
// customRuleGroupDefault is the rule group name passed to the ruler when a custom rule is created.
customRuleGroupDefault = "alerting.custom.defaults"
// customRuleResourceLabelKeyLevel is the label key on rule resources whose value is the rule level.
customRuleResourceLabelKeyLevel = "custom-alerting-rule-level"
)
// Size limits derived from the kubernetes secret size limit.
// NOTE(review): neither variable is referenced in the visible part of this file — confirm they are still needed.
var (
// maxSecretSize mirrors corev1.MaxSecretSize.
maxSecretSize = corev1.MaxSecretSize
// maxConfigMapDataSize is 45% of maxSecretSize, truncated to an int.
maxConfigMapDataSize = int(float64(maxSecretSize) * 0.45)
)
// Operator contains all operations on alerting rules. The operations may involve manipulating the
// prometheusrule custom resources where the rules are persisted, and querying the state of the rules
// from the prometheus endpoint and the thanos ruler endpoint.
//
// For the custom rule methods below, an empty namespace means the operation applies to the
// cluster-level alerting rules; otherwise it applies only to the rules of the specified namespace.
//
// All custom rules are configured for thanos ruler, so operations on custom alerting rules cannot be
// performed if thanos ruler is not enabled.
type Operator interface {
// ListCustomAlertingRules lists the custom alerting rules.
ListCustomAlertingRules(ctx context.Context, namespace string,
queryParams *v1alpha1.AlertingRuleQueryParams) (*v1alpha1.GettableAlertingRuleList, error)
// ListCustomRulesAlerts lists the alerts of the custom alerting rules.
ListCustomRulesAlerts(ctx context.Context, namespace string,
queryParams *v1alpha1.AlertQueryParams) (*v1alpha1.AlertList, error)
// GetCustomAlertingRule gets the custom alerting rule with the given name.
GetCustomAlertingRule(ctx context.Context, namespace, ruleName string) (*v1alpha1.GettableAlertingRule, error)
// ListCustomSpecifiedRuleAlerts lists the alerts of the custom alerting rule with the given name.
ListCustomSpecifiedRuleAlerts(ctx context.Context, namespace, ruleName string) ([]*v1alpha1.Alert, error)
// CreateCustomAlertingRule creates a custom alerting rule.
CreateCustomAlertingRule(ctx context.Context, namespace string, rule *v1alpha1.PostableAlertingRule) error
// UpdateCustomAlertingRule updates the custom alerting rule with the given name.
UpdateCustomAlertingRule(ctx context.Context, namespace, ruleName string, rule *v1alpha1.PostableAlertingRule) error
// DeleteCustomAlertingRule deletes the custom alerting rule with the given name.
DeleteCustomAlertingRule(ctx context.Context, namespace, ruleName string) error
// ListBuiltinAlertingRules lists the builtin (non-custom) alerting rules.
ListBuiltinAlertingRules(ctx context.Context,
queryParams *v1alpha1.AlertingRuleQueryParams) (*v1alpha1.GettableAlertingRuleList, error)
// ListBuiltinRulesAlerts lists the alerts of the builtin (non-custom) alerting rules.
ListBuiltinRulesAlerts(ctx context.Context,
queryParams *v1alpha1.AlertQueryParams) (*v1alpha1.AlertList, error)
// GetBuiltinAlertingRule gets the builtin (non-custom) alerting rule with the given id.
GetBuiltinAlertingRule(ctx context.Context, ruleId string) (*v1alpha1.GettableAlertingRule, error)
// ListBuiltinSpecifiedRuleAlerts lists the alerts of the builtin (non-custom) alerting rule with the given id.
ListBuiltinSpecifiedRuleAlerts(ctx context.Context, ruleId string) ([]*v1alpha1.Alert, error)
}
// NewOperator builds an Operator backed by the given informer factory, prometheus
// resource client and rule client.
//
// option may be nil. When option.ThanosRuleResourceLabels is set it is parsed as a
// comma-separated list of key=value pairs; these labels are later attached to the
// rule resources written for custom rules. Whitespace around keys and values is
// trimmed so that "a=b, c=d" does not produce a label key with a leading space.
// Pairs that do not contain exactly one "=" or that have an empty key are ignored.
func NewOperator(informers informers.InformerFactory,
	promResourceClient promresourcesclient.Interface, ruleClient customalerting.RuleClient,
	option *customalerting.Options) Operator {
	o := operator{
		namespaceInformer:        informers.KubernetesSharedInformerFactory().Core().V1().Namespaces(),
		promResourceClient:       promResourceClient,
		prometheusInformer:       informers.PrometheusSharedInformerFactory().Monitoring().V1().Prometheuses(),
		thanosRulerInformer:      informers.PrometheusSharedInformerFactory().Monitoring().V1().ThanosRulers(),
		ruleResourceInformer:     informers.PrometheusSharedInformerFactory().Monitoring().V1().PrometheusRules(),
		ruleClient:               ruleClient,
		thanosRuleResourceLabels: make(map[string]string),
	}

	// The cache registers its event handlers on the rule resource informer.
	o.resourceRuleCache = rules.NewRuleCache(o.ruleResourceInformer)

	if option != nil && len(option.ThanosRuleResourceLabels) != 0 {
		for _, pair := range strings.Split(option.ThanosRuleResourceLabels, ",") {
			kv := strings.Split(pair, "=")
			if len(kv) != 2 {
				continue
			}
			key, value := strings.TrimSpace(kv[0]), strings.TrimSpace(kv[1])
			if key == "" {
				// An empty key can never be a valid label.
				continue
			}
			o.thanosRuleResourceLabels[key] = value
		}
	}

	return &o
}
// operator is the Operator implementation built by NewOperator.
type operator struct {
// ruleClient queries rule groups from the prometheus and thanos ruler endpoints.
ruleClient customalerting.RuleClient
// promResourceClient is handed to the rulers created by getPrometheusRuler and getThanosRuler.
promResourceClient promresourcesclient.Interface
// prometheusInformer lists the prometheus custom resources in rulerNamespace.
prometheusInformer prominformersv1.PrometheusInformer
// thanosRulerInformer lists the thanosruler custom resources in rulerNamespace.
thanosRulerInformer prominformersv1.ThanosRulerInformer
// ruleResourceInformer backs resourceRuleCache and is handed to the rulers.
ruleResourceInformer prominformersv1.PrometheusRuleInformer
// namespaceInformer resolves namespace names to namespace objects.
namespaceInformer coreinformersv1.NamespaceInformer
// resourceRuleCache caches the rules parsed from the prometheusrule resources.
resourceRuleCache *rules.RuleCache
// thanosRuleResourceLabels holds the labels parsed from the options; they are copied
// into the resource labels passed to the ruler when custom rules are created or updated.
thanosRuleResourceLabels map[string]string
}
// ListCustomAlertingRules lists the custom alerting rules and returns the page
// selected by queryParams. An empty namespace selects the cluster-level rules,
// which are looked up in rulerNamespace; otherwise the namespace-level rules of
// the given namespace are listed.
func (o *operator) ListCustomAlertingRules(ctx context.Context, namespace string,
	queryParams *v1alpha1.AlertingRuleQueryParams) (*v1alpha1.GettableAlertingRuleList, error) {
	var level v1alpha1.RuleLevel
	switch namespace {
	case "":
		namespace, level = rulerNamespace, v1alpha1.RuleLevelCluster
	default:
		level = v1alpha1.RuleLevelNamespace
	}

	nsObj, err := o.namespaceInformer.Lister().Get(namespace)
	if err != nil {
		return nil, err
	}

	found, err := o.listCustomAlertingRules(ctx, nsObj, level)
	if err != nil {
		return nil, err
	}
	return pageAlertingRules(found, queryParams), nil
}
// ListCustomRulesAlerts lists the alerts of the custom alerting rules and returns
// the page selected by queryParams. An empty namespace selects the cluster-level
// rules in rulerNamespace; otherwise the rules of the given namespace are used.
func (o *operator) ListCustomRulesAlerts(ctx context.Context, namespace string,
	queryParams *v1alpha1.AlertQueryParams) (*v1alpha1.AlertList, error) {
	var level v1alpha1.RuleLevel
	if namespace != "" {
		level = v1alpha1.RuleLevelNamespace
	} else {
		level = v1alpha1.RuleLevelCluster
		namespace = rulerNamespace
	}

	nsObj, err := o.namespaceInformer.Lister().Get(namespace)
	if err != nil {
		return nil, err
	}

	customRules, err := o.listCustomAlertingRules(ctx, nsObj, level)
	if err != nil {
		return nil, err
	}
	return pageAlerts(customRules, queryParams), nil
}
// GetCustomAlertingRule returns the custom alerting rule named ruleName. An empty
// namespace selects the cluster-level rules in rulerNamespace; otherwise the rule
// is looked up among the namespace-level rules of the given namespace.
func (o *operator) GetCustomAlertingRule(ctx context.Context, namespace, ruleName string) (
	*v1alpha1.GettableAlertingRule, error) {
	var level v1alpha1.RuleLevel
	switch {
	case namespace == "":
		level = v1alpha1.RuleLevelCluster
		namespace = rulerNamespace
	default:
		level = v1alpha1.RuleLevelNamespace
	}

	nsObj, err := o.namespaceInformer.Lister().Get(namespace)
	if err != nil {
		return nil, err
	}
	return o.getCustomAlertingRule(ctx, nsObj, ruleName, level)
}
// ListCustomSpecifiedRuleAlerts returns the alerts of the custom alerting rule
// named ruleName. It returns v1alpha1.ErrAlertingRuleNotFound when the lookup
// yields no rule.
func (o *operator) ListCustomSpecifiedRuleAlerts(ctx context.Context, namespace, ruleName string) (
	[]*v1alpha1.Alert, error) {
	alertingRule, err := o.GetCustomAlertingRule(ctx, namespace, ruleName)
	switch {
	case err != nil:
		return nil, err
	case alertingRule == nil:
		return nil, v1alpha1.ErrAlertingRuleNotFound
	}
	return alertingRule.Alerts, nil
}
// ListBuiltinAlertingRules lists the builtin (non-custom) alerting rules and
// returns the page selected by queryParams.
func (o *operator) ListBuiltinAlertingRules(ctx context.Context,
	queryParams *v1alpha1.AlertingRuleQueryParams) (*v1alpha1.GettableAlertingRuleList, error) {
	builtinRules, err := o.listBuiltinAlertingRules(ctx)
	if err == nil {
		return pageAlertingRules(builtinRules, queryParams), nil
	}
	return nil, err
}
// ListBuiltinRulesAlerts lists the alerts of the builtin (non-custom) alerting
// rules and returns the page selected by queryParams.
func (o *operator) ListBuiltinRulesAlerts(ctx context.Context,
	queryParams *v1alpha1.AlertQueryParams) (*v1alpha1.AlertList, error) {
	builtinRules, err := o.listBuiltinAlertingRules(ctx)
	if err == nil {
		return pageAlerts(builtinRules, queryParams), nil
	}
	return nil, err
}
// GetBuiltinAlertingRule returns the builtin (non-custom) alerting rule with the
// given id by delegating to getBuiltinAlertingRule.
func (o *operator) GetBuiltinAlertingRule(ctx context.Context, ruleId string) (
	*v1alpha1.GettableAlertingRule, error) {
	alertingRule, err := o.getBuiltinAlertingRule(ctx, ruleId)
	return alertingRule, err
}
// ListBuiltinSpecifiedRuleAlerts returns the alerts of the builtin (non-custom)
// alerting rule with the given id. It returns v1alpha1.ErrAlertingRuleNotFound
// when the lookup yields no rule.
func (o *operator) ListBuiltinSpecifiedRuleAlerts(ctx context.Context, ruleId string) ([]*v1alpha1.Alert, error) {
	alertingRule, err := o.getBuiltinAlertingRule(ctx, ruleId)
	switch {
	case err != nil:
		return nil, err
	case alertingRule == nil:
		return nil, v1alpha1.ErrAlertingRuleNotFound
	}
	return alertingRule.Alerts, nil
}
// ListClusterAlertingRules lists the cluster-level custom alerting rules found in
// rulerNamespace and returns the page selected by queryParams.
// The customFlag argument is not used by this implementation.
func (o *operator) ListClusterAlertingRules(ctx context.Context, customFlag string,
	queryParams *v1alpha1.AlertingRuleQueryParams) (*v1alpha1.GettableAlertingRuleList, error) {
	nsObj, err := o.namespaceInformer.Lister().Get(rulerNamespace)
	if err != nil {
		return nil, err
	}

	clusterRules, err := o.listCustomAlertingRules(ctx, nsObj, v1alpha1.RuleLevelCluster)
	if err != nil {
		return nil, err
	}
	return pageAlertingRules(clusterRules, queryParams), nil
}
// ListClusterRulesAlerts lists the alerts of the cluster-level custom alerting
// rules found in rulerNamespace and returns the page selected by queryParams.
func (o *operator) ListClusterRulesAlerts(ctx context.Context,
	queryParams *v1alpha1.AlertQueryParams) (*v1alpha1.AlertList, error) {
	nsObj, err := o.namespaceInformer.Lister().Get(rulerNamespace)
	if err != nil {
		return nil, err
	}

	clusterRules, err := o.listCustomAlertingRules(ctx, nsObj, v1alpha1.RuleLevelCluster)
	if err != nil {
		return nil, err
	}
	return pageAlerts(clusterRules, queryParams), nil
}
// listCustomAlertingRules returns the custom alerting rules of the given level in
// ruleNamespace. The rules cached from the rule resources carrying the level label
// are mixed with the rule groups reported by the thanos ruler endpoint.
// It returns v1alpha1.ErrThanosRulerNotEnabled when there is no thanos ruler.
func (o *operator) listCustomAlertingRules(ctx context.Context, ruleNamespace *corev1.Namespace,
	level v1alpha1.RuleLevel) ([]*v1alpha1.GettableAlertingRule, error) {
	ruler, err := o.getThanosRuler()
	switch {
	case err != nil:
		return nil, err
	case ruler == nil:
		return nil, v1alpha1.ErrThanosRulerNotEnabled
	}

	levelSelector := labels.SelectorFromSet(labels.Set{customRuleResourceLabelKeyLevel: string(level)})
	cachedRules, err := o.resourceRuleCache.ListRules(ruler, ruleNamespace, levelSelector)
	if err != nil {
		return nil, err
	}

	endpointGroups, err := o.ruleClient.ThanosRules(ctx)
	if err != nil {
		return nil, err
	}

	chunk := &rules.ResourceRuleChunk{
		ResourceRulesMap: cachedRules,
		Custom:           true,
		Level:            level,
	}
	return rules.MixAlertingRules(ruleNamespace.Name, chunk, endpointGroups, ruler.ExternalLabels())
}
// getCustomAlertingRule returns the custom alerting rule named ruleName of the
// given level in ruleNamespace. The cached resource rule is mixed with the rule
// groups reported by the thanos ruler endpoint.
// It returns v1alpha1.ErrThanosRulerNotEnabled when there is no thanos ruler and
// v1alpha1.ErrAlertingRuleNotFound when the cache holds no such rule.
func (o *operator) getCustomAlertingRule(ctx context.Context, ruleNamespace *corev1.Namespace,
	ruleName string, level v1alpha1.RuleLevel) (*v1alpha1.GettableAlertingRule, error) {
	ruler, err := o.getThanosRuler()
	switch {
	case err != nil:
		return nil, err
	case ruler == nil:
		return nil, v1alpha1.ErrThanosRulerNotEnabled
	}

	levelSelector := labels.SelectorFromSet(labels.Set{customRuleResourceLabelKeyLevel: string(level)})
	cachedRule, err := o.resourceRuleCache.GetRule(ruler, ruleNamespace, levelSelector, ruleName)
	switch {
	case err != nil:
		return nil, err
	case cachedRule == nil:
		return nil, v1alpha1.ErrAlertingRuleNotFound
	}

	endpointGroups, err := o.ruleClient.ThanosRules(ctx)
	if err != nil {
		return nil, err
	}

	sole := &rules.ResourceRuleSole{
		ResourceRule: *cachedRule,
		Custom:       true,
		Level:        level,
	}
	return rules.MixAlertingRule(ruleNamespace.Name, sole, endpointGroups, ruler.ExternalLabels())
}
// listBuiltinAlertingRules returns the builtin (non-custom) alerting rules.
// When no prometheus resource exists in rulerNamespace (out-cluster prometheus),
// every rule reported by the prometheus endpoint is parsed and returned. Otherwise
// the rules cached from the rule resources in rulerNamespace are mixed with the
// rule groups reported by the endpoint.
func (o *operator) listBuiltinAlertingRules(ctx context.Context) (
	[]*v1alpha1.GettableAlertingRule, error) {
	ruler, err := o.getPrometheusRuler()
	if err != nil {
		return nil, err
	}

	endpointGroups, err := o.ruleClient.PrometheusRules(ctx)
	if err != nil {
		return nil, err
	}

	if ruler == nil {
		// for out-cluster prometheus: accept every rule from the endpoint
		acceptAll := func(group, id string, rule *customalerting.AlertingRule) bool {
			return true
		}
		return rules.ParseAlertingRules(endpointGroups, false, v1alpha1.RuleLevelCluster, acceptAll)
	}

	nsObj, err := o.namespaceInformer.Lister().Get(rulerNamespace)
	if err != nil {
		return nil, err
	}

	cachedRules, err := o.resourceRuleCache.ListRules(ruler, nsObj, nil)
	if err != nil {
		return nil, err
	}

	chunk := &rules.ResourceRuleChunk{
		ResourceRulesMap: cachedRules,
		Custom:           false,
		Level:            v1alpha1.RuleLevelCluster,
	}
	return rules.MixAlertingRules(nsObj.Name, chunk, endpointGroups, ruler.ExternalLabels())
}
// getBuiltinAlertingRule returns the builtin (non-custom) alerting rule with the
// given id.
// When a prometheus resource exists in rulerNamespace, the rule is taken from the
// resource rule cache and mixed with the rule groups reported by the prometheus
// endpoint. Otherwise (out-cluster prometheus) the endpoint rules are filtered by
// id and the first one in AlertingRuleIdCompare order is returned.
// It returns v1alpha1.ErrAlertingRuleNotFound when no rule matches.
func (o *operator) getBuiltinAlertingRule(ctx context.Context, ruleId string) (*v1alpha1.GettableAlertingRule, error) {
	ruler, err := o.getPrometheusRuler()
	if err != nil {
		return nil, err
	}

	endpointGroups, err := o.ruleClient.PrometheusRules(ctx)
	if err != nil {
		return nil, err
	}

	if ruler != nil {
		nsObj, err := o.namespaceInformer.Lister().Get(rulerNamespace)
		if err != nil {
			return nil, err
		}

		cachedRule, err := o.resourceRuleCache.GetRule(ruler, nsObj, nil, ruleId)
		switch {
		case err != nil:
			return nil, err
		case cachedRule == nil:
			return nil, v1alpha1.ErrAlertingRuleNotFound
		}

		sole := &rules.ResourceRuleSole{
			ResourceRule: *cachedRule,
			Custom:       false,
			Level:        v1alpha1.RuleLevelCluster,
		}
		return rules.MixAlertingRule(nsObj.Name, sole, endpointGroups, ruler.ExternalLabels())
	}

	// for out-cluster prometheus
	matchesID := func(group, id string, rule *customalerting.AlertingRule) bool {
		return ruleId == id
	}
	candidates, err := rules.ParseAlertingRules(endpointGroups, false, v1alpha1.RuleLevelCluster, matchesID)
	if err != nil {
		return nil, err
	}
	if len(candidates) == 0 {
		return nil, v1alpha1.ErrAlertingRuleNotFound
	}
	sort.Slice(candidates, func(a, b int) bool {
		return v1alpha1.AlertingRuleIdCompare(candidates[a].Id, candidates[b].Id)
	})
	return candidates[0], nil
}
// CreateCustomAlertingRule creates a custom alerting rule through the thanos ruler.
// An empty namespace creates a cluster-level rule in rulerNamespace. Otherwise a
// namespace-level rule is created and rule.Query is rewritten in place by
// rules.InjectExprNamespaceLabel for that namespace.
// It returns v1alpha1.ErrThanosRulerNotEnabled when there is no thanos ruler and
// v1alpha1.ErrAlertingRuleAlreadyExists when a rule with the same name is cached.
func (o *operator) CreateCustomAlertingRule(ctx context.Context, namespace string,
	rule *v1alpha1.PostableAlertingRule) error {
	ruler, err := o.getThanosRuler()
	switch {
	case err != nil:
		return err
	case ruler == nil:
		return v1alpha1.ErrThanosRulerNotEnabled
	}

	// Start from a copy of the configured labels so the operator's map is never modified.
	resourceLabels := make(map[string]string)
	for key, value := range o.thanosRuleResourceLabels {
		resourceLabels[key] = value
	}

	var level v1alpha1.RuleLevel
	if namespace != "" {
		level = v1alpha1.RuleLevelNamespace
		injected, err := rules.InjectExprNamespaceLabel(rule.Query, namespace)
		if err != nil {
			return err
		}
		rule.Query = injected
	} else {
		namespace = rulerNamespace
		level = v1alpha1.RuleLevelCluster
	}
	resourceLabels[customRuleResourceLabelKeyLevel] = string(level)

	nsObj, err := o.namespaceInformer.Lister().Get(namespace)
	if err != nil {
		return err
	}

	levelSelector := labels.SelectorFromSet(labels.Set{customRuleResourceLabelKeyLevel: string(level)})
	existing, err := o.resourceRuleCache.GetRule(ruler, nsObj, levelSelector, rule.Name)
	switch {
	case err != nil:
		return err
	case existing != nil:
		return v1alpha1.ErrAlertingRuleAlreadyExists
	}

	return ruler.AddAlertingRule(ctx, nsObj, levelSelector,
		customRuleGroupDefault, parseToPrometheusRule(rule), resourceLabels)
}
// UpdateCustomAlertingRule updates the custom alerting rule called name through
// the thanos ruler. rule.Name is overwritten with name before anything else.
// An empty namespace updates a cluster-level rule in rulerNamespace. Otherwise a
// namespace-level rule is updated and rule.Query is rewritten in place by
// rules.InjectExprNamespaceLabel for that namespace.
// The rule stays in the group recorded for it in the resource rule cache.
// It returns v1alpha1.ErrThanosRulerNotEnabled when there is no thanos ruler and
// v1alpha1.ErrAlertingRuleNotFound when no such rule is cached.
func (o *operator) UpdateCustomAlertingRule(ctx context.Context, namespace, name string,
	rule *v1alpha1.PostableAlertingRule) error {
	rule.Name = name

	ruler, err := o.getThanosRuler()
	switch {
	case err != nil:
		return err
	case ruler == nil:
		return v1alpha1.ErrThanosRulerNotEnabled
	}

	// Start from a copy of the configured labels so the operator's map is never modified.
	resourceLabels := make(map[string]string)
	for key, value := range o.thanosRuleResourceLabels {
		resourceLabels[key] = value
	}

	var level v1alpha1.RuleLevel
	if namespace != "" {
		level = v1alpha1.RuleLevelNamespace
		injected, err := rules.InjectExprNamespaceLabel(rule.Query, namespace)
		if err != nil {
			return err
		}
		rule.Query = injected
	} else {
		namespace = rulerNamespace
		level = v1alpha1.RuleLevelCluster
	}
	resourceLabels[customRuleResourceLabelKeyLevel] = string(level)

	nsObj, err := o.namespaceInformer.Lister().Get(namespace)
	if err != nil {
		return err
	}

	levelSelector := labels.SelectorFromSet(labels.Set{customRuleResourceLabelKeyLevel: string(level)})
	existing, err := o.resourceRuleCache.GetRule(ruler, nsObj, levelSelector, rule.Name)
	switch {
	case err != nil:
		return err
	case existing == nil:
		return v1alpha1.ErrAlertingRuleNotFound
	}

	return ruler.UpdateAlertingRule(ctx, nsObj, levelSelector,
		existing.Group, parseToPrometheusRule(rule), resourceLabels)
}
// DeleteCustomAlertingRule deletes the custom alerting rule called name through
// the thanos ruler. An empty namespace targets the cluster-level rules in
// rulerNamespace; otherwise the namespace-level rules of the given namespace.
// It returns v1alpha1.ErrThanosRulerNotEnabled when there is no thanos ruler and
// v1alpha1.ErrAlertingRuleNotFound when no such rule is cached.
func (o *operator) DeleteCustomAlertingRule(ctx context.Context, namespace, name string) error {
	ruler, err := o.getThanosRuler()
	switch {
	case err != nil:
		return err
	case ruler == nil:
		return v1alpha1.ErrThanosRulerNotEnabled
	}

	var level v1alpha1.RuleLevel
	switch namespace {
	case "":
		namespace, level = rulerNamespace, v1alpha1.RuleLevelCluster
	default:
		level = v1alpha1.RuleLevelNamespace
	}

	nsObj, err := o.namespaceInformer.Lister().Get(namespace)
	if err != nil {
		return err
	}

	levelSelector := labels.SelectorFromSet(labels.Set{customRuleResourceLabelKeyLevel: string(level)})
	existing, err := o.resourceRuleCache.GetRule(ruler, nsObj, levelSelector, name)
	switch {
	case err != nil:
		return err
	case existing == nil:
		return v1alpha1.ErrAlertingRuleNotFound
	}

	return ruler.DeleteAlertingRule(ctx, nsObj, levelSelector, existing.Group, name)
}
// getPrometheusRuler returns a ruler for the in-cluster prometheus, built from the
// single prometheus custom resource in rulerNamespace.
// It returns (nil, nil) when there is no such resource and an error when there is
// more than one.
func (o *operator) getPrometheusRuler() (rules.Ruler, error) {
	found, err := o.prometheusInformer.Lister().Prometheuses(rulerNamespace).List(labels.Everything())
	if err != nil {
		return nil, errors.Wrap(err, "error listing prometheuses")
	}

	switch count := len(found); {
	case count > 1:
		// it is not supported temporarily to have multiple prometheuses in the monitoring namespace
		return nil, errors.Errorf(
			"there is more than one prometheus custom resource in %s", rulerNamespace)
	case count == 0:
		return nil, nil
	}

	return rules.NewPrometheusRuler(found[0], o.ruleResourceInformer, o.promResourceClient), nil
}
// getThanosRuler returns a ruler built from the single thanosruler custom resource
// in rulerNamespace.
// It returns (nil, nil) when there is no such resource, in which case custom rules
// are not supported, and an error when there is more than one.
func (o *operator) getThanosRuler() (rules.Ruler, error) {
	thanosrulers, err := o.thanosRulerInformer.Lister().ThanosRulers(rulerNamespace).List(labels.Everything())
	if err != nil {
		// errors.Wrap joins message and cause with ": " itself, so the message
		// must not end with its own separator.
		return nil, errors.Wrap(err, "error listing thanosrulers")
	}
	if len(thanosrulers) > 1 {
		// it is not supported temporarily to have multiple thanosrulers in the monitoring namespace
		return nil, errors.Errorf(
			"there is more than one thanosruler custom resource in %s", rulerNamespace)
	}
	if len(thanosrulers) == 0 {
		// if there is no thanos ruler, custom rules will not be supported
		return nil, nil
	}
	return rules.NewThanosRuler(thanosrulers[0], o.ruleResourceInformer, o.promResourceClient), nil
}
// parseToPrometheusRule converts a postable alerting rule into a prometheus rule.
// The rule's alias and description are stored as internal labels next to the
// user-supplied labels.
//
// The labels are copied into a fresh map: the request may carry no labels at all
// (writing to a nil map would panic), and the caller's map must not be modified
// as a side effect of the conversion.
func parseToPrometheusRule(rule *v1alpha1.PostableAlertingRule) *promresourcesv1.Rule {
	lbls := make(map[string]string, len(rule.Labels)+2)
	for k, v := range rule.Labels {
		lbls[k] = v
	}
	lbls[rules.LabelKeyInternalRuleAlias] = rule.Alias
	lbls[rules.LabelKeyInternalRuleDescription] = rule.Description

	return &promresourcesv1.Rule{
		Alert:       rule.Name,
		Expr:        intstr.FromString(rule.Query),
		For:         rule.Duration,
		Labels:      lbls,
		Annotations: rule.Annotations,
	}
}
// pageAlertingRules filters and sorts alertingRules with queryParams and returns a
// list whose Total is the number of filtered rules and whose Items is the
// sub-slice selected by queryParams.Sub.
func pageAlertingRules(alertingRules []*v1alpha1.GettableAlertingRule,
	queryParams *v1alpha1.AlertingRuleQueryParams) *v1alpha1.GettableAlertingRuleList {
	filtered := queryParams.Filter(alertingRules)
	queryParams.Sort(filtered)

	total := len(filtered)
	return &v1alpha1.GettableAlertingRuleList{
		Total: total,
		Items: queryParams.Sub(filtered),
	}
}
// pageAlerts collects the alerts of alertingRules that pass queryParams.Filter,
// sorts them with queryParams and returns a list whose Total is the number of
// collected alerts and whose Items is the sub-slice selected by queryParams.Sub.
func pageAlerts(alertingRules []*v1alpha1.GettableAlertingRule,
	queryParams *v1alpha1.AlertQueryParams) *v1alpha1.AlertList {
	var collected []*v1alpha1.Alert
	for i := range alertingRules {
		matching := queryParams.Filter(alertingRules[i].Alerts)
		collected = append(collected, matching...)
	}
	queryParams.Sort(collected)

	total := len(collected)
	return &v1alpha1.AlertList{
		Total: total,
		Items: queryParams.Sub(collected),
	}
}

View File

@@ -0,0 +1,246 @@
package rules
import (
"sort"
"sync"
promresourcesv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
prominformersv1 "github.com/prometheus-operator/prometheus-operator/pkg/client/informers/externalversions/monitoring/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/tools/cache"
"kubesphere.io/kubesphere/pkg/api/customalerting/v1alpha1"
"kubesphere.io/kubesphere/pkg/server/errors"
)
// RuleCache caches all rules from the prometheusrule custom resources.
// It is kept up to date by the event handlers registered in NewRuleCache.
type RuleCache struct {
// lock guards namespaces and the per-namespace resource maps stored in it.
lock sync.RWMutex
// namespaces maps a namespace name to the cached rule resources of that namespace.
namespaces map[string]*namespaceRuleCache
}
// NewRuleCache creates an empty RuleCache and registers add, update and delete
// handlers on the given informer. Updates are handled by re-adding the new object.
func NewRuleCache(ruleResourceInformer prominformersv1.PrometheusRuleInformer) *RuleCache {
	rc := &RuleCache{
		namespaces: make(map[string]*namespaceRuleCache),
	}

	handlers := cache.ResourceEventHandlerFuncs{
		AddFunc:    rc.addCache,
		DeleteFunc: rc.deleteCache,
		UpdateFunc: func(_, newObj interface{}) {
			rc.addCache(newObj)
		},
	}
	ruleResourceInformer.Informer().AddEventHandler(handlers)

	return rc
}
// addCache parses the given prometheusrule resource and stores the result under
// its namespace and name, replacing any previous entry. Objects of any other type
// are ignored. The resource is parsed before the write lock is taken.
func (c *RuleCache) addCache(referObj interface{}) {
	pr, isRule := referObj.(*promresourcesv1.PrometheusRule)
	if !isRule {
		return
	}
	parsed := parseRuleResource(pr)

	c.lock.Lock()
	defer c.lock.Unlock()

	// A missing key and a nil entry are treated alike: both get a fresh namespace cache.
	nsCache := c.namespaces[pr.Namespace]
	if nsCache == nil {
		nsCache = &namespaceRuleCache{
			namespace: pr.Namespace,
			resources: make(map[string]*resourceRuleCache),
		}
		c.namespaces[pr.Namespace] = nsCache
	}
	nsCache.resources[pr.Name] = parsed
}
// deleteCache removes the given prometheusrule resource from the cache and drops
// the namespace entry once it holds no more resources.
//
// Delete handlers may receive a cache.DeletedFinalStateUnknown tombstone instead of
// the object itself when the delete event was missed; the tombstone is unwrapped so
// that the stale entry is still removed. Objects of any other type are ignored.
func (c *RuleCache) deleteCache(referObj interface{}) {
	if tombstone, ok := referObj.(cache.DeletedFinalStateUnknown); ok {
		referObj = tombstone.Obj
	}
	pr, ok := referObj.(*promresourcesv1.PrometheusRule)
	if !ok || pr == nil {
		return
	}

	c.lock.Lock()
	defer c.lock.Unlock()

	// Guard against a nil entry the same way addCache does.
	cn, ok := c.namespaces[pr.Namespace]
	if !ok || cn == nil {
		return
	}
	delete(cn.resources, pr.Name)
	if len(cn.resources) == 0 {
		delete(c.namespaces, pr.Namespace)
	}
}
// getResourceRuleCaches returns the cached rule resources of ruleNamespace whose
// labels match the ruler's rule resource selector combined with
// extraRuleResourceSelector, keyed by resource name.
// It returns (nil, nil) when ruleNamespaceSelected reports that the namespace is
// not selected for the ruler, and an empty map when nothing matches.
func (c *RuleCache) getResourceRuleCaches(ruler Ruler, ruleNamespace *corev1.Namespace,
	extraRuleResourceSelector labels.Selector) (map[string]*resourceRuleCache, error) {
	selected, err := ruleNamespaceSelected(ruler, ruleNamespace)
	switch {
	case err != nil:
		return nil, err
	case !selected:
		return nil, nil
	}

	selector, err := ruler.RuleResourceSelector(extraRuleResourceSelector)
	if err != nil {
		return nil, err
	}

	matched := make(map[string]*resourceRuleCache)

	c.lock.RLock()
	defer c.lock.RUnlock()

	nsCache := c.namespaces[ruleNamespace.Name]
	if nsCache == nil {
		return matched, nil
	}
	for _, res := range nsCache.resources {
		if !selector.Matches(labels.Set(res.Labels)) {
			continue
		}
		matched[res.Name] = res
	}
	return matched, nil
}
// GetRule looks up a single rule among the cached rule resources selected for the
// ruler in ruleNamespace. For a *PrometheusRuler idOrName is matched against rule
// ids; for a *ThanosRuler it is matched against rule (alert) names. Any other
// ruler type yields an error.
// When several rules match, the one ordered first by v1alpha1.AlertingRuleIdCompare
// is returned. It returns (nil, nil) when nothing matches. The returned rule holds
// a deep copy of the cached prometheus rule.
func (c *RuleCache) GetRule(ruler Ruler, ruleNamespace *corev1.Namespace,
	extraRuleResourceSelector labels.Selector, idOrName string) (*ResourceRule, error) {
	resourceCaches, err := c.getResourceRuleCaches(ruler, ruleNamespace, extraRuleResourceSelector)
	if err != nil {
		return nil, err
	}
	if len(resourceCaches) == 0 {
		return nil, nil
	}

	var matched []*ResourceRule
	collect := func(resourceName string, cached *cacheRule) {
		matched = append(matched, &ResourceRule{
			Group:        cached.Group,
			Id:           cached.Id,
			Rule:         cached.Rule.DeepCopy(),
			ResourceName: resourceName,
		})
	}

	switch ruler.(type) {
	case *PrometheusRuler:
		for resourceName, rc := range resourceCaches {
			if cached, found := rc.IdRules[idOrName]; found {
				collect(resourceName, cached)
			}
		}
	case *ThanosRuler:
		for resourceName, rc := range resourceCaches {
			// Ranging over the slice of a missing key is a no-op.
			for _, cached := range rc.NameRules[idOrName] {
				collect(resourceName, cached)
			}
		}
	default:
		return nil, errors.New("unsupported ruler type")
	}

	switch len(matched) {
	case 0:
		return nil, nil
	case 1:
		return matched[0], nil
	}

	// guarantees the stability of the get operations.
	sort.Slice(matched, func(a, b int) bool {
		return v1alpha1.AlertingRuleIdCompare(matched[a].Id, matched[b].Id)
	})
	return matched[0], nil
}
// ListRules returns the alerting rules of the cached rule resources selected for
// the ruler in ruleNamespace, keyed by resource name. Every returned rule holds a
// deep copy of the cached prometheus rule. Resources without any rule are left
// out. It returns (nil, nil) when no resource is selected.
func (c *RuleCache) ListRules(ruler Ruler, ruleNamespace *corev1.Namespace,
	extraRuleResourceSelector labels.Selector) (map[string]*ResourceRules, error) {
	resourceCaches, err := c.getResourceRuleCaches(ruler, ruleNamespace, extraRuleResourceSelector)
	if err != nil {
		return nil, err
	}
	if len(resourceCaches) == 0 {
		return nil, nil
	}

	listed := make(map[string]*ResourceRules)
	for resourceName, rc := range resourceCaches {
		resourceRules := &ResourceRules{
			GroupSet:  make(map[string]struct{}),
			IdRules:   make(map[string]*ResourceRule),
			NameRules: make(map[string][]*ResourceRule),
		}
		for alertName, cachedRules := range rc.NameRules {
			for _, cached := range cachedRules {
				copied := &ResourceRule{
					Group:        cached.Group,
					Id:           cached.Id,
					Rule:         cached.Rule.DeepCopy(),
					ResourceName: resourceName,
				}
				resourceRules.GroupSet[cached.Group] = struct{}{}
				resourceRules.IdRules[copied.Id] = copied
				resourceRules.NameRules[alertName] = append(resourceRules.NameRules[alertName], copied)
			}
		}
		if len(resourceRules.IdRules) == 0 {
			continue
		}
		listed[resourceName] = resourceRules
	}
	return listed, nil
}
// namespaceRuleCache holds the cached rule resources of one namespace.
type namespaceRuleCache struct {
// namespace is the name of the namespace this cache belongs to.
namespace string
// resources maps a prometheusrule resource name to its parsed rules.
resources map[string]*resourceRuleCache
}
// resourceRuleCache holds the alerting rules parsed from one prometheusrule resource.
type resourceRuleCache struct {
// Name is the name of the prometheusrule resource.
Name string
// Labels are the labels of the prometheusrule resource; they are matched against rule resource selectors.
Labels map[string]string
// GroupSet is the set of group names that contain at least one alerting rule.
GroupSet map[string]struct{}
// IdRules indexes the alerting rules by rule id.
IdRules map[string]*cacheRule
// NameRules indexes the alerting rules by alert name; one name may map to several rules.
NameRules map[string][]*cacheRule
}
// cacheRule is a single cached alerting rule.
type cacheRule struct {
// Group is the name of the rule group the rule belongs to.
Group string
// Id is the rule id generated by GenResourceRuleIdIgnoreFormat.
Id string
// Rule is the prometheus rule; readers receive deep copies of it.
Rule *promresourcesv1.Rule
}
// parseRuleResource builds the cache entry for a prometheusrule resource. Only
// alerting rules (rules with a non-empty Alert) are kept; they are indexed by
// generated id and by alert name, and their group names are recorded.
func parseRuleResource(pr *promresourcesv1.PrometheusRule) *resourceRuleCache {
	parsed := &resourceRuleCache{
		Name:      pr.Name,
		Labels:    pr.Labels,
		GroupSet:  make(map[string]struct{}),
		IdRules:   make(map[string]*cacheRule),
		NameRules: make(map[string][]*cacheRule),
	}

	for _, group := range pr.Spec.Groups {
		for idx := range group.Rules {
			// Copy the rule so that every cache entry points at its own value.
			rule := group.Rules[idx]
			if rule.Alert == "" {
				continue
			}

			entry := &cacheRule{
				Group: group.Name,
				Id:    GenResourceRuleIdIgnoreFormat(group.Name, &rule),
				Rule:  &rule,
			}
			parsed.GroupSet[group.Name] = struct{}{}
			parsed.NameRules[rule.Alert] = append(parsed.NameRules[rule.Alert], entry)
			parsed.IdRules[entry.Id] = entry
		}
	}

	return parsed
}

View File

@@ -0,0 +1,31 @@
package rules
import (
promresourcesv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"kubesphere.io/kubesphere/pkg/api/customalerting/v1alpha1"
)
// ResourceRules holds the alerting rules of one rule resource.
type ResourceRules struct {
// GroupSet is the set of rule group names.
GroupSet map[string]struct{}
// IdRules indexes the rules by rule id.
IdRules map[string]*ResourceRule
// NameRules indexes the rules by name; one name may map to several rules.
NameRules map[string][]*ResourceRule
}
// ResourceRule is a single rule together with where it is stored.
type ResourceRule struct {
// ResourceName is the name of the rule resource that contains the rule.
ResourceName string
// Group is the name of the rule group that contains the rule.
Group string
// Id is the id of the rule.
Id string
// Rule is the prometheus rule itself.
Rule *promresourcesv1.Rule
}
// ResourceRuleSole is a single resource rule annotated with its level and
// whether it is a custom rule.
type ResourceRuleSole struct {
// Level is the rule level.
Level v1alpha1.RuleLevel
// Custom tells whether the rule is a custom rule.
Custom bool
// ResourceRule is the embedded rule.
ResourceRule
}
// ResourceRuleChunk is a set of resource rules annotated with their level and
// whether they are custom rules.
type ResourceRuleChunk struct {
// Level is the rule level shared by the rules.
Level v1alpha1.RuleLevel
// Custom tells whether the rules are custom rules.
Custom bool
// ResourceRulesMap holds the rules keyed by rule resource name.
ResourceRulesMap map[string]*ResourceRules
}

View File

@@ -0,0 +1,501 @@
package rules
import (
"context"
"fmt"
"sort"
"github.com/ghodss/yaml"
"github.com/pkg/errors"
promresourcesv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
prominformersv1 "github.com/prometheus-operator/prometheus-operator/pkg/client/informers/externalversions/monitoring/v1"
promresourcesclient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"kubesphere.io/kubesphere/pkg/api/customalerting/v1alpha1"
)
const (
// customAlertingRuleResourcePrefix is a name prefix for custom alerting rule resources.
// NOTE(review): its use is not visible in this part of the file — presumably it prefixes
// the names of the rule resources created for custom rules; confirm.
customAlertingRuleResourcePrefix = "custom-alerting-rule-"
)
var (
// maxSecretSize mirrors corev1.MaxSecretSize.
maxSecretSize = corev1.MaxSecretSize
// maxConfigMapDataSize is 30% of maxSecretSize, truncated to an int. The YAML form of a
// rule resource must stay below it for a rule to be added or updated.
maxConfigMapDataSize = int(float64(maxSecretSize) * 0.3)
// errOutOfConfigMapSize is returned when a change would make a rule resource reach that limit.
errOutOfConfigMapSize = errors.New("out of config map size")
)
// Ruler abstracts a component that evaluates alerting rules stored in rule resources.
// PrometheusRuler and ThanosRuler implement it.
type Ruler interface {
// Namespace returns the namespace of the underlying ruler resource.
Namespace() string
// RuleResourceNamespaceSelector returns the selector for the namespaces from which rule
// resources are picked up; implementations return a nil selector when none is configured.
RuleResourceNamespaceSelector() (labels.Selector, error)
// RuleResourceSelector returns the selector for rule resources, combined with the
// requirements of extraRuleResourceSelector when that is not nil.
RuleResourceSelector(extraRuleResourceSelector labels.Selector) (labels.Selector, error)
// ExternalLabels returns a function yielding the external labels; it may return nil.
ExternalLabels() func() map[string]string
// ListRuleResources lists the rule resources in ruleNamespace that are selected for the ruler.
ListRuleResources(ruleNamespace *corev1.Namespace, extraRuleResourceSelector labels.Selector) (
[]*promresourcesv1.PrometheusRule, error)
// AddAlertingRule adds the rule to the given group.
AddAlertingRule(ctx context.Context, ruleNamespace *corev1.Namespace, extraRuleResourceSelector labels.Selector,
group string, rule *promresourcesv1.Rule, ruleResourceLabels map[string]string) error
// UpdateAlertingRule updates the rule in the given group.
UpdateAlertingRule(ctx context.Context, ruleNamespace *corev1.Namespace, extraRuleResourceSelector labels.Selector,
group string, rule *promresourcesv1.Rule, ruleResourceLabels map[string]string) error
// DeleteAlertingRule deletes the rule with the given name from the given group.
DeleteAlertingRule(ctx context.Context, ruleNamespace *corev1.Namespace, extraRuleResourceSelector labels.Selector,
group string, name string) error
}
// ruleResource is a prometheusrule resource with helper methods for editing its
// alerting rules and committing the result through the resource client.
type ruleResource promresourcesv1.PrometheusRule
// deleteAlertingRule removes every alerting rule named name from the resource.
// Groups left without rules are dropped. The groups are replaced only when at
// least one rule was removed; in that case true is returned to indicate that the
// resource should be updated. The returned error is always nil.
func (r *ruleResource) deleteAlertingRule(name string) (bool, error) {
	var (
		remaining []promresourcesv1.RuleGroup
		deleted   bool
	)

	for i := range r.Spec.Groups {
		group := r.Spec.Groups[i]

		var kept []promresourcesv1.Rule
		for j := range group.Rules {
			if alert := group.Rules[j].Alert; alert != "" && alert == name {
				deleted = true
				continue
			}
			kept = append(kept, group.Rules[j])
		}
		if len(kept) == 0 {
			continue
		}

		remaining = append(remaining, promresourcesv1.RuleGroup{
			Name:                    group.Name,
			Interval:                group.Interval,
			PartialResponseStrategy: group.PartialResponseStrategy,
			Rules:                   kept,
		})
	}

	if deleted {
		r.Spec.Groups = remaining
	}
	return deleted, nil
}
// updateAlertingRule replaces the alerting rule that has the same alert name as
// rule. Every existing rule with that name is removed and rule is appended to the
// group called groupName, which is created at the end when it does not exist.
// Groups left without rules are dropped.
//
// It returns true when the resource was changed and should be updated, and
// (false, nil) when the resource holds no rule with that name. When the YAML form
// of the changed resource would reach maxConfigMapDataSize the resource is left
// untouched and errOutOfConfigMapSize is returned.
//
// The groups keep their original order; rebuilding them from a map would shuffle
// the resource on every update and silently merge groups that share a name.
func (r *ruleResource) updateAlertingRule(groupName string, rule *promresourcesv1.Rule) (bool, error) {
	var (
		found  bool
		target = -1 // index in groups of the first group named groupName
		pr     = (promresourcesv1.PrometheusRule)(*r)
		npr    = pr.DeepCopy() // work on a copy so r stays intact on failure
		groups = make([]promresourcesv1.RuleGroup, 0, len(npr.Spec.Groups)+1)
	)

	for _, g := range npr.Spec.Groups {
		var kept []promresourcesv1.Rule
		for i := range g.Rules {
			if alert := g.Rules[i].Alert; alert != "" && alert == rule.Alert {
				found = true
				continue
			}
			kept = append(kept, g.Rules[i])
		}
		if len(kept) == 0 {
			continue
		}
		if target < 0 && g.Name == groupName {
			target = len(groups)
		}
		groups = append(groups, promresourcesv1.RuleGroup{
			Name:                    g.Name,
			Interval:                g.Interval,
			PartialResponseStrategy: g.PartialResponseStrategy,
			Rules:                   kept,
		})
	}

	if !found {
		return false, nil
	}

	if target >= 0 {
		groups[target].Rules = append(groups[target].Rules, *rule)
	} else {
		groups = append(groups, promresourcesv1.RuleGroup{
			Name:  groupName,
			Rules: []promresourcesv1.Rule{*rule},
		})
	}

	npr.Spec.Groups = groups
	content, err := yaml.Marshal(npr)
	if err != nil {
		return false, errors.Wrap(err, "failed to marshal content")
	}
	if len(content) >= maxConfigMapDataSize { // check size limit
		return false, errOutOfConfigMapSize
	}

	r.Spec.Groups = groups
	return true, nil
}
// addAlertingRule appends rule to the group called group, creating the group at
// the end of the resource when it does not exist.
//
// It returns true when the resource was changed and should be updated. When the
// YAML form of the changed resource would reach maxConfigMapDataSize the resource
// is left untouched and errOutOfConfigMapSize is returned.
func (r *ruleResource) addAlertingRule(group string, rule *promresourcesv1.Rule) (bool, error) {
	var (
		pr    = (promresourcesv1.PrometheusRule)(*r)
		npr   = pr.DeepCopy() // work on a copy so r stays intact on failure
		added bool
	)

	for i := range npr.Spec.Groups {
		if npr.Spec.Groups[i].Name == group {
			npr.Spec.Groups[i].Rules = append(npr.Spec.Groups[i].Rules, *rule)
			added = true
			break
		}
	}
	if !added { // add a group when there is no group with the specified group name
		npr.Spec.Groups = append(npr.Spec.Groups, promresourcesv1.RuleGroup{
			Name:  group,
			Rules: []promresourcesv1.Rule{*rule},
		})
	}

	content, err := yaml.Marshal(npr)
	if err != nil {
		return false, errors.Wrap(err, "failed to marshal content")
	}
	if len(content) >= maxConfigMapDataSize { // check size limit
		return false, errOutOfConfigMapSize
	}

	r.Spec.Groups = npr.Spec.Groups
	return true, nil
}
// commit persists the resource through the given client. A resource without any
// rule group is deleted; otherwise it is updated.
//
// After a successful update the object returned by the API server is copied back
// into the receiver, so that r carries the refreshed state. Copying it into a
// local value, as was done before, discarded the result.
func (r *ruleResource) commit(ctx context.Context, prometheusResourceClient promresourcesclient.Interface) error {
	var pr = (promresourcesv1.PrometheusRule)(*r)

	if len(pr.Spec.Groups) == 0 {
		return prometheusResourceClient.MonitoringV1().PrometheusRules(r.Namespace).Delete(ctx, r.Name, metav1.DeleteOptions{})
	}

	newPr, err := prometheusResourceClient.MonitoringV1().PrometheusRules(r.Namespace).Update(ctx, &pr, metav1.UpdateOptions{})
	if err != nil {
		return err
	}
	newPr.DeepCopyInto((*promresourcesv1.PrometheusRule)(r))
	return nil
}
// PrometheusRuler is the Ruler backed by a prometheus custom resource.
// Its rules are read-only: adding, updating and deleting rules is not supported.
type PrometheusRuler struct {
// resource is the prometheus custom resource.
resource *promresourcesv1.Prometheus
// informer lists the prometheusrule resources.
informer prominformersv1.PrometheusRuleInformer
// client is the prometheus resource client.
client promresourcesclient.Interface
}
// NewPrometheusRuler returns a Ruler backed by the given prometheus resource,
// rule resource informer and resource client.
func NewPrometheusRuler(resource *promresourcesv1.Prometheus, informer prominformersv1.PrometheusRuleInformer,
	client promresourcesclient.Interface) Ruler {
	ruler := new(PrometheusRuler)
	ruler.resource = resource
	ruler.informer = informer
	ruler.client = client
	return ruler
}
// Namespace returns the namespace of the prometheus resource.
func (r *PrometheusRuler) Namespace() string {
	prometheus := r.resource
	return prometheus.Namespace
}
// RuleResourceNamespaceSelector converts the RuleNamespaceSelector of the
// prometheus resource into a selector. It returns (nil, nil) when the resource
// does not set one.
func (r *PrometheusRuler) RuleResourceNamespaceSelector() (labels.Selector, error) {
	if nsSelector := r.resource.Spec.RuleNamespaceSelector; nsSelector != nil {
		return metav1.LabelSelectorAsSelector(nsSelector)
	}
	return nil, nil
}
// RuleResourceSelector converts the RuleSelector of the prometheus resource into a
// selector and, when extraRuleResourceSelector is not nil and is selectable, adds
// its requirements to the result.
func (r *PrometheusRuler) RuleResourceSelector(extraRuleResourceSelector labels.Selector) (labels.Selector, error) {
	selector, err := metav1.LabelSelectorAsSelector(r.resource.Spec.RuleSelector)
	if err != nil {
		return nil, err
	}
	if extraRuleResourceSelector == nil {
		return selector, nil
	}
	if extra, selectable := extraRuleResourceSelector.Requirements(); selectable {
		selector = selector.Add(extra...)
	}
	return selector, nil
}
// ExternalLabels always returns nil: the external labels are ignored because the
// rules gotten from the prometheus endpoint do not include them.
func (r *PrometheusRuler) ExternalLabels() func() map[string]string {
	var none func() map[string]string
	return none
}
// ListRuleResources lists, from the informer cache, the prometheusrule resources in
// ruleNamespace that match the ruler's rule selector combined with extraRuleResourceSelector.
// It returns nil without error when the namespace is not selected by the ruler.
func (r *PrometheusRuler) ListRuleResources(ruleNamespace *corev1.Namespace, extraRuleResourceSelector labels.Selector) (
	[]*promresourcesv1.PrometheusRule, error) {
	selected, err := ruleNamespaceSelected(r, ruleNamespace)
	switch {
	case err != nil:
		return nil, err
	case !selected:
		return nil, nil
	}

	selector, err := r.RuleResourceSelector(extraRuleResourceSelector)
	if err != nil {
		return nil, err
	}
	lister := r.informer.Lister().PrometheusRules(ruleNamespace.Name)
	return lister.List(selector)
}
// AddAlertingRule always fails: adding rules is not supported for prometheus.
func (r *PrometheusRuler) AddAlertingRule(ctx context.Context, ruleNamespace *corev1.Namespace,
	extraRuleResourceSelector labels.Selector,
	group string, rule *promresourcesv1.Rule, ruleResourceLabels map[string]string) error {
	unsupported := errors.New("not supported to add rules for prometheus")
	return unsupported
}
// UpdateAlertingRule always fails: updating rules is not supported for prometheus.
func (r *PrometheusRuler) UpdateAlertingRule(ctx context.Context, ruleNamespace *corev1.Namespace,
	extraRuleResourceSelector labels.Selector,
	group string, rule *promresourcesv1.Rule, ruleResourceLabels map[string]string) error {
	unsupported := errors.New("not supported to update rules for prometheus")
	return unsupported
}
// DeleteAlertingRule always fails: deleting rules is not supported for prometheus.
// The error message now names the delete operation; it previously said "update".
func (r *PrometheusRuler) DeleteAlertingRule(ctx context.Context, ruleNamespace *corev1.Namespace,
	extraRuleResourceSelector labels.Selector,
	group string, name string) error {
	return errors.New("not supported to delete rules for prometheus")
}
// ThanosRuler is the Ruler implementation returned by NewThanosRuler. Its methods read the
// namespace, rule selectors and labels from a ThanosRuler custom resource and add, update
// and delete alerting rules in prometheusrule resources.
type ThanosRuler struct {
// resource is the ThanosRuler custom resource whose namespace, selectors and labels are consulted.
resource *promresourcesv1.ThanosRuler
// informer provides the lister used to list prometheusrule resources.
informer prominformersv1.PrometheusRuleInformer
// client is the clientset used to create prometheusrule resources and commit changes to them.
client promresourcesclient.Interface
}
// NewThanosRuler wraps the given ThanosRuler custom resource, prometheusrule informer and
// clientset into a ThanosRuler and returns it as a Ruler.
func NewThanosRuler(resource *promresourcesv1.ThanosRuler, informer prominformersv1.PrometheusRuleInformer,
	client promresourcesclient.Interface) Ruler {
	ruler := new(ThanosRuler)
	ruler.resource = resource
	ruler.informer = informer
	ruler.client = client
	return ruler
}
// Namespace returns the namespace of the underlying ThanosRuler custom resource.
func (r *ThanosRuler) Namespace() string {
	ns := r.resource.Namespace
	return ns
}
// RuleResourceNamespaceSelector converts Spec.RuleNamespaceSelector of the ThanosRuler
// resource into a labels.Selector. It returns a nil selector (and a nil error) when the
// field is not set.
func (r *ThanosRuler) RuleResourceNamespaceSelector() (labels.Selector, error) {
	nsSelector := r.resource.Spec.RuleNamespaceSelector
	if nsSelector != nil {
		return metav1.LabelSelectorAsSelector(nsSelector)
	}
	return nil, nil
}
// RuleResourceSelector converts Spec.RuleSelector of the ThanosRuler resource into a
// labels.Selector and, when extraRuleSelector is non-nil and reports its requirements as
// selectable, appends those requirements to it.
func (r *ThanosRuler) RuleResourceSelector(extraRuleSelector labels.Selector) (labels.Selector, error) {
	selector, err := metav1.LabelSelectorAsSelector(r.resource.Spec.RuleSelector)
	if err != nil {
		return nil, err
	}
	if extraRuleSelector == nil {
		return selector, nil
	}
	requirements, selectable := extraRuleSelector.Requirements()
	if selectable {
		selector = selector.Add(requirements...)
	}
	return selector, nil
}
// ExternalLabels returns a function yielding a copy, taken at call time, of Spec.Labels of
// the ThanosRuler resource. Rules gotten from the thanos ruler endpoint include these labels.
func (r *ThanosRuler) ExternalLabels() func() map[string]string {
	snapshot := make(map[string]string)
	// Ranging over a nil map is a no-op, so an unset Spec.Labels yields an empty map.
	for key, value := range r.resource.Spec.Labels {
		snapshot[key] = value
	}
	return func() map[string]string {
		return snapshot
	}
}
// ListRuleResources lists, from the informer cache, the prometheusrule resources in
// ruleNamespace that match the ruler's rule selector combined with extraRuleSelector.
// It returns nil without error when the namespace is not selected by the ruler.
func (r *ThanosRuler) ListRuleResources(ruleNamespace *corev1.Namespace, extraRuleSelector labels.Selector) (
	[]*promresourcesv1.PrometheusRule, error) {
	selected, err := ruleNamespaceSelected(r, ruleNamespace)
	switch {
	case err != nil:
		return nil, err
	case !selected:
		return nil, nil
	}

	selector, err := r.RuleResourceSelector(extraRuleSelector)
	if err != nil {
		return nil, err
	}
	lister := r.informer.Lister().PrometheusRules(ruleNamespace.Name)
	return lister.List(selector)
}
// AddAlertingRule lists the prometheusrule resources selected for ruleNamespace and adds
// the rule to one of them, or to a newly created resource, via addAlertingRule.
func (r *ThanosRuler) AddAlertingRule(ctx context.Context, ruleNamespace *corev1.Namespace,
	extraRuleResourceSelector labels.Selector,
	group string, rule *promresourcesv1.Rule, ruleResourceLabels map[string]string) error {
	existing, err := r.ListRuleResources(ruleNamespace, extraRuleResourceSelector)
	if err == nil {
		err = r.addAlertingRule(ctx, ruleNamespace, existing, nil, group, rule, ruleResourceLabels)
	}
	return err
}
// addAlertingRule adds the rule, under the given group, to one of prometheusRules, or to a
// newly created prometheusrule resource when no candidate accepts it.
//
// Candidates are tried from the smallest to the largest printed representation, and
// resources whose names are keys of excludeRuleResources are skipped. prometheusRules is
// sorted in place. A new resource is created in ruleNamespace with a generated name and
// ruleResourceLabels as its labels.
func (r *ThanosRuler) addAlertingRule(ctx context.Context, ruleNamespace *corev1.Namespace,
	prometheusRules []*promresourcesv1.PrometheusRule, excludeRuleResources map[string]*ruleResource,
	group string, rule *promresourcesv1.Rule, ruleResourceLabels map[string]string) error {

	// Render each candidate once instead of on every comparison, and order with a strict
	// "less" (the previous "<=" reported equal elements as less than each other).
	sizes := make(map[*promresourcesv1.PrometheusRule]int, len(prometheusRules))
	for _, pr := range prometheusRules {
		sizes[pr] = len(fmt.Sprint(pr))
	}
	sort.Slice(prometheusRules, func(i, j int) bool {
		return sizes[prometheusRules[i]] < sizes[prometheusRules[j]]
	})

	for _, prometheusRule := range prometheusRules {
		// A lookup in a nil or empty map simply reports "not excluded".
		if _, excluded := excludeRuleResources[prometheusRule.Name]; excluded {
			continue
		}
		resource := ruleResource(*prometheusRule)
		ok, err := resource.addAlertingRule(group, rule)
		if err != nil {
			if err == errOutOfConfigMapSize {
				// Candidates are in ascending size order, so the remaining (larger)
				// ones are presumably full as well; fall through to creating a new one.
				break
			}
			return err
		}
		if ok {
			return resource.commit(ctx, r.client)
		}
	}

	// create a new rule resource and add rule into it when all existing rule resources are full.
	newPromRule := promresourcesv1.PrometheusRule{
		ObjectMeta: metav1.ObjectMeta{
			Namespace:    ruleNamespace.Name,
			GenerateName: customAlertingRuleResourcePrefix,
			Labels:       ruleResourceLabels,
		},
		Spec: promresourcesv1.PrometheusRuleSpec{
			Groups: []promresourcesv1.RuleGroup{{
				Name:  group,
				Rules: []promresourcesv1.Rule{*rule},
			}},
		},
	}
	if _, err := r.client.MonitoringV1().
		PrometheusRules(ruleNamespace.Name).Create(ctx, &newPromRule, metav1.CreateOptions{}); err != nil {
		// Name is never set on newPromRule (only GenerateName is), so report the name prefix.
		return errors.Wrapf(err, "error creating a prometheusrule resource %s/%s",
			newPromRule.Namespace, newPromRule.GenerateName)
	}
	return nil
}
// UpdateAlertingRule updates the alerting rule named rule.Alert in the prometheusrule
// resources selected for ruleNamespace.
//
// The rule is updated in place in the first resource that accepts the update, and the rule
// with the same name is then deleted from the resources visited afterwards. When a resource
// rejects the update with errOutOfConfigMapSize, the rule is added elsewhere through
// addAlertingRule (unless another resource accepted the update) and deleted from the
// rejecting resource. v1alpha1.ErrAlertingRuleNotFound is returned when no resource holds
// the rule.
func (r *ThanosRuler) UpdateAlertingRule(ctx context.Context, ruleNamespace *corev1.Namespace,
extraRuleResourceSelector labels.Selector,
group string, rule *promresourcesv1.Rule, ruleResourceLabels map[string]string) error {
prometheusRules, err := r.ListRuleResources(ruleNamespace, extraRuleResourceSelector)
if err != nil {
return err
}
var (
// found is set once any resource turns out to hold the rule.
found bool
// success is set once the rule has been updated in place and committed.
success bool
// resourcesToDelRule collects, by name, the resources that hold the rule but cannot
// take the updated version within the size limit.
resourcesToDelRule = make(map[string]*ruleResource)
)
for _, prometheusRule := range prometheusRules {
resource := ruleResource(*prometheusRule)
if success { // If the update has been successful, delete the possible same rule in other resources
if ok, err := resource.deleteAlertingRule(rule.Alert); err != nil {
return err
} else if ok {
if err = resource.commit(ctx, r.client); err != nil {
return err
}
}
continue
}
if ok, err := resource.updateAlertingRule(group, rule); err != nil {
if err == errOutOfConfigMapSize {
// updating the rule in the resource will oversize the size limit,
// so delete it and then add the new rule to a new resource.
resourcesToDelRule[resource.Name] = &resource
found = true
} else {
return err
}
} else if ok {
if err = resource.commit(ctx, r.client); err != nil {
return err
}
found = true
success = true
}
}
if !found {
return v1alpha1.ErrAlertingRuleNotFound
}
if !success {
// No resource could take the update in place: add the rule to another resource,
// excluding the ones that rejected the update.
err := r.addAlertingRule(ctx, ruleNamespace, prometheusRules, resourcesToDelRule, group, rule, ruleResourceLabels)
if err != nil {
return err
}
}
// Delete the rule from the resources that could not hold the updated version.
for _, resource := range resourcesToDelRule {
if ok, err := resource.deleteAlertingRule(rule.Alert); err != nil {
return err
} else if ok {
if err = resource.commit(ctx, r.client); err != nil {
return err
}
}
}
return nil
}
// DeleteAlertingRule deletes the alerting rule with the given name from every selected
// prometheusrule resource in ruleNamespace that holds it, committing each changed resource.
// It returns v1alpha1.ErrAlertingRuleNotFound when no resource held the rule. The group
// argument is not used.
func (r *ThanosRuler) DeleteAlertingRule(ctx context.Context, ruleNamespace *corev1.Namespace,
	extraRuleResourceSelector labels.Selector, group string, name string) error {
	candidates, err := r.ListRuleResources(ruleNamespace, extraRuleResourceSelector)
	if err != nil {
		return err
	}

	deleted := false
	for i := range candidates {
		resource := ruleResource(*candidates[i])
		ok, err := resource.deleteAlertingRule(name)
		if err != nil {
			return err
		}
		if !ok {
			continue
		}
		if err := resource.commit(ctx, r.client); err != nil {
			return err
		}
		deleted = true
	}

	if deleted {
		return nil
	}
	return v1alpha1.ErrAlertingRuleNotFound
}
// ruleNamespaceSelected reports whether the ruler selects rule resources from ruleNamespace.
// Without a namespace selector only the ruler's own namespace is selected; otherwise the
// labels of ruleNamespace must match the selector.
func ruleNamespaceSelected(r Ruler, ruleNamespace *corev1.Namespace) (bool, error) {
	nsSelector, err := r.RuleResourceNamespaceSelector()
	if err != nil {
		return false, err
	}
	if nsSelector == nil {
		return r.Namespace() == ruleNamespace.Name, nil
	}
	return nsSelector.Matches(labels.Set(ruleNamespace.Labels)), nil
}

View File

@@ -0,0 +1,421 @@
package rules
import (
"path/filepath"
"sort"
"strings"
"time"
"github.com/pkg/errors"
"github.com/prometheus-community/prom-label-proxy/injectproxy"
promresourcesv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
prommodel "github.com/prometheus/common/model"
promlabels "github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/rules"
"k8s.io/klog"
"kubesphere.io/kubesphere/pkg/api/customalerting/v1alpha1"
"kubesphere.io/kubesphere/pkg/simple/client/customalerting"
)
const (
// ErrGenRuleId is the message used to wrap errors returned while generating a rule id.
ErrGenRuleId = "error generating rule id"
// The group, name, query and duration keys are added to a rule's label set before the
// label set is fingerprinted into the rule id.
LabelKeyInternalRuleGroup = "__rule_group__"
LabelKeyInternalRuleName = "__rule_name__"
LabelKeyInternalRuleQuery = "__rule_query__"
LabelKeyInternalRuleDuration = "__rule_duration__"
// The alias and description keys are rule labels whose values are reported as the alias
// and the description of a rule rather than as ordinary labels.
LabelKeyInternalRuleAlias = "__rule_alias__"
LabelKeyInternalRuleDescription = "__rule_description__"
)
// FormatExpr parses the given prometheus query expression and returns the string form of
// the parsed expression. A parse failure is returned wrapped together with the expression.
func FormatExpr(expr string) (string, error) {
	parsed, err := parser.ParseExpr(expr)
	if err != nil {
		return "", errors.Wrapf(err, "failed to parse expr: %s", expr)
	}
	return parsed.String(), nil
}
// InjectExprNamespaceLabel enforces an equality matcher on the label "namespace", with the
// given namespace as its value, in the prometheus query expression, which limits the scope
// of the query. It returns the string form of the resulting expression.
func InjectExprNamespaceLabel(expr, namespace string) (string, error) {
	root, err := parser.ParseExpr(expr)
	if err != nil {
		return "", err
	}

	matcher := &promlabels.Matcher{
		Type:  promlabels.MatchEqual,
		Name:  "namespace",
		Value: namespace,
	}
	enforcer := injectproxy.NewEnforcer(matcher)
	if err := enforcer.EnforceNode(root); err != nil {
		return "", err
	}
	return root.String(), nil
}
// FormatDuration parses the duration string for_ and returns the string form of the parsed
// prometheus duration. An empty input yields the string form of the zero duration.
func FormatDuration(for_ string) (string, error) {
	if for_ == "" {
		var zero prommodel.Duration
		return zero.String(), nil
	}
	parsed, err := prommodel.ParseDuration(for_)
	if err != nil {
		return "", errors.Wrapf(err, "failed to parse Duration string(\"%s\") to time.Duration", for_)
	}
	return parsed.String(), nil
}
// parseDurationSeconds converts a duration given in (possibly fractional) seconds into the
// string form of a prometheus duration.
func parseDurationSeconds(durationSeconds float64) string {
	nanos := int64(durationSeconds * float64(time.Second))
	return prommodel.Duration(nanos).String()
}
// GenResourceRuleIdIgnoreFormat generates the id of a rule from a prometheusrule resource
// by fingerprinting its labels together with its group, name, query and duration.
// If the query or the duration cannot be parsed, a warning is logged and the raw value is
// used instead.
func GenResourceRuleIdIgnoreFormat(group string, rule *promresourcesv1.Rule) string {
	rawQuery := rule.Expr.String()
	query, err := FormatExpr(rawQuery)
	if err != nil {
		klog.Warning(errors.Wrapf(err, "invalid alerting rule(%s)", rule.Alert))
		query = rawQuery
	}

	duration, err := FormatDuration(rule.For)
	if err != nil {
		klog.Warning(errors.Wrapf(err, "invalid alerting rule(%s)", rule.Alert))
		duration = rule.For
	}

	signatureLabels := make(map[string]string)
	for key, value := range rule.Labels {
		signatureLabels[key] = value
	}
	// The internal keys are written last so that they override same-named rule labels.
	signatureLabels[LabelKeyInternalRuleGroup] = group
	signatureLabels[LabelKeyInternalRuleName] = rule.Alert
	signatureLabels[LabelKeyInternalRuleQuery] = query
	signatureLabels[LabelKeyInternalRuleDuration] = duration

	signature := prommodel.LabelsToSignature(signatureLabels)
	return prommodel.Fingerprint(signature).String()
}
// GenEndpointRuleId generates the id of a rule gotten from an endpoint by fingerprinting
// its labels together with its group, name, query and duration. When externalLabels is
// non-nil, rule labels whose key and value both equal an external label are left out.
// An error is returned when the rule query cannot be parsed.
func GenEndpointRuleId(group string, epRule *customalerting.AlertingRule,
	externalLabels func() map[string]string) (string, error) {
	query, err := FormatExpr(epRule.Query)
	if err != nil {
		return "", err
	}
	duration := parseDurationSeconds(epRule.Duration)

	signatureLabels := make(map[string]string)
	if externalLabels == nil {
		for key, value := range epRule.Labels {
			signatureLabels[key] = value
		}
	} else {
		extLabels := externalLabels()
		for key, value := range epRule.Labels {
			if extValue, isExternal := extLabels[key]; isExternal && extValue == value {
				continue
			}
			signatureLabels[key] = value
		}
	}
	// The internal keys are written last so that they override same-named rule labels.
	signatureLabels[LabelKeyInternalRuleGroup] = group
	signatureLabels[LabelKeyInternalRuleName] = epRule.Name
	signatureLabels[LabelKeyInternalRuleQuery] = query
	signatureLabels[LabelKeyInternalRuleDuration] = duration

	signature := prommodel.LabelsToSignature(signatureLabels)
	return prommodel.Fingerprint(signature).String(), nil
}
// MixAlertingRules mixes rules from prometheusrule custom resources with rules from endpoints.
// The rules from the prometheusrule custom resources are the main reference; an endpoint
// rule only contributes when its id equals the id of a resource rule.
//
// An endpoint rule group is considered only when the base name of its file, without
// extension, is "<ruleNamespace>-<resource name>" for a resource present in ruleChunk, and
// the group name is in that resource's group set.
//
// For custom rules one result is produced per rule name (the rule whose id sorts first by
// v1alpha1.AlertingRuleIdCompare); for builtin rules one result is produced per rule id.
// The results are collected by iterating over maps, so their order is not deterministic.
func MixAlertingRules(ruleNamespace string, ruleChunk *ResourceRuleChunk, epRuleGroups []*customalerting.RuleGroup,
	extLabels func() map[string]string) ([]*v1alpha1.GettableAlertingRule, error) {

	var (
		idEpRules = make(map[string]*customalerting.AlertingRule)
		ret       []*v1alpha1.GettableAlertingRule
	)

	filePrefix := ruleNamespace + "-"
	for _, group := range epRuleGroups {
		fileShort := strings.TrimSuffix(filepath.Base(group.File), filepath.Ext(group.File))
		if !strings.HasPrefix(fileShort, filePrefix) {
			continue
		}
		resourceRules, ok := ruleChunk.ResourceRulesMap[strings.TrimPrefix(fileShort, filePrefix)]
		if !ok {
			continue
		}
		if _, ok := resourceRules.GroupSet[group.Name]; !ok {
			continue
		}
		for _, epRule := range group.Rules {
			eid, err := GenEndpointRuleId(group.Name, epRule, extLabels)
			if err != nil {
				return nil, errors.Wrap(err, ErrGenRuleId)
			}
			idEpRules[eid] = epRule
		}
	}

	if ruleChunk.Custom {
		// guarantee the names of the custom alerting rules not to be repeated
		nameRules := make(map[string][]*ResourceRule)
		for _, resourceRules := range ruleChunk.ResourceRulesMap {
			for name, rrArr := range resourceRules.NameRules {
				nameRules[name] = append(nameRules[name], rrArr...)
			}
		}
		for _, rrArr := range nameRules {
			if len(rrArr) == 0 {
				continue
			}
			if len(rrArr) > 1 {
				sort.Slice(rrArr, func(i, j int) bool {
					return v1alpha1.AlertingRuleIdCompare(rrArr[i].Id, rrArr[j].Id)
				})
			}
			resRule := rrArr[0]
			if r := mixAlertingRule(resRule, idEpRules[resRule.Id], ruleChunk.Custom, ruleChunk.Level); r != nil {
				ret = append(ret, r)
			}
		}
		return ret, nil
	}

	// guarantee the ids of the builtin alerting rules not to be repeated
	idRules := make(map[string]*v1alpha1.GettableAlertingRule)
	for _, resourceRules := range ruleChunk.ResourceRulesMap {
		for id, rule := range resourceRules.IdRules {
			if r := mixAlertingRule(rule, idEpRules[id], ruleChunk.Custom, ruleChunk.Level); r != nil {
				idRules[id] = r
			}
		}
	}
	for _, r := range idRules {
		ret = append(ret, r)
	}
	return ret, nil
}
// MixAlertingRule mixes a single rule from a prometheusrule custom resource with the
// matching rule from the endpoint rule groups. It returns nil when rule or rule.Rule is nil.
//
// Only groups whose file base name, without extension, is
// "<ruleNamespace>-<rule.ResourceName>" are considered, and within them only endpoint rules
// named like the resource rule. For a custom rule the endpoint rule whose id sorts first by
// v1alpha1.AlertingRuleIdCompare is used; for a builtin rule the one whose id equals rule.Id.
func MixAlertingRule(ruleNamespace string, rule *ResourceRuleSole, epRuleGroups []*customalerting.RuleGroup,
	extLabels func() map[string]string) (*v1alpha1.GettableAlertingRule, error) {

	if rule == nil || rule.Rule == nil {
		return nil, nil
	}

	// A file matches when it carries the namespace prefix followed by the resource name.
	wantedFile := ruleNamespace + "-" + rule.ResourceName
	candidates := make(map[string]*customalerting.AlertingRule)
	for _, group := range epRuleGroups {
		fileShort := strings.TrimSuffix(filepath.Base(group.File), filepath.Ext(group.File))
		if fileShort != wantedFile {
			continue
		}
		for _, epRule := range group.Rules {
			// The id is generated for every rule of the group, so a failure on any of
			// them is reported even if that rule has a different name.
			eid, err := GenEndpointRuleId(group.Name, epRule, extLabels)
			if err != nil {
				return nil, errors.Wrap(err, ErrGenRuleId)
			}
			if epRule.Name == rule.Rule.Alert {
				candidates[eid] = epRule
			}
		}
	}

	var chosen *customalerting.AlertingRule
	if !rule.Custom {
		chosen = candidates[rule.Id]
	} else {
		// guarantees the stability of the get operations.
		var ids []string
		for id := range candidates {
			ids = append(ids, id)
		}
		if len(ids) > 1 {
			sort.Slice(ids, func(i, j int) bool {
				return v1alpha1.AlertingRuleIdCompare(ids[i], ids[j])
			})
		}
		if len(ids) > 0 {
			chosen = candidates[ids[0]]
		}
	}

	return mixAlertingRule(&rule.ResourceRule, chosen, rule.Custom, rule.Level), nil
}
// mixAlertingRule builds a gettable alerting rule from a resource rule and, when epRule is
// non-nil, the matching rule from the endpoint. It returns nil when resRule or resRule.Rule
// is nil.
//
// Id, name, query, duration, labels and annotations come from the resource rule; the alias
// and description labels are moved out of the labels into their own fields. Health, state,
// evaluation details and alerts come from the endpoint rule. Without an endpoint rule the
// state is inactive and the health is unknown.
func mixAlertingRule(resRule *ResourceRule, epRule *customalerting.AlertingRule,
	custom bool, level v1alpha1.RuleLevel) *v1alpha1.GettableAlertingRule {

	if resRule == nil || resRule.Rule == nil {
		return nil
	}

	var (
		alias       string
		description string
		lbls        map[string]string
	)
	// lbls stays nil when the rule has no labels, and is an empty (non-nil) map when the
	// rule only carries the internal alias/description labels.
	if len(resRule.Rule.Labels) > 0 {
		lbls = make(map[string]string)
		for k, v := range resRule.Rule.Labels {
			switch k {
			case LabelKeyInternalRuleAlias:
				alias = v
			case LabelKeyInternalRuleDescription:
				description = v
			default:
				lbls[k] = v
			}
		}
	}

	rule := v1alpha1.GettableAlertingRule{
		AlertingRuleQualifier: v1alpha1.AlertingRuleQualifier{
			Id:     resRule.Id,
			Name:   resRule.Rule.Alert,
			Custom: custom,
			Level:  level,
		},
		AlertingRuleProps: v1alpha1.AlertingRuleProps{
			Query:       resRule.Rule.Expr.String(),
			Duration:    resRule.Rule.For,
			Labels:      lbls,
			Annotations: resRule.Rule.Annotations,
		},
		Alias:       alias,
		Description: description,
		State:       stateInactiveString,
		Health:      string(rules.HealthUnknown),
	}

	if epRule == nil {
		return &rule
	}

	// The state information and alerts associated with the rule are from the rule from the endpoint.
	if epRule.Health != "" {
		rule.Health = epRule.Health
	}
	rule.LastError = epRule.LastError
	rule.LastEvaluation = epRule.LastEvaluation
	rule.EvaluationDurationSeconds = epRule.EvaluationTime

	rState := strings.ToLower(epRule.State)
	cliRuleStateEmpty := rState == ""
	if !cliRuleStateEmpty {
		rule.State = rState
	}

	for _, a := range epRule.Alerts {
		aState := strings.ToLower(a.State)
		// for the rules gotten from prometheus or thanos ruler with a lower version, they may not contain
		// the state property, so compute the rule state by states of its alerts.
		// Compare against the state accumulated so far (not the empty endpoint state) so
		// that the most severe alert state wins regardless of the order of the alerts.
		if cliRuleStateEmpty && alertState(rule.State) < alertState(aState) {
			rule.State = aState
		}

		var alertLabels map[string]string
		if len(a.Labels) > 0 {
			alertLabels = make(map[string]string)
			for k, v := range a.Labels {
				switch k {
				case LabelKeyInternalRuleAlias, LabelKeyInternalRuleDescription:
					// internal labels are not exposed on alerts
				default:
					alertLabels[k] = v
				}
			}
		}
		rule.Alerts = append(rule.Alerts, &v1alpha1.Alert{
			ActiveAt:    a.ActiveAt,
			Labels:      alertLabels,
			Annotations: a.Annotations,
			State:       aState,
			Value:       a.Value,
			Rule:        &rule.AlertingRuleQualifier,
		})
	}
	return &rule
}
// ParseAlertingRules converts the rules of the endpoint rule groups for which filterFunc
// returns true into gettable alerting rules, marking each with the given custom flag and
// level. Rule ids are generated without external labels. An error is returned when the id
// of any rule cannot be generated.
//
// A rule without health is reported with unknown health. The state reported by the endpoint
// is kept (lowercased); a rule without state is inactive unless its alerts indicate a more
// severe state.
func ParseAlertingRules(epRuleGroups []*customalerting.RuleGroup, custom bool, level v1alpha1.RuleLevel,
	filterFunc func(group, ruleId string, rule *customalerting.AlertingRule) bool) ([]*v1alpha1.GettableAlertingRule, error) {

	var ret []*v1alpha1.GettableAlertingRule
	for _, g := range epRuleGroups {
		for _, r := range g.Rules {
			id, err := GenEndpointRuleId(g.Name, r, nil)
			if err != nil {
				return nil, err
			}
			if !filterFunc(g.Name, id, r) {
				continue
			}

			rule := &v1alpha1.GettableAlertingRule{
				AlertingRuleQualifier: v1alpha1.AlertingRuleQualifier{
					Id:     id,
					Name:   r.Name,
					Custom: custom,
					Level:  level,
				},
				AlertingRuleProps: v1alpha1.AlertingRuleProps{
					Query:       r.Query,
					Duration:    parseDurationSeconds(r.Duration),
					Labels:      r.Labels,
					Annotations: r.Annotations,
				},
				State:                     strings.ToLower(r.State),
				Health:                    string(r.Health),
				LastError:                 r.LastError,
				LastEvaluation:            r.LastEvaluation,
				EvaluationDurationSeconds: r.EvaluationTime,
			}
			// Default the health only when the endpoint did not report one (the check
			// used to be inverted and overwrote a reported health with unknown).
			if rule.Health == "" {
				rule.Health = string(rules.HealthUnknown)
			}
			// Default the state only when the endpoint did not report one (it used to be
			// reset to inactive unconditionally).
			ruleStateEmpty := rule.State == ""
			if ruleStateEmpty {
				rule.State = stateInactiveString
			}

			for _, a := range r.Alerts {
				aState := strings.ToLower(a.State)
				// for the rules gotten from prometheus or thanos ruler with a lower version, they may not contain
				// the state property, so compute the rule state by states of its alerts
				if ruleStateEmpty && alertState(rule.State) < alertState(aState) {
					rule.State = aState
				}
				rule.Alerts = append(rule.Alerts, &v1alpha1.Alert{
					ActiveAt:    a.ActiveAt,
					Labels:      a.Labels,
					Annotations: a.Annotations,
					State:       aState,
					Value:       a.Value,
					Rule:        &rule.AlertingRuleQualifier,
				})
			}
			ret = append(ret, rule)
		}
	}
	return ret, nil
}
// String forms of the prometheus alert states, computed once; they serve as default rule
// states and are compared against state strings in alertState.
var (
statePendingString = rules.StatePending.String()
stateFiringString = rules.StateFiring.String()
stateInactiveString = rules.StateInactive.String()
)
// alertState maps a state string to the corresponding prometheus alert state. Any string
// other than the pending or firing state strings maps to the inactive state.
func alertState(state string) rules.AlertState {
	if state == statePendingString {
		return rules.StatePending
	}
	if state == stateFiringString {
		return rules.StateFiring
	}
	return rules.StateInactive
}

View File

@@ -0,0 +1,95 @@
package rules
import (
"testing"
"github.com/google/go-cmp/cmp"
promresourcesv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"github.com/prometheus/prometheus/rules"
"k8s.io/apimachinery/pkg/util/intstr"
"kubesphere.io/kubesphere/pkg/api/customalerting/v1alpha1"
"kubesphere.io/kubesphere/pkg/simple/client/customalerting"
)
// TestMixAlertingRules checks that MixAlertingRules combines a custom resource rule with the
// matching endpoint rule: the alias and description labels become dedicated fields, and the
// health and state are taken from the endpoint rule.
func TestMixAlertingRules(t *testing.T) {
	type testCase struct {
		description       string
		ruleNamespace     string
		resourceRuleChunk *ResourceRuleChunk
		ruleGroups        []*customalerting.RuleGroup
		extLabels         func() map[string]string
		expected          []*v1alpha1.GettableAlertingRule
	}

	cases := []testCase{{
		description:   "mix custom rules",
		ruleNamespace: "test",
		resourceRuleChunk: &ResourceRuleChunk{
			Level:  v1alpha1.RuleLevelNamespace,
			Custom: true,
			ResourceRulesMap: map[string]*ResourceRules{
				"custom-alerting-rule-jqbgn": {
					GroupSet: map[string]struct{}{"alerting.custom.defaults": {}},
					NameRules: map[string][]*ResourceRule{
						"f89836879157ca88": {{
							ResourceName: "custom-alerting-rule-jqbgn",
							Group:        "alerting.custom.defaults",
							Id:           "f89836879157ca88",
							Rule: &promresourcesv1.Rule{
								Alert: "TestCPUUsageHigh",
								Expr:  intstr.FromString(`namespace:workload_cpu_usage:sum{namespace="test"} > 1`),
								For:   "1m",
								Labels: map[string]string{
									LabelKeyInternalRuleAlias:       "The alias is here",
									LabelKeyInternalRuleDescription: "The description is here",
								},
							},
						}},
					},
				},
			},
		},
		ruleGroups: []*customalerting.RuleGroup{{
			Name: "alerting.custom.defaults",
			File: "/etc/thanos/rules/thanos-ruler-thanos-ruler-rulefiles-0/test-custom-alerting-rule-jqbgn.yaml",
			Rules: []*customalerting.AlertingRule{{
				Name:     "TestCPUUsageHigh",
				Query:    `namespace:workload_cpu_usage:sum{namespace="test"} > 1`,
				Duration: 60,
				Health:   string(rules.HealthGood),
				State:    stateInactiveString,
				Labels: map[string]string{
					LabelKeyInternalRuleAlias:       "The alias is here",
					LabelKeyInternalRuleDescription: "The description is here",
				},
			}},
		}},
		expected: []*v1alpha1.GettableAlertingRule{{
			AlertingRuleQualifier: v1alpha1.AlertingRuleQualifier{
				Id:     "f89836879157ca88",
				Name:   "TestCPUUsageHigh",
				Level:  v1alpha1.RuleLevelNamespace,
				Custom: true,
			},
			AlertingRuleProps: v1alpha1.AlertingRuleProps{
				Query:    `namespace:workload_cpu_usage:sum{namespace="test"} > 1`,
				Duration: "1m",
				Labels:   map[string]string{},
			},
			Alias:       "The alias is here",
			Description: "The description is here",
			Health:      string(rules.HealthGood),
			State:       stateInactiveString,
		}},
	}}

	for _, tc := range cases {
		t.Run(tc.description, func(t *testing.T) {
			got, err := MixAlertingRules(tc.ruleNamespace, tc.resourceRuleChunk, tc.ruleGroups, tc.extLabels)
			if err != nil {
				t.Fatal(err)
			}
			if diff := cmp.Diff(got, tc.expected); diff != "" {
				t.Fatalf("%T differ (-got, +want): %s", tc.expected, diff)
			}
		})
	}
}

View File

@@ -213,7 +213,7 @@ func prepare() (informers.InformerFactory, error) {
k8sClient := fakek8s.NewSimpleClientset()
istioClient := fakeistio.NewSimpleClientset()
snapshotClient := fakesnapshot.NewSimpleClientset()
fakeInformerFactory := informers.NewInformerFactories(k8sClient, ksClient, istioClient, snapshotClient, nil)
fakeInformerFactory := informers.NewInformerFactories(k8sClient, ksClient, istioClient, snapshotClient, nil, nil)
k8sInformerFactory := fakeInformerFactory.KubernetesSharedInformerFactory()

View File

@@ -108,7 +108,7 @@ func prepare() *ResourceGetter {
istioClient := fakeistio.NewSimpleClientset()
snapshotClient := fakesnapshot.NewSimpleClientset()
apiextensionsClient := fakeapiextensions.NewSimpleClientset()
fakeInformerFactory := informers.NewInformerFactories(k8sClient, ksClient, istioClient, snapshotClient, apiextensionsClient)
fakeInformerFactory := informers.NewInformerFactories(k8sClient, ksClient, istioClient, snapshotClient, apiextensionsClient, nil)
for _, namespace := range namespaces {
fakeInformerFactory.KubernetesSharedInformerFactory().Core().V1().

View File

@@ -491,7 +491,7 @@ func prepare() Interface {
ksClient := fakeks.NewSimpleClientset([]runtime.Object{testWorkspace, systemWorkspace}...)
k8sClient := fakek8s.NewSimpleClientset([]runtime.Object{testNamespace, kubesphereSystem}...)
istioClient := fakeistio.NewSimpleClientset()
fakeInformerFactory := informers.NewInformerFactories(k8sClient, ksClient, istioClient, nil, nil)
fakeInformerFactory := informers.NewInformerFactories(k8sClient, ksClient, istioClient, nil, nil, nil)
for _, workspace := range workspaces {
fakeInformerFactory.KubeSphereSharedInformerFactory().Tenant().V1alpha1().