diff --git a/cmd/controller-manager/app/controllers.go b/cmd/controller-manager/app/controllers.go index a4a3be453..44aec3320 100644 --- a/cmd/controller-manager/app/controllers.go +++ b/cmd/controller-manager/app/controllers.go @@ -31,6 +31,7 @@ import ( "sigs.k8s.io/kubefed/pkg/controller/util" "kubesphere.io/kubesphere/cmd/controller-manager/app/options" + "kubesphere.io/kubesphere/pkg/controller/alerting" "kubesphere.io/kubesphere/pkg/controller/application" "kubesphere.io/kubesphere/pkg/controller/helm" "kubesphere.io/kubesphere/pkg/controller/namespace" @@ -540,6 +541,26 @@ func addAllControllers(mgr manager.Manager, client k8s.Client, informerFactory i } } + // controllers for alerting + alertingOptionsEnable := cmOptions.AlertingOptions != nil && (cmOptions.AlertingOptions.PrometheusEndpoint != "" || cmOptions.AlertingOptions.ThanosRulerEndpoint != "") + if alertingOptionsEnable { + // "rulegroup" controller + if cmOptions.IsControllerEnabled("rulegroup") { + rulegroupReconciler := &alerting.RuleGroupReconciler{} + addControllerWithSetup(mgr, "rulegroup", rulegroupReconciler) + } + // "clusterrulegroup" controller + if cmOptions.IsControllerEnabled("clusterrulegroup") { + clusterrulegroupReconciler := &alerting.ClusterRuleGroupReconciler{} + addControllerWithSetup(mgr, "clusterrulegroup", clusterrulegroupReconciler) + } + // "globalrulegroup" controller + if cmOptions.IsControllerEnabled("globalrulegroup") { + globalrulegroupReconciler := &alerting.GlobalRuleGroupReconciler{} + addControllerWithSetup(mgr, "globalrulegroup", globalrulegroupReconciler) + } + } + // log all controllers process result for _, name := range allControllers { if cmOptions.IsControllerEnabled(name) { diff --git a/cmd/controller-manager/app/options/options.go b/cmd/controller-manager/app/options/options.go index b7dc0bb5b..b453a9ace 100644 --- a/cmd/controller-manager/app/options/options.go +++ b/cmd/controller-manager/app/options/options.go @@ -22,6 +22,7 @@ import ( "strings" "time" + "kubesphere.io/kubesphere/pkg/simple/client/alerting" "kubesphere.io/kubesphere/pkg/simple/client/monitoring/prometheus" controllerconfig "kubesphere.io/kubesphere/pkg/apiserver/config" @@ -60,6 +61,7 @@ type KubeSphereControllerManagerOptions struct { ServiceMeshOptions *servicemesh.Options GatewayOptions *gateway.Options MonitoringOptions *prometheus.Options + AlertingOptions *alerting.Options LeaderElect bool LeaderElection *leaderelection.LeaderElectionConfig WebhookCertDir string @@ -99,6 +101,7 @@ func NewKubeSphereControllerManagerOptions() *KubeSphereControllerManagerOptions ServiceMeshOptions: servicemesh.NewServiceMeshOptions(), AuthenticationOptions: authentication.NewOptions(), GatewayOptions: gateway.NewGatewayOptions(), + AlertingOptions: alerting.NewAlertingOptions(), LeaderElection: &leaderelection.LeaderElectionConfig{ LeaseDuration: 30 * time.Second, RenewDeadline: 15 * time.Second, @@ -126,6 +129,7 @@ func (s *KubeSphereControllerManagerOptions) Flags(allControllerNameSelectors [] s.MultiClusterOptions.AddFlags(fss.FlagSet("multicluster"), s.MultiClusterOptions) s.ServiceMeshOptions.AddFlags(fss.FlagSet("servicemesh"), s.ServiceMeshOptions) s.GatewayOptions.AddFlags(fss.FlagSet("gateway"), s.GatewayOptions) + s.AlertingOptions.AddFlags(fss.FlagSet("alerting"), s.AlertingOptions) fs := fss.FlagSet("leaderelection") s.bindLeaderElectionFlags(s.LeaderElection, fs) @@ -171,6 +175,7 @@ func (o *KubeSphereControllerManagerOptions) Validate(allControllerNameSelectors errs = append(errs, o.NetworkOptions.Validate()...) errs = append(errs, o.LdapOptions.Validate()...) errs = append(errs, o.MultiClusterOptions.Validate()...) + errs = append(errs, o.AlertingOptions.Validate()...) // genetic option: application-selector if len(o.ApplicationSelector) != 0 { diff --git a/cmd/controller-manager/app/server.go b/cmd/controller-manager/app/server.go index bdf7aac4e..95c9470dd 100644 --- a/cmd/controller-manager/app/server.go +++ b/cmd/controller-manager/app/server.go @@ -67,6 +67,7 @@ func NewControllerManagerCommand() *cobra.Command { ServiceMeshOptions: conf.ServiceMeshOptions, GatewayOptions: conf.GatewayOptions, MonitoringOptions: conf.MonitoringOptions, + AlertingOptions: conf.AlertingOptions, LeaderElection: s.LeaderElection, LeaderElect: s.LeaderElect, WebhookCertDir: s.WebhookCertDir, diff --git a/pkg/apis/addtoscheme_monitoring_v1alpha3.go b/pkg/apis/addtoscheme_monitoring_v1alpha3.go new file mode 100644 index 000000000..104527176 --- /dev/null +++ b/pkg/apis/addtoscheme_monitoring_v1alpha3.go @@ -0,0 +1,22 @@ +/* +Copyright 2019 The KubeSphere Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package apis + +import ( + promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" +) + +func init() { + AddToSchemes = append(AddToSchemes, promv1.SchemeBuilder.AddToScheme) +} diff --git a/pkg/controller/alerting/clusterrulegroup_controller.go b/pkg/controller/alerting/clusterrulegroup_controller.go new file mode 100644 index 000000000..5f42f8ee6 --- /dev/null +++ b/pkg/controller/alerting/clusterrulegroup_controller.go @@ -0,0 +1,142 @@ +/* +Copyright 2019 The KubeSphere Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package alerting + +import ( + "context" + + "github.com/go-logr/logr" + promresourcesv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + promlabels "github.com/prometheus/prometheus/pkg/labels" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" + + alertingv2beta1 "kubesphere.io/api/alerting/v2beta1" +) + +type ClusterRuleGroupReconciler struct { + client.Client + + Log logr.Logger +} + +func (r *ClusterRuleGroupReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { + + var ( + log = r.Log + + ruleLevel = RuleLevelCluster + + clusterrulegroupList = alertingv2beta1.ClusterRuleGroupList{} + + promruleNamespace = PrometheusRuleNamespace + ) + + // get all enabled clusterrulegroups + err := r.Client.List(ctx, &clusterrulegroupList, &client.ListOptions{ + LabelSelector: labels.SelectorFromSet(labels.Set{ + SourceGroupResourceLabelKeyEnable: SourceGroupResourceLabelValueEnableTrue, + }), + }) + if err != nil { + return reconcile.Result{}, err + } + + // labels added to rule.labels + enforceRuleLabels := map[string]string{ + RuleLabelKeyRuleLevel: string(ruleLevel), + } + // matchers enforced to rule.expr + enforceRuleMatchers := []*promlabels.Matcher{} + // labels added to PrometheusRule.metadata.labels + promruleLabelSet := labels.Set{ + PrometheusRuleResourceLabelKeyRuleLevel: string(ruleLevel), + } + + enforceFuncs := createEnforceRuleFuncs(enforceRuleMatchers, enforceRuleLabels) + + // make PrometheusRule Groups + rulegroups, err := makePrometheusRuleGroups(log, &clusterrulegroupList, enforceFuncs...) + if err != nil { + return reconcile.Result{}, err + } + if len(rulegroups) == 0 { + err = r.Client.DeleteAllOf(ctx, &promresourcesv1.PrometheusRule{}, &client.DeleteAllOfOptions{ + ListOptions: client.ListOptions{ + Namespace: promruleNamespace, + LabelSelector: labels.SelectorFromSet(promruleLabelSet), + }, + }) + return reconcile.Result{}, err + } + + // make desired PrometheuRule resources + desired, err := makePrometheusRuleResources(rulegroups, promruleNamespace, PrometheusRulePrefixClusterLevel, promruleLabelSet, nil) + if err != nil { + return reconcile.Result{}, err + } + + // get current PrometheusRules + var current promresourcesv1.PrometheusRuleList + err = r.Client.List(ctx, ¤t, &client.ListOptions{ + Namespace: promruleNamespace, + LabelSelector: labels.SelectorFromSet(promruleLabelSet), + }) + if err != nil { + return reconcile.Result{}, err + } + + // update relevant prometheusrule resources + err = bulkUpdatePrometheusRuleResources(r.Client, ctx, current.Items, desired) + if err != nil && (apierrors.IsConflict(err) || apierrors.IsAlreadyExists(err)) { + return reconcile.Result{Requeue: true}, nil + } + return reconcile.Result{}, err +} + +func (r *ClusterRuleGroupReconciler) SetupWithManager(mgr ctrl.Manager) error { + if r.Log == nil { + r.Log = mgr.GetLogger() + } + if r.Client == nil { + r.Client = mgr.GetClient() + } + + ctr, err := controller.New("clusterrulegroup", mgr, + controller.Options{ + Reconciler: r, + }) + + if err != nil { + return err + } + + err = ctr.Watch( + &source.Kind{Type: &alertingv2beta1.ClusterRuleGroup{}}, + handler.EnqueueRequestsFromMapFunc(func(o client.Object) []reconcile.Request { + return []reconcile.Request{{ + NamespacedName: types.NamespacedName{ + Namespace: PrometheusRuleNamespace, + }, + }} + })) + return err +} diff --git a/pkg/controller/alerting/globalrulegroup_controller.go b/pkg/controller/alerting/globalrulegroup_controller.go new file mode 100644 index 000000000..84462d79c --- /dev/null +++ b/pkg/controller/alerting/globalrulegroup_controller.go @@ -0,0 +1,138 @@ +/* +Copyright 2019 The KubeSphere Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package alerting + +import ( + "context" + + "github.com/go-logr/logr" + promresourcesv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" + + alertingv2beta1 "kubesphere.io/api/alerting/v2beta1" +) + +type GlobalRuleGroupReconciler struct { + client.Client + + Log logr.Logger +} + +func (r *GlobalRuleGroupReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { + + var ( + log = r.Log + + ruleLevel = RuleLevelGlobal + + globalrulegroupList = alertingv2beta1.GlobalRuleGroupList{} + + promruleNamespace = PrometheusRuleNamespace + ) + + // get all enabled globalrulegroups + err := r.Client.List(ctx, &globalrulegroupList, &client.ListOptions{ + LabelSelector: labels.SelectorFromSet(labels.Set{ + SourceGroupResourceLabelKeyEnable: SourceGroupResourceLabelValueEnableTrue, + }), + }) + if err != nil { + return reconcile.Result{}, err + } + + // labels added to rule.labels + enforceRuleLabels := map[string]string{ + RuleLabelKeyRuleLevel: string(ruleLevel), + } + // labels added to PrometheusRule.metadata.labels + promruleLabelSet := labels.Set{ + PrometheusRuleResourceLabelKeyRuleLevel: string(ruleLevel), + } + + enforceFuncs := createEnforceRuleFuncs(nil, enforceRuleLabels) + + // make PrometheusRule Groups + rulegroups, err := makePrometheusRuleGroups(log, &globalrulegroupList, enforceFuncs...) + if err != nil { + return reconcile.Result{}, err + } + if len(rulegroups) == 0 { + err = r.Client.DeleteAllOf(ctx, &promresourcesv1.PrometheusRule{}, &client.DeleteAllOfOptions{ + ListOptions: client.ListOptions{ + Namespace: promruleNamespace, + LabelSelector: labels.SelectorFromSet(promruleLabelSet), + }, + }) + return reconcile.Result{}, err + } + + // make desired PrometheuRule resources + desired, err := makePrometheusRuleResources(rulegroups, promruleNamespace, PrometheusRulePrefixGlobalLevel, promruleLabelSet, nil) + if err != nil { + return reconcile.Result{}, err + } + + // get current PrometheusRules + var current promresourcesv1.PrometheusRuleList + err = r.Client.List(ctx, ¤t, &client.ListOptions{ + Namespace: promruleNamespace, + LabelSelector: labels.SelectorFromSet(promruleLabelSet), + }) + if err != nil { + return reconcile.Result{}, err + } + + err = bulkUpdatePrometheusRuleResources(r.Client, ctx, current.Items, desired) + if err != nil && (apierrors.IsConflict(err) || apierrors.IsAlreadyExists(err)) { + return reconcile.Result{Requeue: true}, nil + } + return reconcile.Result{}, err +} + +func (r *GlobalRuleGroupReconciler) SetupWithManager(mgr ctrl.Manager) error { + if r.Log == nil { + r.Log = mgr.GetLogger() + } + if r.Client == nil { + r.Client = mgr.GetClient() + } + + ctr, err := controller.New("globalrulegroup", mgr, + controller.Options{ + Reconciler: r, + }) + + if err != nil { + return err + } + + err = ctr.Watch( + &source.Kind{Type: &alertingv2beta1.GlobalRuleGroup{}}, + handler.EnqueueRequestsFromMapFunc(func(o client.Object) []reconcile.Request { + return []reconcile.Request{{ + NamespacedName: types.NamespacedName{ + Namespace: PrometheusRuleNamespace, + }, + }} + })) + return err +} diff --git a/pkg/controller/alerting/rulegroup_controller.go b/pkg/controller/alerting/rulegroup_controller.go new file mode 100644 index 000000000..b7f159231 --- /dev/null +++ b/pkg/controller/alerting/rulegroup_controller.go @@ -0,0 +1,171 @@ +/* +Copyright 2019 The KubeSphere Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package alerting + +import ( + "context" + "fmt" + + "github.com/go-logr/logr" + promresourcesv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + promlabels "github.com/prometheus/prometheus/pkg/labels" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" + + alertingv2beta1 "kubesphere.io/api/alerting/v2beta1" +) + +type RuleGroupReconciler struct { + client.Client + + Log logr.Logger +} + +func (r *RuleGroupReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { + + var ( + log = r.Log.WithValues("namespace", req.Namespace) + + ruleLevel = RuleLevelNamesapce + + rulegroupNamespace = req.Namespace + rulegroupList = alertingv2beta1.RuleGroupList{} + + promruleNamespace = PrometheusRuleNamespace + ) + + // get all enabled rulegroups + err := r.Client.List(ctx, &rulegroupList, &client.ListOptions{ + Namespace: rulegroupNamespace, + LabelSelector: labels.SelectorFromSet(labels.Set{ + SourceGroupResourceLabelKeyEnable: SourceGroupResourceLabelValueEnableTrue, + }), + }) + if err != nil { + return reconcile.Result{}, err + } + + // labels added to rule.labels + enforceRuleLabels := map[string]string{ + RuleLabelKeyRuleLevel: string(ruleLevel), + RuleLabelKeyNamespace: rulegroupNamespace, + } + // matchers enforced to rule.expr + enforceRuleMatchers := []*promlabels.Matcher{{ + Type: promlabels.MatchEqual, + Name: RuleLabelKeyNamespace, + Value: rulegroupNamespace, + }} + // labels added to PrometheusRule.metadata.labels + promruleLabelSet := labels.Set{ + PrometheusRuleResourceLabelKeyRuleLevel: string(ruleLevel), + PrometheusRuleResourceLabelKeyOwnerNamespace: rulegroupNamespace, + } + + enforceFuncs := createEnforceRuleFuncs(enforceRuleMatchers, enforceRuleLabels) + + // make PrometheusRule Groups + rulegroups, err := makePrometheusRuleGroups(log, &rulegroupList, enforceFuncs...) + if err != nil { + return reconcile.Result{}, err + } + if len(rulegroups) == 0 { + err = r.Client.DeleteAllOf(ctx, &promresourcesv1.PrometheusRule{}, &client.DeleteAllOfOptions{ + ListOptions: client.ListOptions{ + Namespace: promruleNamespace, + LabelSelector: labels.SelectorFromSet(promruleLabelSet), + }, + }) + return reconcile.Result{}, err + } + + var ns corev1.Namespace + err = r.Client.Get(ctx, types.NamespacedName{Name: rulegroupNamespace}, &ns) + if err != nil { + return reconcile.Result{}, client.IgnoreNotFound(err) + } + if !ns.DeletionTimestamp.IsZero() { + // if the namespace is being deleted, ignoring it because + // the PrometheusRules with the namespace owner will be garbage collected by k8s. + return reconcile.Result{}, nil + } + ownerReferences := []metav1.OwnerReference{{ + APIVersion: ns.APIVersion, + Kind: ns.Kind, + Name: ns.Name, + UID: ns.UID, + }} + + // make desired PrometheuRule resources + namePrefix := fmt.Sprintf("%s%s-", PrometheusRulePrefixNamespaceLevel, rulegroupNamespace) + desired, err := makePrometheusRuleResources(rulegroups, promruleNamespace, namePrefix, promruleLabelSet, ownerReferences) + if err != nil { + return reconcile.Result{}, err + } + + // get current PrometheusRules + var current promresourcesv1.PrometheusRuleList + err = r.Client.List(ctx, ¤t, &client.ListOptions{ + Namespace: promruleNamespace, + LabelSelector: labels.SelectorFromSet(promruleLabelSet), + }) + if err != nil { + return reconcile.Result{}, err + } + + // update relevant prometheusrule resources + err = bulkUpdatePrometheusRuleResources(r.Client, ctx, current.Items, desired) + if err != nil && (apierrors.IsConflict(err) || apierrors.IsAlreadyExists(err)) { + return reconcile.Result{Requeue: true}, nil + } + return reconcile.Result{}, err +} + +func (r *RuleGroupReconciler) SetupWithManager(mgr ctrl.Manager) error { + if r.Log == nil { + r.Log = mgr.GetLogger() + } + if r.Client == nil { + r.Client = mgr.GetClient() + } + + ctr, err := controller.New("rulegroup", mgr, + controller.Options{ + Reconciler: r, + }) + + if err != nil { + return err + } + + err = ctr.Watch( + &source.Kind{Type: &alertingv2beta1.RuleGroup{}}, + handler.EnqueueRequestsFromMapFunc(func(o client.Object) []reconcile.Request { + return []reconcile.Request{{ + NamespacedName: types.NamespacedName{ + Namespace: o.GetNamespace(), + }, + }} + })) + return err +} diff --git a/pkg/controller/alerting/util.go b/pkg/controller/alerting/util.go new file mode 100644 index 000000000..62d9175b7 --- /dev/null +++ b/pkg/controller/alerting/util.go @@ -0,0 +1,345 @@ +/* +Copyright 2019 The KubeSphere Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package alerting + +import ( + "context" + "reflect" + "sort" + "strconv" + + "github.com/go-logr/logr" + "github.com/pkg/errors" + "github.com/prometheus-community/prom-label-proxy/injectproxy" + promresourcesv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + promlabels "github.com/prometheus/prometheus/pkg/labels" + "github.com/prometheus/prometheus/promql/parser" + "gopkg.in/yaml.v2" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "sigs.k8s.io/controller-runtime/pkg/client" + + alertingv2beta1 "kubesphere.io/api/alerting/v2beta1" + + "kubesphere.io/kubesphere/pkg/constants" +) + +const ( + RuleLevelNamesapce RuleLevel = "namespace" + RuleLevelCluster RuleLevel = "cluster" + RuleLevelGlobal RuleLevel = "global" + + // label keys in rule.labels + RuleLabelKeyRuleLevel = "rule_level" + RuleLabelKeyCluster = "cluster" + RuleLabelKeyNamespace = "namespace" + RuleLabelKeySeverity = "severity" + + // label keys in RuleGroup/ClusterRuleGroup/GlobalRuleGroup.metadata.labels + SourceGroupResourceLabelKeyEnable = "alerting.kubesphere.io/enable" + SourceGroupResourceLabelValueEnableTrue = "true" + SourceGroupResourceLabelValueEnableFalse = "false" + + // label keys in PrometheusRule.metadata.labels + PrometheusRuleResourceLabelKeyOwnerNamespace = "alerting.kubesphere.io/owner_namespace" + PrometheusRuleResourceLabelKeyOwnerCluster = "alerting.kubesphere.io/owner_cluster" + PrometheusRuleResourceLabelKeyRuleLevel = "alerting.kubesphere.io/rule_level" + + // name prefix for PrometheusRule + PrometheusRulePrefix = "alertrules-" + PrometheusRulePrefixNamespaceLevel = PrometheusRulePrefix + "ns-" + PrometheusRulePrefixClusterLevel = PrometheusRulePrefix + "cl-" + PrometheusRulePrefixGlobalLevel = PrometheusRulePrefix + "gl-" + + PrometheusRuleNamespace = constants.KubeSphereMonitoringNamespace +) + +type RuleLevel string + +var maxConfigMapDataSize = int(float64(corev1.MaxSecretSize) * 0.5) + +type enforceRuleFunc func(rule *promresourcesv1.Rule) error + +func createEnforceRuleFuncs(enforceRuleMatchers []*promlabels.Matcher, enforceRuleLabels map[string]string) []enforceRuleFunc { + var enforceFuncs []enforceRuleFunc + // enforce func for rule.expr + if len(enforceRuleMatchers) > 0 { + enforcer := injectproxy.NewEnforcer(enforceRuleMatchers...) + enforceFuncs = append(enforceFuncs, func(rule *promresourcesv1.Rule) error { + if enforcer != nil { + expr := rule.Expr.String() + parsedExpr, err := parser.ParseExpr(expr) + if err != nil { + return err + } + if err = enforcer.EnforceNode(parsedExpr); err != nil { + return err + } + rule.Expr = intstr.FromString(parsedExpr.String()) + } + return nil + }) + } + // enforce func for rule.labels + if len(enforceRuleLabels) > 0 { + enforceFuncs = append(enforceFuncs, func(rule *promresourcesv1.Rule) error { + if rule.Labels == nil { + rule.Labels = make(map[string]string) + } + for n, v := range enforceRuleLabels { + rule.Labels[n] = v + } + return nil + }) + } + return enforceFuncs +} + +func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList, + enforceFuncs ...enforceRuleFunc) ([]*promresourcesv1.RuleGroup, error) { + var rulegroups []*promresourcesv1.RuleGroup + + convertRule := func(rule *alertingv2beta1.Rule) (*promresourcesv1.Rule, error) { + if rule.Disable { // ignoring disabled rule + return nil, nil + } + + rule = rule.DeepCopy() + + if rule.Labels == nil { + rule.Labels = make(map[string]string) + } + + if rule.Severity != "" { + rule.Labels[RuleLabelKeySeverity] = string(rule.Severity) + } + + prule := promresourcesv1.Rule{ + Alert: rule.Alert, + For: string(rule.For), + Expr: rule.Expr, + Labels: rule.Labels, + Annotations: rule.Annotations, + } + + for _, f := range enforceFuncs { + if f == nil { + continue + } + err := f(&prule) + if err != nil { + return nil, errors.Wrapf(err, "alert: %s", rule.Alert) + } + } + + return &prule, nil + } + + switch list := groupList.(type) { + case *alertingv2beta1.RuleGroupList: + for _, group := range list.Items { + var prules []promresourcesv1.Rule + for _, rule := range group.Spec.Rules { + prule, err := convertRule(&rule.Rule) + if err != nil { + log.WithValues("rulegroup", group.Namespace+"/"+group.Name).Error(err, "failed to convert") + continue + } + if prule != nil { + prules = append(prules, *prule) + } + } + rulegroups = append(rulegroups, &promresourcesv1.RuleGroup{ + Name: group.Name, + Interval: group.Spec.Interval, + PartialResponseStrategy: group.Spec.PartialResponseStrategy, + Rules: prules, + }) + } + case *alertingv2beta1.ClusterRuleGroupList: + for _, group := range list.Items { + var prules []promresourcesv1.Rule + for _, rule := range group.Spec.Rules { + prule, err := convertRule(&rule.Rule) + if err != nil { + log.WithValues("clusterrulegroup", group.Name).Error(err, "failed to convert") + continue + } + if prule != nil { + prules = append(prules, *prule) + } + } + rulegroups = append(rulegroups, &promresourcesv1.RuleGroup{ + Name: group.Name, + Interval: group.Spec.Interval, + PartialResponseStrategy: group.Spec.PartialResponseStrategy, + Rules: prules, + }) + } + case *alertingv2beta1.GlobalRuleGroupList: + for _, group := range list.Items { + var prules []promresourcesv1.Rule + for _, rule := range group.Spec.Rules { + prule, err := convertRule(&rule.Rule) + if err != nil { + log.WithValues("globalrulegroup", group.Name).Error(err, "failed to convert") + continue + } + if prule != nil { + prules = append(prules, *prule) + } + } + rulegroups = append(rulegroups, &promresourcesv1.RuleGroup{ + Name: group.Name, + Interval: group.Spec.Interval, + PartialResponseStrategy: group.Spec.PartialResponseStrategy, + Rules: prules, + }) + } + } + + return rulegroups, nil +} + +func makePrometheusRuleResources(rulegroups []*promresourcesv1.RuleGroup, namespace, namePrefix string, + labels map[string]string, ownerReferences []metav1.OwnerReference) ([]*promresourcesv1.PrometheusRule, error) { + + promruleSpecs, err := makePrometheusRuleSpecs(rulegroups) + if err != nil { + return nil, err + } + var ps = make([]*promresourcesv1.PrometheusRule, len(promruleSpecs)) + for i := range promruleSpecs { + ps[i] = &promresourcesv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: namePrefix + strconv.Itoa(i), + Labels: labels, + OwnerReferences: ownerReferences, + }, + Spec: *promruleSpecs[i], + } + } + + return ps, nil +} + +type rulegroupsWrapper struct { + rulegroups []*promresourcesv1.RuleGroup + by func(g1, g2 *promresourcesv1.RuleGroup) bool +} + +func (w rulegroupsWrapper) Len() int { + return len(w.rulegroups) +} + +func (w rulegroupsWrapper) Swap(i, j int) { + w.rulegroups[i], w.rulegroups[j] = w.rulegroups[j], w.rulegroups[i] +} + +func (w rulegroupsWrapper) Less(i, j int) bool { + return w.by(w.rulegroups[i], w.rulegroups[j]) +} + +func makePrometheusRuleSpecs(rulegroups []*promresourcesv1.RuleGroup) ([]*promresourcesv1.PrometheusRuleSpec, error) { + sort.Sort(rulegroupsWrapper{ + rulegroups: rulegroups, + by: func(g1, g2 *promresourcesv1.RuleGroup) bool { + return g1.Name < g2.Name + }, + }) + + var ( + pSpecs []*promresourcesv1.PrometheusRuleSpec + pSpec promresourcesv1.PrometheusRuleSpec + size int + ) + + for i := range rulegroups { + rulegroup := rulegroups[i] + + content, err := yaml.Marshal(rulegroup) + if err != nil { + return nil, errors.Wrap(err, "failed to marshal content") + } + contentLen := len(string(content)) + size += contentLen + if size > maxConfigMapDataSize*80/100 { // leave space for enforcing possiable label matchers into expr + pSpecs = append(pSpecs, &pSpec) + // reinit + size = contentLen + pSpec = promresourcesv1.PrometheusRuleSpec{} + } + + pSpec.Groups = append(pSpec.Groups, *rulegroup) + } + if len(pSpec.Groups) > 0 { + pSpecs = append(pSpecs, &pSpec) + } + + return pSpecs, nil +} + +func bulkUpdatePrometheusRuleResources(client client.Client, ctx context.Context, current, desired []*promresourcesv1.PrometheusRule) error { + + var ( + currentMap = make(map[string]*promresourcesv1.PrometheusRule) + desiredMap = make(map[string]*promresourcesv1.PrometheusRule) + err error + ) + for i := range current { + promrule := current[i] + currentMap[promrule.Namespace+"/"+promrule.Name] = promrule + } + for i := range desired { + promrule := desired[i] + desiredMap[promrule.Namespace+"/"+promrule.Name] = promrule + } + + // update if exists in current PrometheusRules, or create + for name, desired := range desiredMap { + if current, ok := currentMap[name]; ok { + if !reflect.DeepEqual(current.Spec, desired.Spec) || + !reflect.DeepEqual(current.Labels, desired.Labels) || + !reflect.DeepEqual(current.OwnerReferences, desired.OwnerReferences) { + desired.SetResourceVersion(current.ResourceVersion) + err = client.Update(ctx, desired) + if err != nil { + return err + } + } + } else { + err = client.Create(ctx, desired) + if err != nil { + return err + } + } + } + // delete if not in desired PrometheusRules + for name, current := range currentMap { + if _, ok := desiredMap[name]; !ok { + err = client.Delete(ctx, current) + if err != nil { + if apierrors.IsNotFound(err) { + continue + } + return err + } + } + } + + return nil +}