custom alerting

Signed-off-by: junotx <junotx@126.com>
This commit is contained in:
junotx
2020-11-24 17:56:26 +08:00
parent 242193ddb0
commit 371c9b187d
342 changed files with 64021 additions and 1934 deletions

View File

@@ -0,0 +1,66 @@
/*
Copyright 2020 KubeSphere Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package customalerting
import (
"fmt"
"strings"
"github.com/spf13/pflag"
"kubesphere.io/kubesphere/pkg/utils/reflectutils"
)
type Options struct {
PrometheusEndpoint string `json:"prometheusEndpoint" yaml:"prometheusEndpoint"`
ThanosRulerEndpoint string `json:"thanosRulerEndpoint" yaml:"thanosRulerEndpoint"`
ThanosRuleResourceLabels string `json:"thanosRuleResourceLabels" yaml:"thanosRuleResourceLabels"`
}
func NewOptions() *Options {
return &Options{}
}
func (o *Options) ApplyTo(options *Options) {
reflectutils.Override(options, o)
}
func (o *Options) Validate() []error {
errs := []error{}
if len(o.ThanosRuleResourceLabels) > 0 {
lblStrings := strings.Split(o.ThanosRuleResourceLabels, ",")
for _, lblString := range lblStrings {
if len(lblString) > 0 {
lbl := strings.Split(lblString, "=")
if len(lbl) != 2 {
errs = append(errs, fmt.Errorf("invalid thanos-rule-resource-labels arg: %s", o.ThanosRuleResourceLabels))
}
}
}
}
return errs
}
func (o *Options) AddFlags(fs *pflag.FlagSet, c *Options) {
fs.StringVar(&o.PrometheusEndpoint, "prometheus-endpoint", c.PrometheusEndpoint,
"Prometheus service endpoint from which built-in alerting rules are gotten.")
fs.StringVar(&o.ThanosRulerEndpoint, "thanos-ruler-endpoint", c.ThanosRulerEndpoint,
"Thanos ruler service endpoint from which custom alerting rules are gotten.")
fs.StringVar(&o.ThanosRuleResourceLabels, "thanos-rule-resource-labels", c.ThanosRuleResourceLabels,
"The labels will be added to prometheusrule custom resources to be selected by thanos ruler. eg: thanosruler=thanos-ruler,role=custom-alerting-rules")
}

View File

@@ -0,0 +1,173 @@
package customalerting
import (
"context"
"encoding/json"
"fmt"
"github.com/pkg/errors"
"net/http"
)
import "github.com/prometheus/client_golang/api"
const (
apiPrefix = "/api/v1"
epRules = apiPrefix + "/rules"
statusAPIError = 422
statusSuccess status = "success"
statusError status = "error"
ErrBadData ErrorType = "bad_data"
ErrTimeout ErrorType = "timeout"
ErrCanceled ErrorType = "canceled"
ErrExec ErrorType = "execution"
ErrBadResponse ErrorType = "bad_response"
ErrServer ErrorType = "server_error"
ErrClient ErrorType = "client_error"
)
type status string
type ErrorType string
type Error struct {
Type ErrorType
Msg string
Detail string
}
func (e *Error) Error() string {
return fmt.Sprintf("%s: %s", e.Type, e.Msg)
}
type response struct {
Status status `json:"status"`
Data json.RawMessage `json:"data,omitempty"`
ErrorType ErrorType `json:"errorType,omitempty"`
Error string `json:"error,omitempty"`
Warnings []string `json:"warnings,omitempty"`
}
type RuleClient interface {
PrometheusRules(ctx context.Context) ([]*RuleGroup, error)
ThanosRules(ctx context.Context) ([]*RuleGroup, error)
}
type ruleClient struct {
prometheus api.Client
thanosruler api.Client
}
func (c *ruleClient) PrometheusRules(ctx context.Context) ([]*RuleGroup, error) {
if c.prometheus != nil {
return c.rules(c.prometheus, ctx)
}
return nil, nil
}
func (c *ruleClient) ThanosRules(ctx context.Context) ([]*RuleGroup, error) {
if c.thanosruler != nil {
return c.rules(c.thanosruler, ctx)
}
return nil, nil
}
func (c *ruleClient) rules(client api.Client, ctx context.Context) ([]*RuleGroup, error) {
u := client.URL(epRules, nil)
q := u.Query()
q.Add("type", "alert")
u.RawQuery = q.Encode()
req, err := http.NewRequest(http.MethodGet, u.String(), nil)
if err != nil {
return nil, errors.Wrap(err, "error creating request: ")
}
_, body, _, err := c.do(client, ctx, req)
if err != nil {
return nil, errors.Wrap(err, "error doing request: ")
}
var result struct {
Groups []*RuleGroup
}
err = json.Unmarshal(body, &result)
if err != nil {
return nil, errors.Wrap(err, "")
}
return result.Groups, nil
}
func (c *ruleClient) do(client api.Client, ctx context.Context, req *http.Request) (*http.Response, []byte, []string, error) {
resp, body, e := client.Do(ctx, req)
if e != nil {
return resp, body, nil, e
}
code := resp.StatusCode
if code/100 != 2 && !apiError(code) {
errorType, errorMsg := errorTypeAndMsgFor(resp)
return resp, body, nil, &Error{
Type: errorType,
Msg: errorMsg,
Detail: string(body),
}
}
var result response
if http.StatusNoContent != code {
if jsonErr := json.Unmarshal(body, &result); jsonErr != nil {
return resp, body, nil, &Error{
Type: ErrBadResponse,
Msg: jsonErr.Error(),
}
}
}
var err error
if apiError(code) && result.Status == "success" {
err = &Error{
Type: ErrBadResponse,
Msg: "inconsistent body for response code",
}
}
if result.Status == "error" {
err = &Error{
Type: result.ErrorType,
Msg: result.Error,
}
}
return resp, []byte(result.Data), result.Warnings, err
}
func errorTypeAndMsgFor(resp *http.Response) (ErrorType, string) {
switch resp.StatusCode / 100 {
case 4:
return ErrClient, fmt.Sprintf("client error: %d", resp.StatusCode)
case 5:
return ErrServer, fmt.Sprintf("server error: %d", resp.StatusCode)
}
return ErrBadResponse, fmt.Sprintf("bad response code %d", resp.StatusCode)
}
func apiError(code int) bool {
// These are the codes that rule server sends when it returns an error.
return code == statusAPIError || code == http.StatusBadRequest ||
code == http.StatusServiceUnavailable || code == http.StatusInternalServerError
}
func NewRuleClient(options *Options) (RuleClient, error) {
var (
c ruleClient
e error
)
if options.PrometheusEndpoint != "" {
c.prometheus, e = api.NewClient(api.Config{Address: options.PrometheusEndpoint})
}
if options.ThanosRulerEndpoint != "" {
c.thanosruler, e = api.NewClient(api.Config{Address: options.ThanosRulerEndpoint})
}
return &c, e
}

View File

@@ -0,0 +1,100 @@
package customalerting
import (
"context"
"net/http"
"net/http/httptest"
"testing"
)
func TestListRules(t *testing.T) {
var tests = []struct {
description string
fakeCode int
fakeResp string
expectError bool
}{{
description: "list alerting rules from prometheus endpoint",
expectError: false,
fakeCode: 200,
fakeResp: `
{
"status": "success",
"data": {
"groups": [
{
"name": "kubernetes-resources",
"file": "/etc/prometheus/rules/prometheus-k8s-rulefiles-0/kubesphere-monitoring-system-prometheus-k8s-rules.yaml",
"rules": [
{
"state": "firing",
"name": "KubeCPUOvercommit",
"query": "sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum) / sum(kube_node_status_allocatable_cpu_cores) > (count(kube_node_status_allocatable_cpu_cores) - 1) / count(kube_node_status_allocatable_cpu_cores)",
"duration": 300,
"labels": {
"severity": "warning"
},
"annotations": {
"message": "Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.",
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit"
},
"alerts": [
{
"labels": {
"alertname": "KubeCPUOvercommit",
"severity": "warning"
},
"annotations": {
"message": "Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.",
"runbook_url": "https://github.com/ kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit"
},
"state": "firing",
"activeAt": "2020-09-22T06:18:47.55260138Z",
"value": "4.405e-01"
}
],
"health": "ok",
"evaluationTime": 0.000894038,
"lastEvaluation": "2020-09-22T08:57:17.566233983Z",
"type": "alerting"
}
]
}
]
}
}
`,
}}
for _, test := range tests {
t.Run(test.description, func(t *testing.T) {
mock := MockService(epRules, test.fakeCode, test.fakeResp)
defer mock.Close()
c, e := NewRuleClient(&Options{PrometheusEndpoint: mock.URL})
if e != nil {
t.Fatal(e)
}
rgs, e := c.PrometheusRules(context.TODO())
if test.expectError {
} else {
if e != nil {
t.Fatal(e)
} else if len(rgs) == 1 && len(rgs[0].Rules) == 1 {
} else {
t.Fatalf("expect %d group and %d rule but got %d group and %d rule", 1, 1, len(rgs), len(rgs[0].Rules))
}
}
})
}
}
func MockService(pattern string, fakeCode int, fakeResp string) *httptest.Server {
mux := http.NewServeMux()
mux.HandleFunc(pattern, func(res http.ResponseWriter, req *http.Request) {
res.WriteHeader(fakeCode)
res.Write([]byte(fakeResp))
})
return httptest.NewServer(mux)
}

View File

@@ -0,0 +1,40 @@
package customalerting
import (
"time"
)
type RuleGroup struct {
Name string `json:"name"`
File string `json:"file"`
Rules []*AlertingRule `json:"rules"`
Interval float64 `json:"interval"`
EvaluationTime float64 `json:"evaluationTime"`
LastEvaluation *time.Time `json:"lastEvaluation"`
}
type AlertingRule struct {
// State can be "pending", "firing", "inactive".
State string `json:"state"`
Name string `json:"name"`
Query string `json:"query"`
Duration float64 `json:"duration"`
Labels map[string]string `json:"labels"`
Annotations map[string]string `json:"annotations"`
Alerts []*Alert `json:"alerts"`
// Health can be "ok", "err", "unknown".
Health string `json:"health"`
LastError string `json:"lastError,omitempty"`
EvaluationTime float64 `json:"evaluationTime"`
LastEvaluation *time.Time `json:"lastEvaluation"`
// Type of an alertingRule is always "alerting".
Type string `json:"type"`
}
type Alert struct {
Labels map[string]string `json:"labels"`
Annotations map[string]string `json:"annotations"`
State string `json:"state"`
ActiveAt *time.Time `json:"activeAt,omitempty"`
Value string `json:"value"`
}

View File

@@ -18,6 +18,7 @@ package k8s
import (
snapshotclient "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned"
promresourcesclient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned"
istioclient "istio.io/client-go/pkg/clientset/versioned"
apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
"k8s.io/client-go/discovery"
@@ -42,6 +43,8 @@ type FakeClient struct {
ApiExtensionClient apiextensionsclient.Interface
prometheusClient promresourcesclient.Interface
MasterURL string
KubeConfig *rest.Config
@@ -50,7 +53,8 @@ type FakeClient struct {
func NewFakeClientSets(k8sClient kubernetes.Interface, discoveryClient *discovery.DiscoveryClient,
kubeSphereClient kubesphere.Interface,
istioClient istioclient.Interface, snapshotClient snapshotclient.Interface,
apiextensionsclient apiextensionsclient.Interface, masterURL string, kubeConfig *rest.Config) Client {
apiextensionsclient apiextensionsclient.Interface, prometheusClient promresourcesclient.Interface,
masterURL string, kubeConfig *rest.Config) Client {
return &FakeClient{
K8sClient: k8sClient,
DiscoveryClient: discoveryClient,
@@ -58,6 +62,7 @@ func NewFakeClientSets(k8sClient kubernetes.Interface, discoveryClient *discover
IstioClient: istioClient,
SnapshotClient: snapshotClient,
ApiExtensionClient: apiextensionsclient,
prometheusClient: prometheusClient,
MasterURL: masterURL,
KubeConfig: kubeConfig,
}
@@ -87,6 +92,10 @@ func (n *FakeClient) Discovery() discovery.DiscoveryInterface {
return n.DiscoveryClient
}
func (n *FakeClient) Prometheus() promresourcesclient.Interface {
return n.prometheusClient
}
func (n *FakeClient) Master() string {
return n.MasterURL
}

View File

@@ -17,6 +17,7 @@ limitations under the License.
package k8s
import (
promresourcesclient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned"
snapshotclient "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned"
istioclient "istio.io/client-go/pkg/clientset/versioned"
apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
@@ -35,6 +36,7 @@ type Client interface {
Snapshot() snapshotclient.Interface
ApiExtensions() apiextensionsclient.Interface
Discovery() discovery.DiscoveryInterface
Prometheus() promresourcesclient.Interface
Master() string
Config() *rest.Config
}
@@ -55,6 +57,8 @@ type kubernetesClient struct {
apiextensions apiextensionsclient.Interface
prometheus promresourcesclient.Interface
master string
config *rest.Config
@@ -77,6 +81,7 @@ func NewKubernetesClientOrDie(options *KubernetesOptions) Client {
istio: istioclient.NewForConfigOrDie(config),
snapshot: snapshotclient.NewForConfigOrDie(config),
apiextensions: apiextensionsclient.NewForConfigOrDie(config),
prometheus: promresourcesclient.NewForConfigOrDie(config),
master: config.Host,
config: config,
}
@@ -135,6 +140,11 @@ func NewKubernetesClient(options *KubernetesOptions) (Client, error) {
return nil, err
}
k.prometheus, err = promresourcesclient.NewForConfig(config)
if err != nil {
return nil, err
}
k.master = options.Master
k.config = config
@@ -165,6 +175,10 @@ func (k *kubernetesClient) ApiExtensions() apiextensionsclient.Interface {
return k.apiextensions
}
func (k *kubernetesClient) Prometheus() promresourcesclient.Interface {
return k.prometheus
}
// master address used to generate kubeconfig for downloading
func (k *kubernetesClient) Master() string {
return k.master

View File

@@ -17,6 +17,7 @@ limitations under the License.
package k8s
import (
promresourcesclient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned"
snapshotclient "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned"
istio "istio.io/client-go/pkg/clientset/versioned"
apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
@@ -57,6 +58,10 @@ func (n nullClient) Discovery() discovery.DiscoveryInterface {
return nil
}
func (n *nullClient) Prometheus() promresourcesclient.Interface {
return nil
}
func (n nullClient) Master() string {
return ""
}