feat: kubesphere 4.0 (#6115)

* feat: kubesphere 4.0

Signed-off-by: ci-bot <ci-bot@kubesphere.io>

* feat: kubesphere 4.0

Signed-off-by: ci-bot <ci-bot@kubesphere.io>

---------

Signed-off-by: ci-bot <ci-bot@kubesphere.io>
Co-authored-by: ks-ci-bot <ks-ci-bot@example.com>
Co-authored-by: joyceliu <joyceliu@yunify.com>
This commit is contained in:
KubeSphere CI Bot
2024-09-06 11:05:52 +08:00
committed by GitHub
parent b5015ec7b9
commit 447a51f08b
8557 changed files with 546695 additions and 1146174 deletions

View File

@@ -36,19 +36,25 @@ import (
"k8s.io/apimachinery/pkg/runtime/serializer"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/apimachinery/pkg/util/wait"
apiserverconfig "k8s.io/apiserver/pkg/apis/config"
apiserverconfigv1 "k8s.io/apiserver/pkg/apis/config/v1"
"k8s.io/apiserver/pkg/apis/config/validation"
"k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/server/healthz"
"k8s.io/apiserver/pkg/storage/value"
"k8s.io/apiserver/pkg/server/options/encryptionconfig/metrics"
storagevalue "k8s.io/apiserver/pkg/storage/value"
aestransformer "k8s.io/apiserver/pkg/storage/value/encrypt/aes"
"k8s.io/apiserver/pkg/storage/value/encrypt/envelope"
envelopekmsv2 "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2"
kmstypes "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/v2"
envelopemetrics "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics"
"k8s.io/apiserver/pkg/storage/value/encrypt/identity"
"k8s.io/apiserver/pkg/storage/value/encrypt/secretbox"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
kmsservice "k8s.io/kms/pkg/service"
)
const (
@@ -57,13 +63,80 @@ const (
secretboxTransformerPrefixV1 = "k8s:enc:secretbox:v1:"
kmsTransformerPrefixV1 = "k8s:enc:kms:v1:"
kmsTransformerPrefixV2 = "k8s:enc:kms:v2:"
kmsPluginHealthzNegativeTTL = 3 * time.Second
kmsPluginHealthzPositiveTTL = 20 * time.Second
kmsAPIVersionV1 = "v1"
kmsAPIVersionV2 = "v2"
kmsReloadHealthCheckName = "kms-providers"
// these constants relate to how the KMS v2 plugin status poll logic
// and the DEK/seed generation logic behave. In particular, the positive
// interval and max TTL are closely related as the difference between
// these values defines the worst case window in which the write DEK/seed
// could expire due to the plugin going into an error state. The
// worst case window divided by the negative interval defines the
// minimum amount of times the server will attempt to return to a
// healthy state before the DEK/seed expires and writes begin to fail.
//
// For now, these values are kept small and hardcoded to support being
// able to perform a "passive" storage migration while tolerating some
// amount of plugin downtime.
//
// With the current approach, a user can update the key ID their plugin
// is using and then can simply schedule a migration for 3 + N + M minutes
// later where N is how long it takes their plugin to pick up new config
// and M is extra buffer to allow the API server to process the config.
// At that point, they are guaranteed to either migrate to the new key
// or get errors during the migration.
//
// If the API server coasted forever on the last DEK/seed, they would need
// to actively check if it had observed the new key ID before starting
// a migration - otherwise it could keep using the old DEK/seed and their
// storage migration would not do what they thought it did.
kmsv2PluginHealthzPositiveInterval = 1 * time.Minute
kmsv2PluginHealthzNegativeInterval = 10 * time.Second
kmsv2PluginWriteDEKSourceMaxTTL = 3 * time.Minute
kmsPluginHealthzNegativeTTL = 3 * time.Second
kmsPluginHealthzPositiveTTL = 20 * time.Second
kmsAPIVersionV1 = "v1"
kmsAPIVersionV2 = "v2"
// this name is used for two different healthz endpoints:
// - when one or more KMS v2 plugins are in use and no KMS v1 plugins are in use
// in this case, all v2 plugins are probed via this single endpoint
// - when automatic reload of encryption config is enabled
// in this case, all KMS plugins are probed via this single endpoint
// the endpoint is present even if there are no KMS plugins configured (it is a no-op then)
kmsReloadHealthCheckName = "kms-providers"
)
var codecs serializer.CodecFactory
// this atomic bool allows us to swap enablement of the KMSv2KDF feature in tests
// as the feature gate is now locked to true starting with v1.29
// Note: it cannot be set by an end user
var kdfDisabled atomic.Bool
// this function should only be called in tests to swap enablement of the KMSv2KDF feature
func SetKDFForTests(b bool) func() {
kdfDisabled.Store(!b)
return func() {
kdfDisabled.Store(false)
}
}
// this function should be used to determine enablement of the KMSv2KDF feature
// instead of getting it from DefaultFeatureGate as the feature gate is now locked
// to true starting with v1.29
func GetKDF() bool {
return !kdfDisabled.Load()
}
func init() {
configScheme := runtime.NewScheme()
utilruntime.Must(apiserverconfig.AddToScheme(configScheme))
utilruntime.Must(apiserverconfigv1.AddToScheme(configScheme))
codecs = serializer.NewCodecFactory(configScheme)
envelopemetrics.RegisterMetrics()
storagevalue.RegisterMetrics()
metrics.RegisterMetrics()
}
type kmsPluginHealthzResponse struct {
err error
received time.Time
@@ -78,11 +151,14 @@ type kmsPluginProbe struct {
}
type kmsv2PluginProbe struct {
state atomic.Pointer[envelopekmsv2.State]
name string
ttl time.Duration
service envelopekmsv2.Service
service kmsservice.Service
lastResponse *kmsPluginHealthzResponse
l *sync.Mutex
apiServerID string
version string
}
type kmsHealthChecker []healthz.HealthChecker
@@ -119,7 +195,7 @@ func (h *kmsv2PluginProbe) toHealthzCheck(idx int) healthz.HealthChecker {
// EncryptionConfiguration represents the parsed and normalized encryption configuration for the apiserver.
type EncryptionConfiguration struct {
// Transformers is a list of value.Transformer that will be used to encrypt and decrypt data.
Transformers map[schema.GroupResource]value.Transformer
Transformers map[schema.GroupResource]storagevalue.Transformer
// HealthChecks is a list of healthz.HealthChecker that will be used to check the health of the encryption providers.
HealthChecks []healthz.HealthChecker
@@ -133,15 +209,16 @@ type EncryptionConfiguration struct {
}
// LoadEncryptionConfig parses and validates the encryption config specified by filepath.
// It may launch multiple go routines whose lifecycle is controlled by stopCh.
// It may launch multiple go routines whose lifecycle is controlled by ctx.
// In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched.
// If reload is true, or KMS v2 plugins are used with no KMS v1 plugins, the returned slice of health checkers will always be of length 1.
func LoadEncryptionConfig(filepath string, reload bool, stopCh <-chan struct{}) (*EncryptionConfiguration, error) {
func LoadEncryptionConfig(ctx context.Context, filepath string, reload bool, apiServerID string) (*EncryptionConfiguration, error) {
config, contentHash, err := loadConfig(filepath, reload)
if err != nil {
return nil, fmt.Errorf("error while parsing file: %w", err)
}
transformers, kmsHealthChecks, kmsUsed, err := getTransformerOverridesAndKMSPluginHealthzCheckers(config, stopCh)
transformers, kmsHealthChecks, kmsUsed, err := getTransformerOverridesAndKMSPluginHealthzCheckers(ctx, config, apiServerID)
if err != nil {
return nil, fmt.Errorf("error while building transformers: %w", err)
}
@@ -150,7 +227,7 @@ func LoadEncryptionConfig(filepath string, reload bool, stopCh <-chan struct{})
kmsHealthChecks = []healthz.HealthChecker{kmsHealthChecker(kmsHealthChecks)}
}
// KMSTimeout is the duration we will wait before closing old transformers.
// KMSCloseGracePeriod is the duration we will wait before closing old transformers.
// The way we calculate is as follows:
// 1. Sum all timeouts across all KMS plugins. (check kmsPrefixTransformer for differences between v1 and v2)
// 2. Multiply that by 2 (to allow for some buffer)
@@ -160,12 +237,15 @@ func LoadEncryptionConfig(filepath string, reload bool, stopCh <-chan struct{})
HealthChecks: kmsHealthChecks,
EncryptionFileContentHash: contentHash,
KMSCloseGracePeriod: 2 * kmsUsed.kmsTimeoutSum,
}, err
}, nil
}
func getTransformerOverridesAndKMSPluginHealthzCheckers(config *apiserverconfig.EncryptionConfiguration, stopCh <-chan struct{}) (map[schema.GroupResource]value.Transformer, []healthz.HealthChecker, *kmsState, error) {
// getTransformerOverridesAndKMSPluginHealthzCheckers creates the set of transformers and KMS healthz checks based on the given config.
// It may launch multiple go routines whose lifecycle is controlled by ctx.
// In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched.
func getTransformerOverridesAndKMSPluginHealthzCheckers(ctx context.Context, config *apiserverconfig.EncryptionConfiguration, apiServerID string) (map[schema.GroupResource]storagevalue.Transformer, []healthz.HealthChecker, *kmsState, error) {
var kmsHealthChecks []healthz.HealthChecker
transformers, probes, kmsUsed, err := getTransformerOverridesAndKMSPluginProbes(config, stopCh)
transformers, probes, kmsUsed, err := getTransformerOverridesAndKMSPluginProbes(ctx, config, apiServerID)
if err != nil {
return nil, nil, nil, err
}
@@ -181,8 +261,11 @@ type healthChecker interface {
toHealthzCheck(idx int) healthz.HealthChecker
}
func getTransformerOverridesAndKMSPluginProbes(config *apiserverconfig.EncryptionConfiguration, stopCh <-chan struct{}) (map[schema.GroupResource]value.Transformer, []healthChecker, *kmsState, error) {
resourceToPrefixTransformer := map[schema.GroupResource][]value.PrefixTransformer{}
// getTransformerOverridesAndKMSPluginProbes creates the set of transformers and KMS probes based on the given config.
// It may launch multiple go routines whose lifecycle is controlled by ctx.
// In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched.
func getTransformerOverridesAndKMSPluginProbes(ctx context.Context, config *apiserverconfig.EncryptionConfiguration, apiServerID string) (map[schema.GroupResource]storagevalue.Transformer, []healthChecker, *kmsState, error) {
resourceToPrefixTransformer := map[schema.GroupResource][]storagevalue.PrefixTransformer{}
var probes []healthChecker
var kmsUsed kmsState
@@ -190,31 +273,42 @@ func getTransformerOverridesAndKMSPluginProbes(config *apiserverconfig.Encryptio
for _, resourceConfig := range config.Resources {
resourceConfig := resourceConfig
transformers, p, used, err := prefixTransformersAndProbes(resourceConfig, stopCh)
transformers, p, used, err := prefixTransformersAndProbes(ctx, resourceConfig, apiServerID)
if err != nil {
return nil, nil, nil, err
}
kmsUsed.v1Used = kmsUsed.v1Used || used.v1Used
kmsUsed.v2Used = kmsUsed.v2Used || used.v2Used
kmsUsed.kmsTimeoutSum += used.kmsTimeoutSum
kmsUsed.accumulate(used)
// For each resource, create a list of providers to use
for _, resource := range resourceConfig.Resources {
resource := resource
gr := schema.ParseGroupResource(resource)
resourceToPrefixTransformer[gr] = append(
resourceToPrefixTransformer[gr], transformers...)
// check if resource is masked by *.group rule
anyResourceInGroup := schema.GroupResource{Group: gr.Group, Resource: "*"}
if _, masked := resourceToPrefixTransformer[anyResourceInGroup]; masked {
// an earlier rule already configured a transformer for *.group, masking this rule
// return error since this is not allowed
return nil, nil, nil, fmt.Errorf("resource %q is masked by earlier rule %q", grYAMLString(gr), grYAMLString(anyResourceInGroup))
}
if _, masked := resourceToPrefixTransformer[anyGroupAnyResource]; masked {
// an earlier rule already configured a transformer for *.*, masking this rule
// return error since this is not allowed
return nil, nil, nil, fmt.Errorf("resource %q is masked by earlier rule %q", grYAMLString(gr), grYAMLString(anyGroupAnyResource))
}
resourceToPrefixTransformer[gr] = append(resourceToPrefixTransformer[gr], transformers...)
}
probes = append(probes, p...)
}
transformers := make(map[schema.GroupResource]value.Transformer, len(resourceToPrefixTransformer))
transformers := make(map[schema.GroupResource]storagevalue.Transformer, len(resourceToPrefixTransformer))
for gr, transList := range resourceToPrefixTransformer {
gr := gr
transList := transList
transformers[gr] = value.NewPrefixTransformers(fmt.Errorf("no matching prefix found"), transList...)
transformers[gr] = storagevalue.NewPrefixTransformers(fmt.Errorf("no matching prefix found"), transList...)
}
return transformers, probes, &kmsUsed, nil
@@ -252,7 +346,7 @@ func (h *kmsv2PluginProbe) check(ctx context.Context) error {
h.l.Lock()
defer h.l.Unlock()
if (time.Since(h.lastResponse.received)) < h.ttl {
if time.Since(h.lastResponse.received) < h.ttl {
return h.lastResponse.err
}
@@ -263,7 +357,7 @@ func (h *kmsv2PluginProbe) check(ctx context.Context) error {
return fmt.Errorf("failed to perform status section of the healthz check for KMS Provider %s, error: %w", h.name, err)
}
if err := isKMSv2ProviderHealthy(h.name, p); err != nil {
if err := h.isKMSv2ProviderHealthyAndMaybeRotateDEK(ctx, p); err != nil {
h.lastResponse = &kmsPluginHealthzResponse{err: err, received: time.Now()}
h.ttl = kmsPluginHealthzNegativeTTL
return err
@@ -274,27 +368,160 @@ func (h *kmsv2PluginProbe) check(ctx context.Context) error {
return nil
}
// isKMSv2ProviderHealthy checks if the KMSv2-Plugin is healthy.
func isKMSv2ProviderHealthy(name string, response *envelopekmsv2.StatusResponse) error {
// rotateDEKOnKeyIDChange tries to rotate to a new DEK/seed if the key ID returned by Status does not match the
// current state. If a successful rotation is performed, the new DEK/seed and keyID overwrite the existing state.
// On any failure during rotation (including mismatch between status and encrypt calls), the current state is
// preserved and will remain valid to use for encryption until its expiration (the system attempts to coast).
// If the key ID returned by Status matches the current state, the expiration of the current state is extended
// and no rotation is performed.
func (h *kmsv2PluginProbe) rotateDEKOnKeyIDChange(ctx context.Context, statusKeyID, uid string) error {
// we do not check ValidateEncryptCapability here because it is fine to re-use an old key
// that was marked as expired during an unhealthy period. As long as the key ID matches
// what we expect then there is no need to rotate here.
state, errState := h.getCurrentState()
// allow reads indefinitely in all cases
// allow writes indefinitely as long as there is no error
// allow writes for only up to kmsv2PluginWriteDEKSourceMaxTTL from now when there are errors
// we start the timer before we make the network call because kmsv2PluginWriteDEKSourceMaxTTL is meant to be the upper bound
expirationTimestamp := envelopekmsv2.NowFunc().Add(kmsv2PluginWriteDEKSourceMaxTTL)
// dynamically check if we want to use KDF seed to derive DEKs or just a single DEK
// this gate can only change during tests, but the check is cheap enough to always make
// this allows us to easily exercise both modes without restarting the API server
// TODO integration test that this dynamically takes effect
useSeed := GetKDF()
stateUseSeed := state.EncryptedObject.EncryptedDEKSourceType == kmstypes.EncryptedDEKSourceType_HKDF_SHA256_XNONCE_AES_GCM_SEED
// state is valid and status keyID is unchanged from when we generated this DEK/seed so there is no need to rotate it
// just move the expiration of the current state forward by the reuse interval
// useSeed can only change at runtime during tests, so we check it here to allow us to easily exercise both modes
if errState == nil && state.EncryptedObject.KeyID == statusKeyID && stateUseSeed == useSeed {
state.ExpirationTimestamp = expirationTimestamp
h.state.Store(&state)
return nil
}
transformer, encObject, cacheKey, errGen := envelopekmsv2.GenerateTransformer(ctx, uid, h.service, useSeed)
if encObject == nil {
encObject = &kmstypes.EncryptedObject{} // avoid nil panics
}
// happy path, should be the common case
// TODO maybe add success metrics?
if errGen == nil && encObject.KeyID == statusKeyID {
h.state.Store(&envelopekmsv2.State{
Transformer: transformer,
EncryptedObject: *encObject,
UID: uid,
ExpirationTimestamp: expirationTimestamp,
CacheKey: cacheKey,
})
// it should be logically impossible for the new state to be invalid but check just in case
_, errGen = h.getCurrentState()
if errGen == nil {
klogV6 := klog.V(6)
if klogV6.Enabled() {
klogV6.InfoS("successfully rotated DEK",
"uid", uid,
"useSeed", useSeed,
"newKeyIDHash", envelopekmsv2.GetHashIfNotEmpty(encObject.KeyID),
"oldKeyIDHash", envelopekmsv2.GetHashIfNotEmpty(state.EncryptedObject.KeyID),
"expirationTimestamp", expirationTimestamp.Format(time.RFC3339),
)
}
return nil
}
}
return fmt.Errorf("failed to rotate DEK uid=%q, useSeed=%v, errState=%v, errGen=%v, statusKeyIDHash=%q, encryptKeyIDHash=%q, stateKeyIDHash=%q, expirationTimestamp=%s",
uid, useSeed, errState, errGen, envelopekmsv2.GetHashIfNotEmpty(statusKeyID), envelopekmsv2.GetHashIfNotEmpty(encObject.KeyID), envelopekmsv2.GetHashIfNotEmpty(state.EncryptedObject.KeyID), state.ExpirationTimestamp.Format(time.RFC3339))
}
// getCurrentState returns the latest state from the last status and encrypt calls.
// If the returned error is nil, the state is considered valid indefinitely for read requests.
// For write requests, the caller must also check that state.ValidateEncryptCapability does not error.
func (h *kmsv2PluginProbe) getCurrentState() (envelopekmsv2.State, error) {
state := *h.state.Load()
if state.Transformer == nil {
return envelopekmsv2.State{}, fmt.Errorf("got unexpected nil transformer")
}
encryptedObjectCopy := state.EncryptedObject
if len(encryptedObjectCopy.EncryptedData) != 0 {
return envelopekmsv2.State{}, fmt.Errorf("got unexpected non-empty EncryptedData")
}
encryptedObjectCopy.EncryptedData = []byte{0} // any non-empty value to pass validation
if err := envelopekmsv2.ValidateEncryptedObject(&encryptedObjectCopy); err != nil {
return envelopekmsv2.State{}, fmt.Errorf("got invalid EncryptedObject: %w", err)
}
if state.ExpirationTimestamp.IsZero() {
return envelopekmsv2.State{}, fmt.Errorf("got unexpected zero expirationTimestamp")
}
if len(state.CacheKey) == 0 {
return envelopekmsv2.State{}, fmt.Errorf("got unexpected empty cacheKey")
}
return state, nil
}
func (h *kmsv2PluginProbe) isKMSv2ProviderHealthyAndMaybeRotateDEK(ctx context.Context, response *kmsservice.StatusResponse) error {
var errs []error
if response.Healthz != "ok" {
errs = append(errs, fmt.Errorf("got unexpected healthz status: %s", response.Healthz))
}
if response.Version != envelopekmsv2.KMSAPIVersion {
errs = append(errs, fmt.Errorf("expected KMSv2 API version %s, got %s", envelopekmsv2.KMSAPIVersion, response.Version))
if response.Version != envelopekmsv2.KMSAPIVersionv2 && response.Version != envelopekmsv2.KMSAPIVersionv2beta1 {
errs = append(errs, fmt.Errorf("expected KMSv2 API version %s, got %s", envelopekmsv2.KMSAPIVersionv2, response.Version))
} else {
// set version for the first status response
if len(h.version) == 0 {
h.version = response.Version
}
if h.version != response.Version {
errs = append(errs, fmt.Errorf("KMSv2 API version should not change after the initial status response version %s, got %s", h.version, response.Version))
}
}
if len(response.KeyID) == 0 {
errs = append(errs, fmt.Errorf("expected KMSv2 KeyID to be set, got %s", response.KeyID))
if errCode, err := envelopekmsv2.ValidateKeyID(response.KeyID); err != nil {
envelopemetrics.RecordInvalidKeyIDFromStatus(h.name, string(errCode))
errs = append(errs, fmt.Errorf("got invalid KMSv2 KeyID hash %q: %w", envelopekmsv2.GetHashIfNotEmpty(response.KeyID), err))
} else {
envelopemetrics.RecordKeyIDFromStatus(h.name, response.KeyID, h.apiServerID)
// unconditionally append as we filter out nil errors below
errs = append(errs, h.rotateDEKOnKeyIDChange(ctx, response.KeyID, string(uuid.NewUUID())))
}
if err := utilerrors.Reduce(utilerrors.NewAggregate(errs)); err != nil {
return fmt.Errorf("kmsv2 Provider %s is not healthy, error: %w", name, err)
return fmt.Errorf("kmsv2 Provider %s is not healthy, error: %w", h.name, err)
}
return nil
}
// loadConfig parses the encryption configuration file at filepath and returns the parsed config and hash of the file.
func loadConfig(filepath string, reload bool) (*apiserverconfig.EncryptionConfiguration, string, error) {
data, contentHash, err := loadDataAndHash(filepath)
if err != nil {
return nil, "", fmt.Errorf("error while loading file: %w", err)
}
configObj, gvk, err := codecs.UniversalDecoder().Decode(data, nil, nil)
if err != nil {
return nil, "", fmt.Errorf("error decoding encryption provider configuration file %q: %w", filepath, err)
}
config, ok := configObj.(*apiserverconfig.EncryptionConfiguration)
if !ok {
return nil, "", fmt.Errorf("got unexpected config type: %v", gvk)
}
return config, contentHash, validation.ValidateEncryptionConfiguration(config, reload).ToAggregate()
}
func loadDataAndHash(filepath string) ([]byte, string, error) {
f, err := os.Open(filepath)
if err != nil {
return nil, "", fmt.Errorf("error opening encryption provider configuration file %q: %w", filepath, err)
@@ -309,32 +536,28 @@ func loadConfig(filepath string, reload bool) (*apiserverconfig.EncryptionConfig
return nil, "", fmt.Errorf("encryption provider configuration file %q is empty", filepath)
}
scheme := runtime.NewScheme()
codecs := serializer.NewCodecFactory(scheme)
utilruntime.Must(apiserverconfig.AddToScheme(scheme))
utilruntime.Must(apiserverconfigv1.AddToScheme(scheme))
configObj, gvk, err := codecs.UniversalDecoder().Decode(data, nil, nil)
if err != nil {
return nil, "", err
}
config, ok := configObj.(*apiserverconfig.EncryptionConfiguration)
if !ok {
return nil, "", fmt.Errorf("got unexpected config type: %v", gvk)
}
return config, computeEncryptionConfigHash(data), validation.ValidateEncryptionConfiguration(config, reload).ToAggregate()
return data, computeEncryptionConfigHash(data), nil
}
func prefixTransformersAndProbes(config apiserverconfig.ResourceConfiguration, stopCh <-chan struct{}) ([]value.PrefixTransformer, []healthChecker, *kmsState, error) {
var transformers []value.PrefixTransformer
// GetEncryptionConfigHash reads the encryption configuration file at filepath and returns the hash of the file.
// It does not attempt to decode or load the config, and serves as a cheap check to determine if the file has changed.
func GetEncryptionConfigHash(filepath string) (string, error) {
_, contentHash, err := loadDataAndHash(filepath)
return contentHash, err
}
// prefixTransformersAndProbes creates the set of transformers and KMS probes based on the given resource config.
// It may launch multiple go routines whose lifecycle is controlled by ctx.
// In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched.
func prefixTransformersAndProbes(ctx context.Context, config apiserverconfig.ResourceConfiguration, apiServerID string) ([]storagevalue.PrefixTransformer, []healthChecker, *kmsState, error) {
var transformers []storagevalue.PrefixTransformer
var probes []healthChecker
var kmsUsed kmsState
for _, provider := range config.Providers {
provider := provider
var (
transformer value.PrefixTransformer
transformer storagevalue.PrefixTransformer
transformerErr error
probe healthChecker
used *kmsState
@@ -345,24 +568,23 @@ func prefixTransformersAndProbes(config apiserverconfig.ResourceConfiguration, s
transformer, transformerErr = aesPrefixTransformer(provider.AESGCM, aestransformer.NewGCMTransformer, aesGCMTransformerPrefixV1)
case provider.AESCBC != nil:
transformer, transformerErr = aesPrefixTransformer(provider.AESCBC, aestransformer.NewCBCTransformer, aesCBCTransformerPrefixV1)
cbcTransformer := func(block cipher.Block) (storagevalue.Transformer, error) {
return aestransformer.NewCBCTransformer(block), nil
}
transformer, transformerErr = aesPrefixTransformer(provider.AESCBC, cbcTransformer, aesCBCTransformerPrefixV1)
case provider.Secretbox != nil:
transformer, transformerErr = secretboxPrefixTransformer(provider.Secretbox)
case provider.KMS != nil:
transformer, probe, used, transformerErr = kmsPrefixTransformer(provider.KMS, stopCh)
transformer, probe, used, transformerErr = kmsPrefixTransformer(ctx, provider.KMS, apiServerID)
if transformerErr == nil {
probes = append(probes, probe)
kmsUsed.v1Used = kmsUsed.v1Used || used.v1Used
kmsUsed.v2Used = kmsUsed.v2Used || used.v2Used
// calculate the maximum timeout for all KMS providers
kmsUsed.kmsTimeoutSum += used.kmsTimeoutSum
kmsUsed.accumulate(used)
}
case provider.Identity != nil:
transformer = value.PrefixTransformer{
transformer = storagevalue.PrefixTransformer{
Transformer: identity.NewEncryptCheckTransformer(),
Prefix: []byte{},
}
@@ -381,10 +603,10 @@ func prefixTransformersAndProbes(config apiserverconfig.ResourceConfiguration, s
return transformers, probes, &kmsUsed, nil
}
type blockTransformerFunc func(cipher.Block) value.Transformer
type blockTransformerFunc func(cipher.Block) (storagevalue.Transformer, error)
func aesPrefixTransformer(config *apiserverconfig.AESConfiguration, fn blockTransformerFunc, prefix string) (value.PrefixTransformer, error) {
var result value.PrefixTransformer
func aesPrefixTransformer(config *apiserverconfig.AESConfiguration, fn blockTransformerFunc, prefix string) (storagevalue.PrefixTransformer, error) {
var result storagevalue.PrefixTransformer
if len(config.Keys) == 0 {
return result, fmt.Errorf("aes provider has no valid keys")
@@ -399,41 +621,45 @@ func aesPrefixTransformer(config *apiserverconfig.AESConfiguration, fn blockTran
}
}
keyTransformers := []value.PrefixTransformer{}
keyTransformers := []storagevalue.PrefixTransformer{}
for _, keyData := range config.Keys {
keyData := keyData
key, err := base64.StdEncoding.DecodeString(keyData.Secret)
if err != nil {
return result, fmt.Errorf("could not obtain secret for named key %s: %s", keyData.Name, err)
return result, fmt.Errorf("could not obtain secret for named key %s: %w", keyData.Name, err)
}
block, err := aes.NewCipher(key)
if err != nil {
return result, fmt.Errorf("error while creating cipher for named key %s: %s", keyData.Name, err)
return result, fmt.Errorf("error while creating cipher for named key %s: %w", keyData.Name, err)
}
transformer, err := fn(block)
if err != nil {
return result, fmt.Errorf("error while creating transformer for named key %s: %w", keyData.Name, err)
}
// Create a new PrefixTransformer for this key
keyTransformers = append(keyTransformers,
value.PrefixTransformer{
Transformer: fn(block),
storagevalue.PrefixTransformer{
Transformer: transformer,
Prefix: []byte(keyData.Name + ":"),
})
}
// Create a prefixTransformer which can choose between these keys
keyTransformer := value.NewPrefixTransformers(
keyTransformer := storagevalue.NewPrefixTransformers(
fmt.Errorf("no matching key was found for the provided AES transformer"), keyTransformers...)
// Create a PrefixTransformer which shall later be put in a list with other providers
result = value.PrefixTransformer{
result = storagevalue.PrefixTransformer{
Transformer: keyTransformer,
Prefix: []byte(prefix),
}
return result, nil
}
func secretboxPrefixTransformer(config *apiserverconfig.SecretboxConfiguration) (value.PrefixTransformer, error) {
var result value.PrefixTransformer
func secretboxPrefixTransformer(config *apiserverconfig.SecretboxConfiguration) (storagevalue.PrefixTransformer, error) {
var result storagevalue.PrefixTransformer
if len(config.Keys) == 0 {
return result, fmt.Errorf("secretbox provider has no valid keys")
@@ -448,7 +674,7 @@ func secretboxPrefixTransformer(config *apiserverconfig.SecretboxConfiguration)
}
}
keyTransformers := []value.PrefixTransformer{}
keyTransformers := []storagevalue.PrefixTransformer{}
for _, keyData := range config.Keys {
keyData := keyData
@@ -466,18 +692,18 @@ func secretboxPrefixTransformer(config *apiserverconfig.SecretboxConfiguration)
// Create a new PrefixTransformer for this key
keyTransformers = append(keyTransformers,
value.PrefixTransformer{
storagevalue.PrefixTransformer{
Transformer: secretbox.NewSecretboxTransformer(keyArray),
Prefix: []byte(keyData.Name + ":"),
})
}
// Create a prefixTransformer which can choose between these keys
keyTransformer := value.NewPrefixTransformers(
keyTransformer := storagevalue.NewPrefixTransformers(
fmt.Errorf("no matching key was found for the provided Secretbox transformer"), keyTransformers...)
// Create a PrefixTransformer which shall later be put in a list with other providers
result = value.PrefixTransformer{
result = storagevalue.PrefixTransformer{
Transformer: keyTransformer,
Prefix: []byte(secretboxTransformerPrefixV1),
}
@@ -497,16 +723,31 @@ type kmsState struct {
kmsTimeoutSum time.Duration
}
func kmsPrefixTransformer(config *apiserverconfig.KMSConfiguration, stopCh <-chan struct{}) (value.PrefixTransformer, healthChecker, *kmsState, error) {
// we ignore the cancel func because this context should only be canceled when stopCh is closed
ctx, _ := wait.ContextForChannel(stopCh)
// accumulate computes the KMS state by:
// - determining which KMS plugin versions are in use
// - calculating kmsTimeoutSum which is used as transformTracker.kmsCloseGracePeriod
// DynamicTransformers.Set waits for this period before closing old transformers after a config reload
func (s *kmsState) accumulate(other *kmsState) {
s.v1Used = s.v1Used || other.v1Used
s.v2Used = s.v2Used || other.v2Used
s.kmsTimeoutSum += other.kmsTimeoutSum
}
// kmsPrefixTransformer creates a KMS transformer and probe based on the given KMS config.
// It may launch multiple go routines whose lifecycle is controlled by ctx.
// In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched.
func kmsPrefixTransformer(ctx context.Context, config *apiserverconfig.KMSConfiguration, apiServerID string) (storagevalue.PrefixTransformer, healthChecker, *kmsState, error) {
kmsName := config.Name
switch config.APIVersion {
case kmsAPIVersionV1:
if !utilfeature.DefaultFeatureGate.Enabled(features.KMSv1) {
return storagevalue.PrefixTransformer{}, nil, nil, fmt.Errorf("KMSv1 is deprecated and will only receive security updates going forward. Use KMSv2 instead. Set --feature-gates=KMSv1=true to use the deprecated KMSv1 feature.")
}
klog.InfoS("KMSv1 is deprecated and will only receive security updates going forward. Use KMSv2 instead.")
envelopeService, err := envelopeServiceFactory(ctx, config.Endpoint, config.Timeout.Duration)
if err != nil {
return value.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMSv1-Plugin's probe %q, error: %w", kmsName, err)
return storagevalue.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMSv1-Plugin's probe %q, error: %w", kmsName, err)
}
probe := &kmsPluginProbe{
@@ -527,12 +768,12 @@ func kmsPrefixTransformer(config *apiserverconfig.KMSConfiguration, stopCh <-cha
case kmsAPIVersionV2:
if !utilfeature.DefaultFeatureGate.Enabled(features.KMSv2) {
return value.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMSv2 plugin %q, KMSv2 feature is not enabled", kmsName)
return storagevalue.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMSv2 plugin %q, KMSv2 feature is not enabled", kmsName)
}
envelopeService, err := EnvelopeKMSv2ServiceFactory(ctx, config.Endpoint, config.Timeout.Duration)
envelopeService, err := EnvelopeKMSv2ServiceFactory(ctx, config.Endpoint, config.Name, config.Timeout.Duration)
if err != nil {
return value.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMSv2-Plugin's probe %q, error: %w", kmsName, err)
return storagevalue.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMSv2-Plugin's probe %q, error: %w", kmsName, err)
}
probe := &kmsv2PluginProbe{
@@ -541,11 +782,14 @@ func kmsPrefixTransformer(config *apiserverconfig.KMSConfiguration, stopCh <-cha
service: envelopeService,
l: &sync.Mutex{},
lastResponse: &kmsPluginHealthzResponse{},
apiServerID: apiServerID,
}
// initialize state so that Load always works
probe.state.Store(&envelopekmsv2.State{})
// using AES-GCM by default for encrypting data with KMSv2
transformer := value.PrefixTransformer{
Transformer: envelopekmsv2.NewEnvelopeTransformer(envelopeService, int(*config.CacheSize), aestransformer.NewGCMTransformer),
primeAndProbeKMSv2(ctx, probe, kmsName)
transformer := storagevalue.PrefixTransformer{
Transformer: envelopekmsv2.NewEnvelopeTransformer(envelopeService, kmsName, probe.getCurrentState, apiServerID),
Prefix: []byte(kmsTransformerPrefixV2 + kmsName + ":"),
}
@@ -555,28 +799,83 @@ func kmsPrefixTransformer(config *apiserverconfig.KMSConfiguration, stopCh <-cha
}, nil
default:
return value.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMS plugin %q, unsupported KMS API version %q", kmsName, config.APIVersion)
return storagevalue.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMS plugin %q, unsupported KMS API version %q", kmsName, config.APIVersion)
}
}
func envelopePrefixTransformer(config *apiserverconfig.KMSConfiguration, envelopeService envelope.Service, prefix string) value.PrefixTransformer {
baseTransformerFunc := func(block cipher.Block) value.Transformer {
func primeAndProbeKMSv2(ctx context.Context, probe *kmsv2PluginProbe, kmsName string) {
runProbeCheckAndLog := func(ctx context.Context, depth int) error {
if err := probe.check(ctx); err != nil {
klog.VDepth(1+depth, 2).ErrorS(err, "kms plugin failed health check probe", "name", kmsName)
return err
}
return nil
}
blockAndProbeFastUntilSuccess := func(ctx context.Context) {
_ = wait.PollUntilWithContext(
ctx,
kmsv2PluginHealthzNegativeInterval,
func(ctx context.Context) (bool, error) {
return runProbeCheckAndLog(ctx, 1) == nil, nil
},
)
}
// on the happy path where the plugin is healthy and available on server start,
// prime keyID and DEK by running the check inline once (this also prevents unit tests from flaking)
errPrime := runProbeCheckAndLog(ctx, 0)
// if our initial attempt to prime failed, start trying to get to a valid state in the background ASAP
// this prevents a slow start when the external healthz checker is configured to ignore the KMS healthz endpoint
// since we want to support the plugin starting up async with the API server, this error is not fatal
if errPrime != nil {
go blockAndProbeFastUntilSuccess(ctx) // separate go routine to avoid blocking
}
// make sure that the plugin's key ID is reasonably up-to-date
// also, make sure that our DEK is up-to-date to with said key ID (if it expires the server will fail all writes)
// if this background loop ever stops running, the server will become unfunctional after kmsv2PluginWriteDEKSourceMaxTTL
go wait.PollUntilWithContext(
ctx,
kmsv2PluginHealthzPositiveInterval,
func(ctx context.Context) (bool, error) {
if err := runProbeCheckAndLog(ctx, 0); err == nil {
return false, nil
}
// TODO add integration test for quicker error poll on failure
// if we fail, block the outer polling and start a new quicker poll inline
// this limits the chance that our DEK expires during a transient failure
blockAndProbeFastUntilSuccess(ctx)
return false, nil
})
}
func envelopePrefixTransformer(config *apiserverconfig.KMSConfiguration, envelopeService envelope.Service, prefix string) storagevalue.PrefixTransformer {
baseTransformerFunc := func(block cipher.Block) (storagevalue.Transformer, error) {
gcm, err := aestransformer.NewGCMTransformer(block)
if err != nil {
return nil, err
}
// v1.24: write using AES-CBC only but support reads via AES-CBC and AES-GCM (so we can move to AES-GCM)
// v1.25: write using AES-GCM only but support reads via AES-GCM and fallback to AES-CBC for backwards compatibility
// TODO(aramase): Post v1.25: We cannot drop CBC read support until we automate storage migration.
// We could have a release note that hard requires users to perform storage migration.
return unionTransformers{aestransformer.NewGCMTransformer(block), aestransformer.NewCBCTransformer(block)}
return unionTransformers{gcm, aestransformer.NewCBCTransformer(block)}, nil
}
return value.PrefixTransformer{
return storagevalue.PrefixTransformer{
Transformer: envelope.NewEnvelopeTransformer(envelopeService, int(*config.CacheSize), baseTransformerFunc),
Prefix: []byte(prefix + config.Name + ":"),
}
}
type unionTransformers []value.Transformer
type unionTransformers []storagevalue.Transformer
func (u unionTransformers) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) (out []byte, stale bool, err error) {
func (u unionTransformers) TransformFromStorage(ctx context.Context, data []byte, dataCtx storagevalue.Context) (out []byte, stale bool, err error) {
var errs []error
for i := range u {
transformer := u[i]
@@ -595,7 +894,7 @@ func (u unionTransformers) TransformFromStorage(ctx context.Context, data []byte
return nil, false, fmt.Errorf("unionTransformers: unable to transform from storage")
}
func (u unionTransformers) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) (out []byte, err error) {
func (u unionTransformers) TransformToStorage(ctx context.Context, data []byte, dataCtx storagevalue.Context) (out []byte, err error) {
return u[0].TransformToStorage(ctx, data, dataCtx)
}
@@ -606,6 +905,7 @@ func computeEncryptionConfigHash(data []byte) string {
return fmt.Sprintf("%x", sha256.Sum256(data))
}
var _ storagevalue.ResourceTransformers = &DynamicTransformers{}
var _ healthz.HealthChecker = &DynamicTransformers{}
// DynamicTransformers holds transformers that may be dynamically updated via a single external actor, likely a controller.
@@ -615,7 +915,7 @@ type DynamicTransformers struct {
}
type transformTracker struct {
transformerOverrides map[schema.GroupResource]value.Transformer
transformerOverrides map[schema.GroupResource]storagevalue.Transformer
kmsPluginHealthzCheck healthz.HealthChecker
closeTransformers context.CancelFunc
kmsCloseGracePeriod time.Duration
@@ -623,7 +923,7 @@ type transformTracker struct {
// NewDynamicTransformers returns transformers, health checks for kms providers and an ability to close transformers.
func NewDynamicTransformers(
transformerOverrides map[schema.GroupResource]value.Transformer,
transformerOverrides map[schema.GroupResource]storagevalue.Transformer,
kmsPluginHealthzCheck healthz.HealthChecker,
closeTransformers context.CancelFunc,
kmsCloseGracePeriod time.Duration,
@@ -654,7 +954,7 @@ func (d *DynamicTransformers) Name() string {
}
// TransformerForResource returns the transformer for the given resource.
func (d *DynamicTransformers) TransformerForResource(resource schema.GroupResource) value.Transformer {
func (d *DynamicTransformers) TransformerForResource(resource schema.GroupResource) storagevalue.Transformer {
return &resourceTransformer{
resource: resource,
transformTracker: d.transformTracker,
@@ -663,7 +963,7 @@ func (d *DynamicTransformers) TransformerForResource(resource schema.GroupResour
// Set sets the transformer overrides. This method is not go routine safe and must only be called by the same, single caller throughout the lifetime of this object.
func (d *DynamicTransformers) Set(
transformerOverrides map[schema.GroupResource]value.Transformer,
transformerOverrides map[schema.GroupResource]storagevalue.Transformer,
closeTransformers context.CancelFunc,
kmsPluginHealthzCheck healthz.HealthChecker,
kmsCloseGracePeriod time.Duration,
@@ -688,43 +988,61 @@ func (d *DynamicTransformers) Set(
}()
}
var _ value.Transformer = &resourceTransformer{}
var _ storagevalue.Transformer = &resourceTransformer{}
type resourceTransformer struct {
resource schema.GroupResource
transformTracker *atomic.Value
}
func (r *resourceTransformer) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
func (r *resourceTransformer) TransformFromStorage(ctx context.Context, data []byte, dataCtx storagevalue.Context) ([]byte, bool, error) {
return r.transformer().TransformFromStorage(ctx, data, dataCtx)
}
func (r *resourceTransformer) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
func (r *resourceTransformer) TransformToStorage(ctx context.Context, data []byte, dataCtx storagevalue.Context) ([]byte, error) {
return r.transformer().TransformToStorage(ctx, data, dataCtx)
}
func (r *resourceTransformer) transformer() value.Transformer {
transformer := r.transformTracker.Load().(*transformTracker).transformerOverrides[r.resource]
if transformer == nil {
return identity.NewEncryptCheckTransformer()
func (r *resourceTransformer) transformer() storagevalue.Transformer {
return transformerFromOverrides(r.transformTracker.Load().(*transformTracker).transformerOverrides, r.resource)
}
var _ storagevalue.ResourceTransformers = &StaticTransformers{}
type StaticTransformers map[schema.GroupResource]storagevalue.Transformer
func (s StaticTransformers) TransformerForResource(resource schema.GroupResource) storagevalue.Transformer {
return transformerFromOverrides(s, resource)
}
var anyGroupAnyResource = schema.GroupResource{
Group: "*",
Resource: "*",
}
func transformerFromOverrides(transformerOverrides map[schema.GroupResource]storagevalue.Transformer, resource schema.GroupResource) storagevalue.Transformer {
if transformer := transformerOverrides[resource]; transformer != nil {
return transformer
}
return transformer
}
type ResourceTransformers interface {
TransformerForResource(resource schema.GroupResource) value.Transformer
}
var _ ResourceTransformers = &DynamicTransformers{}
var _ ResourceTransformers = &StaticTransformers{}
type StaticTransformers map[schema.GroupResource]value.Transformer
// StaticTransformers
func (s StaticTransformers) TransformerForResource(resource schema.GroupResource) value.Transformer {
transformer := s[resource]
if transformer == nil {
return identity.NewEncryptCheckTransformer()
if transformer := transformerOverrides[schema.GroupResource{
Group: resource.Group,
Resource: "*",
}]; transformer != nil {
return transformer
}
return transformer
if transformer := transformerOverrides[anyGroupAnyResource]; transformer != nil {
return transformer
}
return identity.NewEncryptCheckTransformer()
}
func grYAMLString(gr schema.GroupResource) string {
if gr.Group == "" && gr.Resource == "*" {
return "*."
}
return gr.String()
}

View File

@@ -20,13 +20,14 @@ import (
"context"
"fmt"
"net/http"
"sync"
"time"
"github.com/fsnotify/fsnotify"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/server/healthz"
"k8s.io/apiserver/pkg/server/options/encryptionconfig"
"k8s.io/apiserver/pkg/server/options/encryptionconfig/metrics"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog/v2"
)
@@ -34,8 +35,11 @@ import (
// workqueueKey is the dummy key used to process change in encryption config file.
const workqueueKey = "key"
// DynamicKMSEncryptionConfigContent which can dynamically handle changes in encryption config file.
type DynamicKMSEncryptionConfigContent struct {
// EncryptionConfigFileChangePollDuration is exposed so that integration tests can crank up the reload speed.
var EncryptionConfigFileChangePollDuration = time.Minute
// DynamicEncryptionConfigContent which can dynamically handle changes in encryption config file.
type DynamicEncryptionConfigContent struct {
name string
// filePath is the path of the file to read.
@@ -50,105 +54,90 @@ type DynamicKMSEncryptionConfigContent struct {
// dynamicTransformers updates the transformers when encryption config file changes.
dynamicTransformers *encryptionconfig.DynamicTransformers
// stopCh used here is a lifecycle signal of genericapiserver already drained while shutting down.
stopCh <-chan struct{}
// identity of the api server
apiServerID string
// can be swapped during testing
getEncryptionConfigHash func(ctx context.Context, filepath string) (string, error)
loadEncryptionConfig func(ctx context.Context, filepath string, reload bool, apiServerID string) (*encryptionconfig.EncryptionConfiguration, error)
}
// NewDynamicKMSEncryptionConfiguration returns controller that dynamically reacts to changes in encryption config file.
func NewDynamicKMSEncryptionConfiguration(
func init() {
metrics.RegisterMetrics()
}
// NewDynamicEncryptionConfiguration returns controller that dynamically reacts to changes in encryption config file.
func NewDynamicEncryptionConfiguration(
name, filePath string,
dynamicTransformers *encryptionconfig.DynamicTransformers,
configContentHash string,
stopCh <-chan struct{},
) *DynamicKMSEncryptionConfigContent {
encryptionConfig := &DynamicKMSEncryptionConfigContent{
apiServerID string,
) *DynamicEncryptionConfigContent {
return &DynamicEncryptionConfigContent{
name: name,
filePath: filePath,
lastLoadedEncryptionConfigHash: configContentHash,
dynamicTransformers: dynamicTransformers,
stopCh: stopCh,
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), fmt.Sprintf("%s-hot-reload", name)),
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), name),
apiServerID: apiServerID,
getEncryptionConfigHash: func(_ context.Context, filepath string) (string, error) {
return encryptionconfig.GetEncryptionConfigHash(filepath)
},
loadEncryptionConfig: encryptionconfig.LoadEncryptionConfig,
}
encryptionConfig.queue.Add(workqueueKey)
return encryptionConfig
}
// Run starts the controller and blocks until stopCh is closed.
func (d *DynamicKMSEncryptionConfigContent) Run(ctx context.Context) {
// Run starts the controller and blocks until ctx is canceled.
func (d *DynamicEncryptionConfigContent) Run(ctx context.Context) {
defer utilruntime.HandleCrash()
defer d.queue.ShutDown()
klog.InfoS("Starting controller", "name", d.name)
defer klog.InfoS("Shutting down controller", "name", d.name)
// start worker for processing content
go wait.Until(d.runWorker, time.Second, ctx.Done())
var wg sync.WaitGroup
// start the loop that watches the encryption config file until stopCh is closed.
go wait.Until(func() {
if err := d.watchEncryptionConfigFile(ctx.Done()); err != nil {
// if there is an error while setting up or handling the watches, this will ensure that we will process the config file.
defer d.queue.Add(workqueueKey)
klog.ErrorS(err, "Failed to watch encryption config file, will retry later")
}
}, time.Second, ctx.Done())
wg.Add(1)
go func() {
defer utilruntime.HandleCrash()
defer wg.Done()
defer d.queue.ShutDown()
<-ctx.Done()
}()
<-ctx.Done()
}
wg.Add(1)
go func() {
defer utilruntime.HandleCrash()
defer wg.Done()
d.runWorker(ctx)
}()
func (d *DynamicKMSEncryptionConfigContent) watchEncryptionConfigFile(stopCh <-chan struct{}) error {
watcher, err := fsnotify.NewWatcher()
if err != nil {
return fmt.Errorf("error creating fsnotify watcher: %w", err)
}
defer watcher.Close()
// this function polls changes in the encryption config file by placing a dummy key in the queue.
// the 'runWorker' function then picks up this dummy key and processes the changes.
// the goroutine terminates when 'ctx' is canceled.
_ = wait.PollUntilContextCancel(
ctx,
EncryptionConfigFileChangePollDuration,
true,
func(ctx context.Context) (bool, error) {
// add dummy item to the queue to trigger file content processing.
d.queue.Add(workqueueKey)
if err = watcher.Add(d.filePath); err != nil {
return fmt.Errorf("error adding watch for file %s: %w", d.filePath, err)
}
// return false to continue polling.
return false, nil
},
)
for {
select {
case event := <-watcher.Events:
if err := d.handleWatchEvent(event, watcher); err != nil {
return err
}
case err := <-watcher.Errors:
return fmt.Errorf("received fsnotify error: %w", err)
case <-stopCh:
return nil
}
}
}
func (d *DynamicKMSEncryptionConfigContent) handleWatchEvent(event fsnotify.Event, watcher *fsnotify.Watcher) error {
// This should be executed after restarting the watch (if applicable) to ensure no file event will be missing.
defer d.queue.Add(workqueueKey)
// return if file has not been removed or renamed.
if event.Op&(fsnotify.Remove|fsnotify.Rename) == 0 {
return nil
}
if err := watcher.Remove(d.filePath); err != nil {
klog.V(2).InfoS("Failed to remove file watch, it may have been deleted", "file", d.filePath, "err", err)
}
if err := watcher.Add(d.filePath); err != nil {
return fmt.Errorf("error adding watch for file %s: %w", d.filePath, err)
}
return nil
wg.Wait()
}
// runWorker to process file content
func (d *DynamicKMSEncryptionConfigContent) runWorker() {
for d.processNextWorkItem() {
func (d *DynamicEncryptionConfigContent) runWorker(ctx context.Context) {
for d.processNextWorkItem(ctx) {
}
}
// processNextWorkItem processes file content when there is a message in the queue.
func (d *DynamicKMSEncryptionConfigContent) processNextWorkItem() bool {
func (d *DynamicEncryptionConfigContent) processNextWorkItem(serverCtx context.Context) bool {
// key here is dummy item in the queue to trigger file content processing.
key, quit := d.queue.Get()
if quit {
@@ -156,6 +145,12 @@ func (d *DynamicKMSEncryptionConfigContent) processNextWorkItem() bool {
}
defer d.queue.Done(key)
d.processWorkItem(serverCtx, key)
return true
}
func (d *DynamicEncryptionConfigContent) processWorkItem(serverCtx context.Context, workqueueKey interface{}) {
var (
updatedEffectiveConfig bool
err error
@@ -163,39 +158,45 @@ func (d *DynamicKMSEncryptionConfigContent) processNextWorkItem() bool {
configChanged bool
)
// get context to close the new transformers.
ctx, closeTransformers := wait.ContextForChannel(d.stopCh)
// get context to close the new transformers (on error cases and on the next reload)
// serverCtx is attached to the API server's lifecycle so we will always close transformers on shut down
ctx, closeTransformers := context.WithCancel(serverCtx)
defer func() {
// TODO: increment success metric when updatedEffectiveConfig=true
// TODO can work queue metrics help here?
if !updatedEffectiveConfig {
// avoid leaking if we're not using the newly constructed transformers (due to an error or them not being changed)
closeTransformers()
}
if updatedEffectiveConfig && err == nil {
metrics.RecordEncryptionConfigAutomaticReloadSuccess(d.apiServerID)
}
if err != nil {
// TODO: increment failure metric
metrics.RecordEncryptionConfigAutomaticReloadFailure(d.apiServerID)
utilruntime.HandleError(fmt.Errorf("error processing encryption config file %s: %v", d.filePath, err))
// add dummy item back to the queue to trigger file content processing.
d.queue.AddRateLimited(key)
d.queue.AddRateLimited(workqueueKey)
}
}()
encryptionConfiguration, configChanged, err = d.processEncryptionConfig(ctx)
if err != nil {
return true
return
}
if !configChanged {
return true
return
}
if len(encryptionConfiguration.HealthChecks) != 1 {
err = fmt.Errorf("unexpected number of healthz checks: %d. Should have only one", len(encryptionConfiguration.HealthChecks))
return true
return
}
// get healthz checks for all new KMS plugins.
if err = d.validateNewTransformersHealth(ctx, encryptionConfiguration.HealthChecks[0], encryptionConfiguration.KMSCloseGracePeriod); err != nil {
return true
return
}
// update transformers.
@@ -212,30 +213,44 @@ func (d *DynamicKMSEncryptionConfigContent) processNextWorkItem() bool {
klog.V(2).InfoS("Loaded new kms encryption config content", "name", d.name)
updatedEffectiveConfig = true
return true
}
// loadEncryptionConfig processes the next set of content from the file.
func (d *DynamicKMSEncryptionConfigContent) processEncryptionConfig(ctx context.Context) (
encryptionConfiguration *encryptionconfig.EncryptionConfiguration,
func (d *DynamicEncryptionConfigContent) processEncryptionConfig(ctx context.Context) (
_ *encryptionconfig.EncryptionConfiguration,
configChanged bool,
err error,
_ error,
) {
// this code path will only execute if reload=true. So passing true explicitly.
encryptionConfiguration, err = encryptionconfig.LoadEncryptionConfig(d.filePath, true, ctx.Done())
contentHash, err := d.getEncryptionConfigHash(ctx, d.filePath)
if err != nil {
return nil, false, err
}
// check if encryptionConfig is different from the current. Do nothing if they are the same.
if encryptionConfiguration.EncryptionFileContentHash == d.lastLoadedEncryptionConfigHash {
klog.V(4).InfoS("Encryption config has not changed", "name", d.name)
if contentHash == d.lastLoadedEncryptionConfigHash {
klog.V(4).InfoS("Encryption config has not changed (before load)", "name", d.name)
return nil, false, nil
}
// this code path will only execute if reload=true. So passing true explicitly.
encryptionConfiguration, err := d.loadEncryptionConfig(ctx, d.filePath, true, d.apiServerID)
if err != nil {
return nil, false, err
}
// check if encryptionConfig is different from the current (again to avoid TOCTOU). Do nothing if they are the same.
if encryptionConfiguration.EncryptionFileContentHash == d.lastLoadedEncryptionConfigHash {
klog.V(4).InfoS("Encryption config has not changed (after load)", "name", d.name)
return nil, false, nil
}
return encryptionConfiguration, true, nil
}
func (d *DynamicKMSEncryptionConfigContent) validateNewTransformersHealth(
// minKMSPluginCloseGracePeriod can be lowered in unit tests to make the health check poll faster
var minKMSPluginCloseGracePeriod = 10 * time.Second
func (d *DynamicEncryptionConfigContent) validateNewTransformersHealth(
ctx context.Context,
kmsPluginHealthzCheck healthz.HealthChecker,
kmsPluginCloseGracePeriod time.Duration,
@@ -243,11 +258,16 @@ func (d *DynamicKMSEncryptionConfigContent) validateNewTransformersHealth(
// test if new transformers are healthy
var healthCheckError error
if kmsPluginCloseGracePeriod < 10*time.Second {
kmsPluginCloseGracePeriod = 10 * time.Second
if kmsPluginCloseGracePeriod < minKMSPluginCloseGracePeriod {
kmsPluginCloseGracePeriod = minKMSPluginCloseGracePeriod
}
pollErr := wait.PollImmediate(100*time.Millisecond, kmsPluginCloseGracePeriod, func() (bool, error) {
// really make sure that the immediate check does not hang
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, kmsPluginCloseGracePeriod)
defer cancel()
pollErr := wait.PollImmediateWithContext(ctx, 100*time.Millisecond, kmsPluginCloseGracePeriod, func(ctx context.Context) (bool, error) {
// create a fake http get request to health check endpoint
req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("/healthz/%s", kmsPluginHealthzCheck.Name()), nil)
if err != nil {

View File

@@ -0,0 +1,111 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"crypto/sha256"
"fmt"
"hash"
"sync"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)
const (
namespace = "apiserver"
subsystem = "encryption_config_controller"
)
var (
encryptionConfigAutomaticReloadFailureTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "automatic_reload_failures_total",
Help: "Total number of failed automatic reloads of encryption configuration split by apiserver identity.",
StabilityLevel: metrics.ALPHA,
},
[]string{"apiserver_id_hash"},
)
encryptionConfigAutomaticReloadSuccessTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "automatic_reload_success_total",
Help: "Total number of successful automatic reloads of encryption configuration split by apiserver identity.",
StabilityLevel: metrics.ALPHA,
},
[]string{"apiserver_id_hash"},
)
encryptionConfigAutomaticReloadLastTimestampSeconds = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "automatic_reload_last_timestamp_seconds",
Help: "Timestamp of the last successful or failed automatic reload of encryption configuration split by apiserver identity.",
StabilityLevel: metrics.ALPHA,
},
[]string{"status", "apiserver_id_hash"},
)
)
var registerMetrics sync.Once
var hashPool *sync.Pool
func RegisterMetrics() {
registerMetrics.Do(func() {
hashPool = &sync.Pool{
New: func() interface{} {
return sha256.New()
},
}
legacyregistry.MustRegister(encryptionConfigAutomaticReloadFailureTotal)
legacyregistry.MustRegister(encryptionConfigAutomaticReloadSuccessTotal)
legacyregistry.MustRegister(encryptionConfigAutomaticReloadLastTimestampSeconds)
})
}
func RecordEncryptionConfigAutomaticReloadFailure(apiServerID string) {
apiServerIDHash := getHash(apiServerID)
encryptionConfigAutomaticReloadFailureTotal.WithLabelValues(apiServerIDHash).Inc()
recordEncryptionConfigAutomaticReloadTimestamp("failure", apiServerIDHash)
}
func RecordEncryptionConfigAutomaticReloadSuccess(apiServerID string) {
apiServerIDHash := getHash(apiServerID)
encryptionConfigAutomaticReloadSuccessTotal.WithLabelValues(apiServerIDHash).Inc()
recordEncryptionConfigAutomaticReloadTimestamp("success", apiServerIDHash)
}
func recordEncryptionConfigAutomaticReloadTimestamp(result, apiServerIDHash string) {
encryptionConfigAutomaticReloadLastTimestampSeconds.WithLabelValues(result, apiServerIDHash).SetToCurrentTime()
}
func getHash(data string) string {
if len(data) == 0 {
return ""
}
h := hashPool.Get().(hash.Hash)
h.Reset()
h.Write([]byte(data))
dataHash := fmt.Sprintf("sha256:%x", h.Sum(nil))
hashPool.Put(h)
return dataHash
}