Upgrade k8s package version (#5358)

* upgrade k8s package version

Signed-off-by: hongzhouzi <hongzhouzi@kubesphere.io>

* Script upgrade and code formatting.

Signed-off-by: hongzhouzi <hongzhouzi@kubesphere.io>

Signed-off-by: hongzhouzi <hongzhouzi@kubesphere.io>
This commit is contained in:
hongzhouzi
2022-11-15 14:56:38 +08:00
committed by GitHub
parent 5f91c1663a
commit 44167aa47a
3106 changed files with 321340 additions and 172080 deletions

View File

@@ -1,25 +1,21 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- lavalamp
- liggitt
- timothysc
- wojtek-t
- xiang90
- lavalamp
- liggitt
- wojtek-t
reviewers:
- lavalamp
- smarterclayton
- wojtek-t
- deads2k
- caesarxuchao
- mikedanese
- liggitt
- ncdc
- timothysc
- hongchaodeng
- krousey
- xiang90
- mml
- ingvagabund
- resouer
- enj
- lavalamp
- smarterclayton
- wojtek-t
- deads2k
- caesarxuchao
- mikedanese
- liggitt
- ncdc
- ingvagabund
- enj
- stevekuznetsov
emeritus_approvers:
- xiang90
- timothysc

View File

@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package etcd3
package storage
import (
"fmt"
@@ -23,7 +23,6 @@ import (
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/validation/field"
"k8s.io/apiserver/pkg/storage"
)
// APIObjectVersioner implements versioning and extracting etcd node information
@@ -60,7 +59,7 @@ func (a APIObjectVersioner) UpdateList(obj runtime.Object, resourceVersion uint6
return nil
}
// PrepareObjectForStorage clears resource version and self link prior to writing to etcd.
// PrepareObjectForStorage clears resourceVersion and selfLink prior to writing to etcd.
func (a APIObjectVersioner) PrepareObjectForStorage(obj runtime.Object) error {
accessor, err := meta.Accessor(obj)
if err != nil {
@@ -94,7 +93,7 @@ func (a APIObjectVersioner) ParseResourceVersion(resourceVersion string) (uint64
}
version, err := strconv.ParseUint(resourceVersion, 10, 64)
if err != nil {
return 0, storage.NewInvalidError(field.ErrorList{
return 0, NewInvalidError(field.ErrorList{
// Validation errors are supposed to return version-specific field
// paths, but this is probably close enough.
field.Invalid(field.NewPath("resourceVersion"), resourceVersion, err.Error()),
@@ -104,17 +103,17 @@ func (a APIObjectVersioner) ParseResourceVersion(resourceVersion string) (uint64
}
// Versioner implements Versioner
var Versioner storage.Versioner = APIObjectVersioner{}
var _ Versioner = APIObjectVersioner{}
// CompareResourceVersion compares etcd resource versions. Outside this API they are all strings,
// but etcd resource versions are special, they're actually ints, so we can easily compare them.
func (a APIObjectVersioner) CompareResourceVersion(lhs, rhs runtime.Object) int {
lhsVersion, err := Versioner.ObjectResourceVersion(lhs)
lhsVersion, err := a.ObjectResourceVersion(lhs)
if err != nil {
// coder error
panic(err)
}
rhsVersion, err := Versioner.ObjectResourceVersion(rhs)
rhsVersion, err := a.ObjectResourceVersion(rhs)
if err != nil {
// coder error
panic(err)

View File

@@ -31,20 +31,23 @@ import (
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/clock"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/watch"
endpointsrequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/cacher/metrics"
utilfeature "k8s.io/apiserver/pkg/util/feature"
utilflowcontrol "k8s.io/apiserver/pkg/util/flowcontrol"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
utiltrace "k8s.io/utils/trace"
)
var (
emptyFunc = func() {}
emptyFunc = func(bool) {}
)
const (
@@ -144,6 +147,10 @@ func (i *indexedWatchers) deleteWatcher(number int, value string, supported bool
}
func (i *indexedWatchers) terminateAll(objectType reflect.Type, done func(*cacheWatcher)) {
// Note that we don't have to call the setDrainInputBufferLocked method on the watchers,
// because we take advantage of the default value - stop immediately.
// Also, watchers that have already had their draining strategy set
// are no longer available (they were removed from the allWatchers and the valueWatchers maps).
if len(i.allWatchers) > 0 || len(i.valueWatchers) > 0 {
klog.Warningf("Terminating all watchers from cacher %v", objectType)
}
@@ -180,6 +187,10 @@ func newTimeBucketWatchers(clock clock.Clock, bookmarkFrequency time.Duration) *
// adds a watcher to the bucket, if the deadline is before the start, it will be
// added to the first one.
func (t *watcherBookmarkTimeBuckets) addWatcher(w *cacheWatcher) bool {
// note that the returned time can be before t.createTime,
// especially in cases when the nextBookmarkTime method
// gives us the zero value of type Time,
// so bucketID can hold a negative value
nextTime, ok := w.nextBookmarkTime(t.clock.Now(), t.bookmarkFrequency)
if !ok {
return false
@@ -190,7 +201,7 @@ func (t *watcherBookmarkTimeBuckets) addWatcher(w *cacheWatcher) bool {
if bucketID < t.startBucketID {
bucketID = t.startBucketID
}
watchers, _ := t.watchersBuckets[bucketID]
watchers := t.watchersBuckets[bucketID]
t.watchersBuckets[bucketID] = append(watchers, w)
return true
}
@@ -230,6 +241,8 @@ type Cacher struct {
// Incoming events that should be dispatched to watchers.
incoming chan watchCacheEvent
resourcePrefix string
sync.RWMutex
// Before accessing the cacher's cache, wait for the ready to be ok.
@@ -292,6 +305,8 @@ type Cacher struct {
watchersToStop []*cacheWatcher
// Maintain a timeout queue to send the bookmark event before the watcher times out.
bookmarkWatchers *watcherBookmarkTimeBuckets
// expiredBookmarkWatchers is a list of watchers that were expired and need to be scheduled for the next bookmark event
expiredBookmarkWatchers []*cacheWatcher
}
// NewCacherFromConfig creates a new Cacher responsible for servicing WATCH and LIST requests from
@@ -328,6 +343,7 @@ func NewCacherFromConfig(config Config) (*Cacher, error) {
}
objType := reflect.TypeOf(obj)
cacher := &Cacher{
resourcePrefix: config.ResourcePrefix,
ready: newReady(),
storage: config.Storage,
objectType: objType,
@@ -341,7 +357,7 @@ func NewCacherFromConfig(config Config) (*Cacher, error) {
},
// TODO: Figure out the correct value for the buffer size.
incoming: make(chan watchCacheEvent, 100),
dispatchTimeoutBudget: newTimeBudget(stopCh),
dispatchTimeoutBudget: newTimeBudget(),
// We need to (potentially) stop both:
// - wait.Until go-routine
// - reflector.ListAndWatch
@@ -369,6 +385,10 @@ func NewCacherFromConfig(config Config) (*Cacher, error) {
// Configure reflector's pager for an appropriate pagination chunk size for fetching data from
// storage. The pager falls back to full list if paginated list calls fail due to an "Expired" error.
reflector.WatchListPageSize = storageWatchListPageSize
// When etcd loses leader for 3 cycles, it returns error "no leader".
// We don't want to terminate all watchers as recreating all watchers puts high load on api-server.
// In most of the cases, leader is reelected within few cycles.
reflector.MaxInternalErrorRetryDuration = time.Second * 30
cacher.watchCache = watchCache
cacher.reflector = reflector
@@ -403,6 +423,7 @@ func (c *Cacher) startCaching(stopChannel <-chan struct{}) {
successfulList = true
c.ready.set(true)
klog.V(1).Infof("cacher (%v): initialized", c.objectType.String())
metrics.WatchCacheInitializations.WithLabelValues(c.objectType.String()).Inc()
})
defer func() {
if successfulList {
@@ -456,7 +477,9 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
return nil, err
}
c.ready.wait()
if err := c.ready.wait(); err != nil {
return nil, errors.NewServiceUnavailable(err.Error())
}
triggerValue, triggerSupported := "", false
if c.indexedTrigger != nil {
@@ -469,21 +492,12 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
}
}
// If there is indexedTrigger defined, but triggerSupported is false,
// we can't narrow the amount of events significantly at this point.
//
// That said, currently indexedTrigger is defined only for couple resources:
// Pods, Nodes, Secrets and ConfigMaps and there is only a constant
// number of watchers for which triggerSupported is false (excluding those
// issued explicitly by users).
// Thus, to reduce the risk of those watchers blocking all watchers of a
// given resource in the system, we increase the sizes of buffers for them.
chanSize := 10
if c.indexedTrigger != nil && !triggerSupported {
// TODO: We should tune this value and ideally make it dependent on the
// number of objects of a given type and/or their churn.
chanSize = 1000
}
// It boils down to a tradeoff between:
// - having it as small as possible to reduce memory usage
// - having it large enough to ensure that watchers that need to process
// a bunch of changes have enough buffer to avoid blocking other
// watchers when our watcher has a processing hiccup
chanSize := c.watchCache.suggestedWatchChannelSize(c.indexedTrigger != nil, triggerSupported)
// Determine watch timeout('0' means deadline is not set, ignore checking)
deadline, _ := ctx.Deadline()
@@ -503,7 +517,7 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
// underlying watchCache is calling processEvent under its lock.
c.watchCache.RLock()
defer c.watchCache.RUnlock()
initEvents, err := c.watchCache.GetAllEventsSinceThreadUnsafe(watchRV)
cacheInterval, err := c.watchCache.getAllEventsSinceLocked(watchRV)
if err != nil {
// To match the uncached watch implementation, once we have passed authn/authz/admission,
// and successfully parsed a resource version, other errors must fail with a watch event of type ERROR,
@@ -511,18 +525,11 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
return newErrWatcher(err), nil
}
// With some events already sent, update resourceVersion so that
// events that were buffered and not yet processed won't be delivered
// to this watcher second time causing going back in time.
if len(initEvents) > 0 {
watchRV = initEvents[len(initEvents)-1].ResourceVersion
}
func() {
c.Lock()
defer c.Unlock()
// Update watcher.forget function once we can compute it.
watcher.forget = forgetWatcher(c, c.watcherIdx, triggerValue, triggerSupported)
watcher.forget = forgetWatcher(c, watcher, c.watcherIdx, triggerValue, triggerSupported)
c.watchers.addWatcher(watcher, c.watcherIdx, triggerValue, triggerSupported)
// Add it to the queue only when the client support watch bookmarks.
@@ -532,15 +539,10 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
c.watcherIdx++
}()
go watcher.process(ctx, initEvents, watchRV)
go watcher.processInterval(ctx, cacheInterval, watchRV)
return watcher, nil
}
// WatchList implements storage.Interface.
func (c *Cacher) WatchList(ctx context.Context, key string, opts storage.ListOptions) (watch.Interface, error) {
return c.Watch(ctx, key, opts)
}
// Get implements storage.Interface.
func (c *Cacher) Get(ctx context.Context, key string, opts storage.GetOptions, objPtr runtime.Object) error {
if opts.ResourceVersion == "" {
@@ -565,7 +567,9 @@ func (c *Cacher) Get(ctx context.Context, key string, opts storage.GetOptions, o
// Do not create a trace - it's not for free and there are tons
// of Get requests. We can add it if it will be really needed.
c.ready.wait()
if err := c.ready.wait(); err != nil {
return errors.NewServiceUnavailable(err.Error())
}
objVal, err := conversion.EnforcePtr(objPtr)
if err != nil {
@@ -592,95 +596,46 @@ func (c *Cacher) Get(ctx context.Context, key string, opts storage.GetOptions, o
return nil
}
// GetToList implements storage.Interface.
func (c *Cacher) GetToList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
// NOTICE: Keep in sync with shouldListFromStorage function in
//
// staging/src/k8s.io/apiserver/pkg/util/flowcontrol/request/list_work_estimator.go
func shouldDelegateList(opts storage.ListOptions) bool {
resourceVersion := opts.ResourceVersion
pred := opts.Predicate
pagingEnabled := utilfeature.DefaultFeatureGate.Enabled(features.APIListChunking)
hasContinuation := pagingEnabled && len(pred.Continue) > 0
hasLimit := pagingEnabled && pred.Limit > 0 && resourceVersion != "0"
if resourceVersion == "" || hasContinuation || hasLimit || opts.ResourceVersionMatch == metav1.ResourceVersionMatchExact {
// If resourceVersion is not specified, serve it from underlying
// storage (for backward compatibility). If a continuation is
// requested, serve it from the underlying storage as well.
// Limits are only sent to storage when resourceVersion is non-zero
// since the watch cache isn't able to perform continuations, and
// limits are ignored when resource version is zero
return c.storage.GetToList(ctx, key, opts, listObj)
}
// If resourceVersion is specified, serve it from cache.
// It's guaranteed that the returned value is at least that
// fresh as the given resourceVersion.
listRV, err := c.versioner.ParseResourceVersion(resourceVersion)
if err != nil {
return err
}
if listRV == 0 && !c.ready.check() {
// If Cacher is not yet initialized and we don't require any specific
// minimal resource version, simply forward the request to storage.
return c.storage.GetToList(ctx, key, opts, listObj)
}
trace := utiltrace.New("cacher list", utiltrace.Field{"type", c.objectType.String()})
defer trace.LogIfLong(500 * time.Millisecond)
c.ready.wait()
trace.Step("Ready")
// List elements with at least 'listRV' from cache.
listPtr, err := meta.GetItemsPtr(listObj)
if err != nil {
return err
}
listVal, err := conversion.EnforcePtr(listPtr)
if err != nil {
return err
}
if listVal.Kind() != reflect.Slice {
return fmt.Errorf("need a pointer to slice, got %v", listVal.Kind())
}
filter := filterWithAttrsFunction(key, pred)
obj, exists, readResourceVersion, err := c.watchCache.WaitUntilFreshAndGet(listRV, key, trace)
if err != nil {
return err
}
trace.Step("Got from cache")
if exists {
elem, ok := obj.(*storeElement)
if !ok {
return fmt.Errorf("non *storeElement returned from storage: %v", obj)
}
if filter(elem.Key, elem.Labels, elem.Fields) {
listVal.Set(reflect.Append(listVal, reflect.ValueOf(elem.Object).Elem()))
}
}
if c.versioner != nil {
if err := c.versioner.UpdateList(listObj, readResourceVersion, "", nil); err != nil {
return err
}
}
return nil
// If resourceVersion is not specified, serve it from underlying
// storage (for backward compatibility). If a continuation is
// requested, serve it from the underlying storage as well.
// Limits are only sent to storage when resourceVersion is non-zero
// since the watch cache isn't able to perform continuations, and
// limits are ignored when resource version is zero
return resourceVersion == "" || hasContinuation || hasLimit || opts.ResourceVersionMatch == metav1.ResourceVersionMatchExact
}
// List implements storage.Interface.
func (c *Cacher) List(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
// listItems reads the objects for a list request from the watch cache.
//
// When recursive is false, it looks up the single object stored under key via
// WaitUntilFreshAndGet and returns it as a slice of zero or one element; the
// string result is empty in that case. When recursive is true, it delegates to
// WaitUntilFreshAndList, passing the predicate's matcher index.
//
// It returns the objects, the resource version that was read, a string that
// GetList treats as the name of the index used (and reports to metrics), and
// any error from the watch cache.
func (c *Cacher) listItems(listRV uint64, key string, pred storage.SelectionPredicate, trace *utiltrace.Trace, recursive bool) ([]interface{}, uint64, string, error) {
if !recursive {
// Non-recursive request: fetch at most one object for the exact key.
obj, exists, readResourceVersion, err := c.watchCache.WaitUntilFreshAndGet(listRV, key, trace)
if err != nil {
return nil, 0, "", err
}
if exists {
return []interface{}{obj}, readResourceVersion, "", nil
}
// The key is absent: return no items, but still report the resource version that was read.
return nil, readResourceVersion, "", nil
}
return c.watchCache.WaitUntilFreshAndList(listRV, pred.MatcherIndex(), trace)
}
// GetList implements storage.Interface
func (c *Cacher) GetList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
recursive := opts.Recursive
resourceVersion := opts.ResourceVersion
pred := opts.Predicate
pagingEnabled := utilfeature.DefaultFeatureGate.Enabled(features.APIListChunking)
hasContinuation := pagingEnabled && len(pred.Continue) > 0
hasLimit := pagingEnabled && pred.Limit > 0 && resourceVersion != "0"
if resourceVersion == "" || hasContinuation || hasLimit || opts.ResourceVersionMatch == metav1.ResourceVersionMatchExact {
// If resourceVersion is not specified, serve it from underlying
// storage (for backward compatibility). If a continuation is
// requested, serve it from the underlying storage as well.
// Limits are only sent to storage when resourceVersion is non-zero
// since the watch cache isn't able to perform continuations, and
// limits are ignored when resource version is zero.
return c.storage.List(ctx, key, opts, listObj)
if shouldDelegateList(opts) {
return c.storage.GetList(ctx, key, opts, listObj)
}
// If resourceVersion is specified, serve it from cache.
@@ -694,13 +649,17 @@ func (c *Cacher) List(ctx context.Context, key string, opts storage.ListOptions,
if listRV == 0 && !c.ready.check() {
// If Cacher is not yet initialized and we don't require any specific
// minimal resource version, simply forward the request to storage.
return c.storage.List(ctx, key, opts, listObj)
return c.storage.GetList(ctx, key, opts, listObj)
}
trace := utiltrace.New("cacher list", utiltrace.Field{"type", c.objectType.String()})
trace := utiltrace.New("cacher list",
utiltrace.Field{"audit-id", endpointsrequest.GetAuditIDTruncated(ctx)},
utiltrace.Field{Key: "type", Value: c.objectType.String()})
defer trace.LogIfLong(500 * time.Millisecond)
c.ready.wait()
if err := c.ready.wait(); err != nil {
return errors.NewServiceUnavailable(err.Error())
}
trace.Step("Ready")
// List elements with at least 'listRV' from cache.
@@ -717,11 +676,11 @@ func (c *Cacher) List(ctx context.Context, key string, opts storage.ListOptions,
}
filter := filterWithAttrsFunction(key, pred)
objs, readResourceVersion, err := c.watchCache.WaitUntilFreshAndList(listRV, pred.MatcherIndex(), trace)
objs, readResourceVersion, indexUsed, err := c.listItems(listRV, key, pred, trace, recursive)
if err != nil {
return err
}
trace.Step("Listed items from cache", utiltrace.Field{"count", len(objs)})
trace.Step("Listed items from cache", utiltrace.Field{Key: "count", Value: len(objs)})
if len(objs) > listVal.Cap() && pred.Label.Empty() && pred.Field.Empty() {
// Resize the slice appropriately, since we already know that none
// of the elements will be filtered out.
@@ -737,18 +696,19 @@ func (c *Cacher) List(ctx context.Context, key string, opts storage.ListOptions,
listVal.Set(reflect.Append(listVal, reflect.ValueOf(elem.Object).Elem()))
}
}
trace.Step("Filtered items", utiltrace.Field{"count", listVal.Len()})
trace.Step("Filtered items", utiltrace.Field{Key: "count", Value: listVal.Len()})
if c.versioner != nil {
if err := c.versioner.UpdateList(listObj, readResourceVersion, "", nil); err != nil {
return err
}
}
metrics.RecordListCacheMetrics(c.resourcePrefix, indexUsed, len(objs), listVal.Len())
return nil
}
// GuaranteedUpdate implements storage.Interface.
func (c *Cacher) GuaranteedUpdate(
ctx context.Context, key string, ptrToType runtime.Object, ignoreNotFound bool,
ctx context.Context, key string, destination runtime.Object, ignoreNotFound bool,
preconditions *storage.Preconditions, tryUpdate storage.UpdateFunc, _ runtime.Object) error {
// Ignore the suggestion and try to pass down the current version of the object
// read from cache.
@@ -758,10 +718,10 @@ func (c *Cacher) GuaranteedUpdate(
// DeepCopy the object since we modify resource version when serializing the
// current object.
currObj := elem.(*storeElement).Object.DeepCopyObject()
return c.storage.GuaranteedUpdate(ctx, key, ptrToType, ignoreNotFound, preconditions, tryUpdate, currObj)
return c.storage.GuaranteedUpdate(ctx, key, destination, ignoreNotFound, preconditions, tryUpdate, currObj)
}
// If we couldn't get the object, fallback to no-suggestion.
return c.storage.GuaranteedUpdate(ctx, key, ptrToType, ignoreNotFound, preconditions, tryUpdate, nil)
return c.storage.GuaranteedUpdate(ctx, key, destination, ignoreNotFound, preconditions, tryUpdate, nil)
}
// Count implements storage.Interface.
@@ -828,6 +788,7 @@ func (c *Cacher) dispatchEvents() {
c.dispatchEvent(&event)
}
lastProcessedResourceVersion = event.ResourceVersion
metrics.EventsCounter.WithLabelValues(c.objectType.String()).Inc()
case <-bookmarkTimer.C():
bookmarkTimer.Reset(wait.Jitter(time.Second, 0.25))
// Never send a bookmark event if we did not see an event here, this is fine
@@ -872,11 +833,11 @@ func setCachingObjects(event *watchCacheEvent, versioner storage.Versioner) {
// Don't wrap Object for delete events - these are not to deliver any
// events. Only wrap PrevObject.
if object, err := newCachingObject(event.PrevObject); err == nil {
// Update resource version of the underlying object.
// Update resource version of the object.
// event.PrevObject is used to deliver DELETE watch events and
// for them, we set resourceVersion to <current> instead of
// the resourceVersion of the last modification of the object.
updateResourceVersionIfNeeded(object.object, versioner, event.ResourceVersion)
updateResourceVersion(object, versioner, event.ResourceVersion)
event.PrevObject = object
} else {
klog.Errorf("couldn't create cachingObject from: %#v", event.Object)
@@ -905,14 +866,14 @@ func (c *Cacher) dispatchEvent(event *watchCacheEvent) {
// from it justifies increased memory usage, so for now we drop the cached
// serializations after dispatching this event.
//
// Given the deep-copies that are done to create cachingObjects,
// we try to cache serializations only if there are at least 3 watchers.
if len(c.watchersBuffer) >= 3 {
// Make a shallow copy to allow overwriting Object and PrevObject.
wcEvent := *event
setCachingObjects(&wcEvent, c.versioner)
event = &wcEvent
}
// Given that CachingObject is just wrapping the object and not performing
// deep-copying (until some field is explicitly being modified), we create
// it unconditionally to ensure safety and reduce deep-copying.
//
// Make a shallow copy to allow overwriting Object and PrevObject.
wcEvent := *event
setCachingObjects(&wcEvent, c.versioner)
event = &wcEvent
c.blockedWatchers = c.blockedWatchers[:0]
for _, watcher := range c.watchersBuffer {
@@ -928,8 +889,11 @@ func (c *Cacher) dispatchEvent(event *watchCacheEvent) {
timeout := c.dispatchTimeoutBudget.takeAvailable()
c.timer.Reset(timeout)
// Make sure every watcher will try to send event without blocking first,
// even if the timer has already expired.
// Send event to all blocked watchers. As long as timer is running,
// `add` will wait for the watcher to unblock. After timeout,
// `add` will not wait, but immediately close a still blocked watcher.
// Hence, every watcher gets the chance to unblock itself while timer
// is running, not only the first ones in the list.
timer := c.timer
for _, watcher := range c.blockedWatchers {
if !watcher.add(event, timer) {
@@ -950,7 +914,7 @@ func (c *Cacher) dispatchEvent(event *watchCacheEvent) {
}
}
func (c *Cacher) startDispatchingBookmarkEvents() {
func (c *Cacher) startDispatchingBookmarkEventsLocked() {
// Pop already expired watchers. However, explicitly ignore stopped ones,
// as we don't delete watcher from bookmarkWatchers when it is stopped.
for _, watchers := range c.bookmarkWatchers.popExpiredWatchers() {
@@ -961,8 +925,7 @@ func (c *Cacher) startDispatchingBookmarkEvents() {
continue
}
c.watchersBuffer = append(c.watchersBuffer, watcher)
// Requeue the watcher for the next bookmark if needed.
c.bookmarkWatchers.addWatcher(watcher)
c.expiredBookmarkWatchers = append(c.expiredBookmarkWatchers, watcher)
}
}
}
@@ -987,7 +950,7 @@ func (c *Cacher) startDispatching(event *watchCacheEvent) {
c.watchersBuffer = c.watchersBuffer[:0]
if event.Type == watch.Bookmark {
c.startDispatchingBookmarkEvents()
c.startDispatchingBookmarkEventsLocked()
// return here to reduce following code indentation and diff
return
}
@@ -1028,22 +991,31 @@ func (c *Cacher) finishDispatching() {
defer c.Unlock()
c.dispatching = false
for _, watcher := range c.watchersToStop {
watcher.stopThreadUnsafe()
watcher.stopLocked()
}
c.watchersToStop = c.watchersToStop[:0]
for _, watcher := range c.expiredBookmarkWatchers {
if watcher.stopped {
continue
}
// requeue the watcher for the next bookmark if needed.
c.bookmarkWatchers.addWatcher(watcher)
}
c.expiredBookmarkWatchers = c.expiredBookmarkWatchers[:0]
}
func (c *Cacher) terminateAllWatchers() {
c.Lock()
defer c.Unlock()
c.watchers.terminateAll(c.objectType, c.stopWatcherThreadUnsafe)
c.watchers.terminateAll(c.objectType, c.stopWatcherLocked)
}
func (c *Cacher) stopWatcherThreadUnsafe(watcher *cacheWatcher) {
func (c *Cacher) stopWatcherLocked(watcher *cacheWatcher) {
if c.dispatching {
c.watchersToStop = append(c.watchersToStop, watcher)
} else {
watcher.stopThreadUnsafe()
watcher.stopLocked()
}
}
@@ -1062,20 +1034,23 @@ func (c *Cacher) Stop() {
return
}
c.stopped = true
c.ready.stop()
c.stopLock.Unlock()
close(c.stopCh)
c.stopWg.Wait()
}
func forgetWatcher(c *Cacher, index int, triggerValue string, triggerSupported bool) func() {
return func() {
func forgetWatcher(c *Cacher, w *cacheWatcher, index int, triggerValue string, triggerSupported bool) func(bool) {
return func(drainWatcher bool) {
c.Lock()
defer c.Unlock()
w.setDrainInputBufferLocked(drainWatcher)
// It's possible that the watcher is already not in the structure (e.g. in case of
// simultaneous Stop() and terminateAllWatchers()), but it is safe to call stopThreadUnsafe()
// simultaneous Stop() and terminateAllWatchers()), but it is safe to call stopLocked()
// on a watcher multiple times.
c.watchers.deleteWatcher(index, triggerValue, triggerSupported, c.stopWatcherThreadUnsafe)
c.watchers.deleteWatcher(index, triggerValue, triggerSupported, c.stopWatcherLocked)
}
}
@@ -1091,7 +1066,9 @@ func filterWithAttrsFunction(key string, p storage.SelectionPredicate) filterWit
// LastSyncResourceVersion returns resource version to which the underlying cache is synced.
func (c *Cacher) LastSyncResourceVersion() (uint64, error) {
c.ready.wait()
if err := c.ready.wait(); err != nil {
return 0, errors.NewServiceUnavailable(err.Error())
}
resourceVersion := c.reflector.LastSyncResourceVersion()
return c.versioner.ParseResourceVersion(resourceVersion)
@@ -1123,7 +1100,12 @@ func (lw *cacherListerWatcher) List(options metav1.ListOptions) (runtime.Object,
Continue: options.Continue,
}
if err := lw.storage.List(context.TODO(), lw.resourcePrefix, storage.ListOptions{ResourceVersionMatch: options.ResourceVersionMatch, Predicate: pred}, list); err != nil {
storageOpts := storage.ListOptions{
ResourceVersionMatch: options.ResourceVersionMatch,
Predicate: pred,
Recursive: true,
}
if err := lw.storage.GetList(context.TODO(), lw.resourcePrefix, storageOpts, list); err != nil {
return nil, err
}
return list, nil
@@ -1134,11 +1116,10 @@ func (lw *cacherListerWatcher) Watch(options metav1.ListOptions) (watch.Interfac
opts := storage.ListOptions{
ResourceVersion: options.ResourceVersion,
Predicate: storage.Everything,
Recursive: true,
ProgressNotify: true,
}
if utilfeature.DefaultFeatureGate.Enabled(features.EfficientWatchResumption) {
opts.ProgressNotify = true
}
return lw.storage.WatchList(context.TODO(), lw.resourcePrefix, opts)
return lw.storage.Watch(context.TODO(), lw.resourcePrefix, opts)
}
// errWatcher implements watch.Interface to return a single error
@@ -1189,7 +1170,7 @@ type cacheWatcher struct {
done chan struct{}
filter filterWithAttrsFunc
stopped bool
forget func()
forget func(bool)
versioner storage.Versioner
// The watcher will be closed by server after the deadline,
// save it here to send bookmark events before that.
@@ -1201,9 +1182,13 @@ type cacheWatcher struct {
// human readable identifier that helps assigning cacheWatcher
// instance with request
identifier string
// drainInputBuffer indicates whether we should delay closing this watcher
// and send all event in the input buffer.
drainInputBuffer bool
}
func newCacheWatcher(chanSize int, filter filterWithAttrsFunc, forget func(), versioner storage.Versioner, deadline time.Time, allowWatchBookmarks bool, objectType reflect.Type, identifier string) *cacheWatcher {
func newCacheWatcher(chanSize int, filter filterWithAttrsFunc, forget func(bool), versioner storage.Versioner, deadline time.Time, allowWatchBookmarks bool, objectType reflect.Type, identifier string) *cacheWatcher {
return &cacheWatcher{
input: make(chan *watchCacheEvent, chanSize),
result: make(chan watch.Event, chanSize),
@@ -1226,16 +1211,29 @@ func (c *cacheWatcher) ResultChan() <-chan watch.Event {
// Implements watch.Interface.
func (c *cacheWatcher) Stop() {
c.forget()
c.forget(false)
}
// we rely on the fact that stopThredUnsafe is actually protected by Cacher.Lock()
func (c *cacheWatcher) stopThreadUnsafe() {
// we rely on the fact that stopLocked is actually protected by Cacher.Lock()
func (c *cacheWatcher) stopLocked() {
if !c.stopped {
c.stopped = true
close(c.done)
// stop without draining the input channel was requested.
if !c.drainInputBuffer {
close(c.done)
}
close(c.input)
}
// Even if the watcher was already stopped, if it previously was
// using draining mode and it's not using it now we need to
// close the done channel now. Otherwise we could leak the
// processing goroutine: it may be trying to put more objects
// into the result channel while the channel is full and there is
// no longer anyone processing the events on the receiving end.
if !c.drainInputBuffer && !c.isDoneChannelClosedLocked() {
close(c.done)
}
}
func (c *cacheWatcher) nonblockingAdd(event *watchCacheEvent) bool {
@@ -1259,8 +1257,8 @@ func (c *cacheWatcher) add(event *watchCacheEvent, timer *time.Timer) bool {
// Since we don't want to block on it infinitely,
// we simply terminate it.
klog.V(1).Infof("Forcing %v watcher close due to unresponsiveness: %v. len(c.input) = %v, len(c.result) = %v", c.objectType.String(), c.identifier, len(c.input), len(c.result))
terminatedWatchersCounter.WithLabelValues(c.objectType.String()).Inc()
c.forget()
metrics.TerminatedWatchersCounter.WithLabelValues(c.objectType.String()).Inc()
c.forget(false)
}
if timer == nil {
@@ -1280,12 +1278,16 @@ func (c *cacheWatcher) add(event *watchCacheEvent, timer *time.Timer) bool {
func (c *cacheWatcher) nextBookmarkTime(now time.Time, bookmarkFrequency time.Duration) (time.Time, bool) {
// We try to send bookmarks:
// (a) roughly every minute
// (b) right before the watcher timeout - for now we simply set it 2s before
//
// (a) right before the watcher timeout - for now we simply set it 2s before
// the deadline
// The former gives us periodicity if the watch breaks due to unexpected
// conditions, the later ensures that on timeout the watcher is as close to
//
// (b) roughly every minute
//
// (b) gives us periodicity if the watch breaks due to unexpected
// conditions, (a) ensures that on timeout the watcher is as close to
// now as possible - this covers 99% of cases.
heartbeatTime := now.Add(bookmarkFrequency)
if c.deadline.IsZero() {
// Timeout is set by our client libraries (e.g. reflector) as well as defaulted by
@@ -1302,20 +1304,33 @@ func (c *cacheWatcher) nextBookmarkTime(now time.Time, bookmarkFrequency time.Du
return heartbeatTime, true
}
func getEventObject(object runtime.Object) runtime.Object {
if _, ok := object.(runtime.CacheableObject); ok {
// setDrainInputBufferLocked records the draining mode of this watcher.
// When drain is true, it indicates that we should delay closing this watcher
// until we send all events residing in the input buffer.
// In this file it is called from the forget function while the Cacher lock is held.
func (c *cacheWatcher) setDrainInputBufferLocked(drain bool) {
c.drainInputBuffer = drain
}
// isDoneChannelClosedLocked checks if the c.done channel is closed.
// The check never blocks: if nothing can be received from c.done right now,
// the default case is taken and false is returned.
func (c *cacheWatcher) isDoneChannelClosedLocked() bool {
select {
case <-c.done:
return true
default:
}
return false
}
// getMutableObject returns object itself when it is a *cachingObject and a
// deep copy of it (via DeepCopyObject) otherwise.
func getMutableObject(object runtime.Object) runtime.Object {
if _, ok := object.(*cachingObject); ok {
// It is safe to return without deep-copy, because the underlying
// object will lazily perform deep-copy on the first try to change
// any of its fields.
return object
}
return object.DeepCopyObject()
}
func updateResourceVersionIfNeeded(object runtime.Object, versioner storage.Versioner, resourceVersion uint64) {
if _, ok := object.(*cachingObject); ok {
// We assume that for cachingObject resourceVersion was already propagated before.
return
}
func updateResourceVersion(object runtime.Object, versioner storage.Versioner, resourceVersion uint64) {
if err := versioner.UpdateObject(object, resourceVersion); err != nil {
utilruntime.HandleError(fmt.Errorf("failure to version api object (%d) %#v: %v", resourceVersion, object, err))
}
@@ -1338,13 +1353,17 @@ func (c *cacheWatcher) convertToWatchEvent(event *watchCacheEvent) *watch.Event
switch {
case curObjPasses && !oldObjPasses:
return &watch.Event{Type: watch.Added, Object: getEventObject(event.Object)}
return &watch.Event{Type: watch.Added, Object: getMutableObject(event.Object)}
case curObjPasses && oldObjPasses:
return &watch.Event{Type: watch.Modified, Object: getEventObject(event.Object)}
return &watch.Event{Type: watch.Modified, Object: getMutableObject(event.Object)}
case !curObjPasses && oldObjPasses:
// return a delete event with the previous object content, but with the event's resource version
oldObj := getEventObject(event.PrevObject)
updateResourceVersionIfNeeded(oldObj, c.versioner, event.ResourceVersion)
oldObj := getMutableObject(event.PrevObject)
// We know that if oldObj is cachingObject (which can only be set via
// setCachingObjects), its resourceVersion is already set correctly and
// we don't need to update it. However, since cachingObject efficiently
// handles noop updates, we avoid this microoptimization here.
updateResourceVersion(oldObj, c.versioner, event.ResourceVersion)
return &watch.Event{Type: watch.Deleted, Object: oldObj}
}
@@ -1366,7 +1385,7 @@ func (c *cacheWatcher) sendWatchCacheEvent(event *watchCacheEvent) {
// would give us non-determinism.
// At the same time, we don't want to block infinitely on putting
// to c.result, when c.done is already closed.
//
// This ensures that with c.done already close, we at most once go
// into the next select after this. With that, no matter which
// statement we choose there, we will deliver only consecutive
@@ -1383,8 +1402,10 @@ func (c *cacheWatcher) sendWatchCacheEvent(event *watchCacheEvent) {
}
}
func (c *cacheWatcher) process(ctx context.Context, initEvents []*watchCacheEvent, resourceVersion uint64) {
func (c *cacheWatcher) processInterval(ctx context.Context, cacheInterval *watchCacheInterval, resourceVersion uint64) {
defer utilruntime.HandleCrash()
defer close(c.result)
defer c.Stop()
// Check how long we are processing initEvents.
// As long as these are not processed, we are not processing
@@ -1401,20 +1422,60 @@ func (c *cacheWatcher) process(ctx context.Context, initEvents []*watchCacheEven
// consider increase size of result buffer in those cases.
const initProcessThreshold = 500 * time.Millisecond
startTime := time.Now()
for _, event := range initEvents {
initEventCount := 0
for {
event, err := cacheInterval.Next()
if err != nil {
// An error indicates that the cache interval
// has been invalidated and can no longer serve
// events.
//
// Initially we considered sending an "out-of-history"
// Error event in this case, but because historically
// such events weren't sent out of the watchCache, we
// decided not to. This is still ok, because on watch
// closure, the watcher will try to re-instantiate the
// watch and then will get an explicit "out-of-history"
// window. There is potential for optimization, but for
// now, in order to be on the safe side and not break
// custom clients, the cost of it is something that we
// are fully accepting.
klog.Warningf("couldn't retrieve watch event to serve: %#v", err)
return
}
if event == nil {
break
}
c.sendWatchCacheEvent(event)
// With some events already sent, update resourceVersion so that
// events that were buffered and not yet processed won't be delivered
// to this watcher second time causing going back in time.
resourceVersion = event.ResourceVersion
initEventCount++
}
objType := c.objectType.String()
if len(initEvents) > 0 {
initCounter.WithLabelValues(objType).Add(float64(len(initEvents)))
if initEventCount > 0 {
metrics.InitCounter.WithLabelValues(objType).Add(float64(initEventCount))
}
processingTime := time.Since(startTime)
if processingTime > initProcessThreshold {
klog.V(2).Infof("processing %d initEvents of %s (%s) took %v", len(initEvents), objType, c.identifier, processingTime)
klog.V(2).Infof("processing %d initEvents of %s (%s) took %v", initEventCount, objType, c.identifier, processingTime)
}
defer close(c.result)
defer c.Stop()
c.process(ctx, resourceVersion)
}
func (c *cacheWatcher) process(ctx context.Context, resourceVersion uint64) {
// At this point we already start processing incoming watch events.
// However, the init events can still be processed because their serialization
// and sending to the client happens asynchronously.
// TODO: As described in the KEP, we would like to estimate that by delaying
// the initialization signal proportionally to the number of events to
// process, but we're leaving this to the tuning phase.
utilflowcontrol.WatchInitialized(ctx)
for {
select {
case event, ok := <-c.input:
@@ -1430,36 +1491,3 @@ func (c *cacheWatcher) process(ctx context.Context, initEvents []*watchCacheEven
}
}
}
// ready is a boolean condition that goroutines can block on until it
// becomes true. The flag is guarded by the locker of the embedded condition
// variable, which is always a *sync.RWMutex (see newReady).
type ready struct {
	ok bool
	c  *sync.Cond
}

// newReady returns a ready whose flag is initially false.
func newReady() *ready {
	cond := sync.NewCond(&sync.RWMutex{})
	return &ready{c: cond}
}

// wait blocks the calling goroutine until the flag is true.
func (r *ready) wait() {
	r.c.L.Lock()
	defer r.c.L.Unlock()
	for !r.ok {
		r.c.Wait()
	}
}

// check returns the current value of the flag without blocking on it; only
// the read side of the RWMutex is taken.
//
// TODO: Make check() function more sophisticated, in particular
// allow it to behave as "waitWithTimeout".
func (r *ready) check() bool {
	mu := r.c.L.(*sync.RWMutex)
	mu.RLock()
	current := r.ok
	mu.RUnlock()
	return current
}

// set stores ok as the new value of the flag and wakes up every goroutine
// blocked in wait so that it can re-evaluate the flag.
func (r *ready) set(ok bool) {
	r.c.L.Lock()
	r.ok = ok
	r.c.Broadcast()
	r.c.L.Unlock()
}

View File

@@ -60,11 +60,20 @@ type serializationsCache map[runtime.Identifier]*serializationResult
// so that each of those is computed exactly once.
//
// cachingObject implements the metav1.Object interface (accessors for
// all metadata fields). However, setters for all fields except from
// SelfLink (which is set lately in the path) are ignored.
// all metadata fields).
type cachingObject struct {
lock sync.RWMutex
// deepCopied defines whether the object below has already been
// deep copied. The operation is performed lazily on the first
// setXxx operation.
//
// The lazy deep-copy is useful, as effectively the only
// case when we are setting some fields is ResourceVersion for
// DELETE events, so in all other cases we can effectively avoid
// performing any deep copies.
deepCopied bool
// Object for which serializations are cached.
object metaRuntimeInterface
@@ -80,7 +89,10 @@ type cachingObject struct {
// metav1.Object type.
func newCachingObject(object runtime.Object) (*cachingObject, error) {
if obj, ok := object.(metaRuntimeInterface); ok {
result := &cachingObject{object: obj.DeepCopyObject().(metaRuntimeInterface)}
result := &cachingObject{
object: obj,
deepCopied: false,
}
result.serializations.Store(make(serializationsCache))
return result, nil
}
@@ -125,6 +137,10 @@ func (o *cachingObject) CacheEncode(id runtime.Identifier, encode func(runtime.O
result := o.getSerializationResult(id)
result.once.Do(func() {
buffer := bytes.NewBuffer(nil)
// TODO(wojtek-t): This is currently making a copy to avoid races
// in cases where encoding is making subtle object modifications,
// e.g. #82497
// Figure out if we can somehow avoid this under some conditions.
result.err = encode(o.GetObject(), buffer)
result.raw = buffer.Bytes()
})
@@ -157,7 +173,9 @@ func (o *cachingObject) DeepCopyObject() runtime.Object {
// DeepCopyObject on cachingObject is not expected to be called anywhere.
// However, to be on the safe-side, we implement it, though given the
// cache is only an optimization we ignore copying it.
result := &cachingObject{}
result := &cachingObject{
deepCopied: true,
}
result.serializations.Store(make(serializationsCache))
o.lock.RLock()
@@ -215,6 +233,10 @@ func (o *cachingObject) conditionalSet(isNoop func() bool, set func()) {
if isNoop() {
return
}
if !o.deepCopied {
o.object = o.object.DeepCopyObject().(metaRuntimeInterface)
o.deepCopied = true
}
o.invalidateCacheLocked()
set()
}
@@ -373,17 +395,6 @@ func (o *cachingObject) SetOwnerReferences(references []metav1.OwnerReference) {
func() { o.object.SetOwnerReferences(references) },
)
}
// GetClusterName returns the cluster name reported by the wrapped object.
// The read is performed while holding o.lock for reading.
func (o *cachingObject) GetClusterName() string {
o.lock.RLock()
defer o.lock.RUnlock()
return o.object.GetClusterName()
}
// SetClusterName sets the cluster name of the wrapped object through
// conditionalSet, handing it a predicate that reports whether the wrapped
// object already carries clusterName and a closure that performs the write.
func (o *cachingObject) SetClusterName(clusterName string) {
	alreadySet := func() bool { return o.object.GetClusterName() == clusterName }
	apply := func() { o.object.SetClusterName(clusterName) }
	o.conditionalSet(alreadySet, apply)
}
func (o *cachingObject) GetManagedFields() []metav1.ManagedFieldsEntry {
o.lock.RLock()
defer o.lock.RUnlock()

View File

@@ -1,95 +0,0 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cacher
import (
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)
/*
* By default, all the following metrics are defined as falling under
* ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/20190404-kubernetes-control-plane-metrics-stability.md#stability-classes)
*
* Promoting the stability level of the metric is a responsibility of the component owner, since it
* involves explicitly acknowledging support for the metric across multiple releases, in accordance with
* the metric stability policy.
*/
// Metrics of the watch cache. Every vector below has a single "resource"
// label.
var (
// initCounter counts init events processed in the watch cache.
initCounter = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "apiserver_init_events_total",
Help: "Counter of init events processed in watchcache broken by resource type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource"},
)
// terminatedWatchersCounter counts watchers closed due to unresponsiveness.
terminatedWatchersCounter = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "apiserver_terminated_watchers_total",
Help: "Counter of watchers closed due to unresponsiveness broken by resource type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource"},
)
// watchCacheCapacityIncreaseTotal counts watch cache capacity increase events.
watchCacheCapacityIncreaseTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "watch_cache_capacity_increase_total",
Help: "Total number of watch cache capacity increase events broken by resource type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource"},
)
// watchCacheCapacityDecreaseTotal counts watch cache capacity decrease events.
watchCacheCapacityDecreaseTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "watch_cache_capacity_decrease_total",
Help: "Total number of watch cache capacity decrease events broken by resource type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource"},
)
// watchCacheCapacity is a gauge holding the capacity of the watch cache.
watchCacheCapacity = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Name: "watch_cache_capacity",
Help: "Total capacity of watch cache broken by resource type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource"},
)
)
func init() {
legacyregistry.MustRegister(initCounter)
legacyregistry.MustRegister(terminatedWatchersCounter)
legacyregistry.MustRegister(watchCacheCapacityIncreaseTotal)
legacyregistry.MustRegister(watchCacheCapacityDecreaseTotal)
legacyregistry.MustRegister(watchCacheCapacity)
}
// recordsWatchCacheCapacityChange record watchCache capacity resize(increase or decrease) operations.
func recordsWatchCacheCapacityChange(objType string, old, new int) {
if old < new {
watchCacheCapacityIncreaseTotal.WithLabelValues(objType).Inc()
return
}
watchCacheCapacityDecreaseTotal.WithLabelValues(objType).Inc()
watchCacheCapacity.WithLabelValues(objType).Set(float64(new))
}

View File

@@ -0,0 +1,8 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- sig-instrumentation-approvers
reviewers:
- sig-instrumentation-reviewers
labels:
- sig/instrumentation

View File

@@ -0,0 +1,174 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
compbasemetrics "k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)
const (
// namespace is the value used for the Namespace field of the metric
// options in this file that set one.
namespace = "apiserver"
// subsystem is the value used for the Subsystem field of the metric
// options in this file that set one.
subsystem = "watch_cache"
)
/*
* By default, all the following metrics are defined as falling under
* ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes)
*
* Promoting the stability level of the metric is a responsibility of the component owner, since it
* involves explicitly acknowledging support for the metric across multiple releases, in accordance with
* the metric stability policy.
*/
// Metrics of the watch cache. Exported variables are meant to be updated
// directly by other packages; the unexported ones are only updated through
// the Record* helpers of this file.
var (
// listCacheCount counts LIST requests served from the watch cache,
// labelled by resource prefix and index.
listCacheCount = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Name: "cache_list_total",
Help: "Number of LIST requests served from watch cache",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource_prefix", "index"},
)
// listCacheNumFetched counts objects read from the watch cache while
// serving LIST requests, labelled by resource prefix and index.
listCacheNumFetched = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Name: "cache_list_fetched_objects_total",
Help: "Number of objects read from watch cache in the course of serving a LIST request",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource_prefix", "index"},
)
// listCacheNumReturned counts objects returned for LIST requests served
// from the watch cache, labelled by resource prefix only.
listCacheNumReturned = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Name: "cache_list_returned_objects_total",
Help: "Number of objects returned for a LIST request from watch cache",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource_prefix"},
)
// InitCounter counts init events processed in the watch cache.
InitCounter = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Name: "init_events_total",
Help: "Counter of init events processed in watch cache broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
// EventsCounter counts events dispatched in the watch cache.
EventsCounter = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "events_dispatched_total",
Help: "Counter of events dispatched in watch cache broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
// TerminatedWatchersCounter counts watchers closed due to unresponsiveness.
TerminatedWatchersCounter = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Name: "terminated_watchers_total",
Help: "Counter of watchers closed due to unresponsiveness broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
// watchCacheCapacityIncreaseTotal counts watch cache capacity increase
// events. Only Subsystem is set; there is no Namespace.
watchCacheCapacityIncreaseTotal = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Subsystem: subsystem,
Name: "capacity_increase_total",
Help: "Total number of watch cache capacity increase events broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
// watchCacheCapacityDecreaseTotal counts watch cache capacity decrease
// events. Only Subsystem is set; there is no Namespace.
watchCacheCapacityDecreaseTotal = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Subsystem: subsystem,
Name: "capacity_decrease_total",
Help: "Total number of watch cache capacity decrease events broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
// WatchCacheCapacity is a gauge holding the capacity of the watch cache.
// Only Subsystem is set; there is no Namespace.
WatchCacheCapacity = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Subsystem: subsystem,
Name: "capacity",
Help: "Total capacity of watch cache broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
// WatchCacheInitializations counts watch cache initializations.
WatchCacheInitializations = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "initializations_total",
Help: "Counter of watch cache initializations broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
)
var registerMetrics sync.Once
// Register registers all watch cache metrics of this package with the legacy
// registry. The registration is guarded by registerMetrics, so it is
// performed only on the first call; later calls do nothing.
func Register() {
	registerMetrics.Do(func() {
		collectors := []compbasemetrics.Registerable{
			listCacheCount,
			listCacheNumFetched,
			listCacheNumReturned,
			InitCounter,
			EventsCounter,
			TerminatedWatchersCounter,
			watchCacheCapacityIncreaseTotal,
			watchCacheCapacityDecreaseTotal,
			WatchCacheCapacity,
			WatchCacheInitializations,
		}
		for _, collector := range collectors {
			legacyregistry.MustRegister(collector)
		}
	})
}
// RecordListCacheMetrics records the cost of serving one LIST request from
// the watch cache: it bumps the request counter and adds numFetched and
// numReturned to the fetched/returned object counters.
//
// resourcePrefix and indexName are used as label values; the returned-objects
// counter is labelled by resourcePrefix only.
func RecordListCacheMetrics(resourcePrefix, indexName string, numFetched, numReturned int) {
	fetched := float64(numFetched)
	returned := float64(numReturned)

	listCacheCount.WithLabelValues(resourcePrefix, indexName).Inc()
	listCacheNumFetched.WithLabelValues(resourcePrefix, indexName).Add(fetched)
	listCacheNumReturned.WithLabelValues(resourcePrefix).Add(returned)
}
// RecordsWatchCacheCapacityChange records a watch cache capacity resize
// (increase or decrease) for the given resource type.
//
// objType is used as the value of the "resource" label; old and new are the
// capacities before and after the resize. The WatchCacheCapacity gauge is
// always set to new. When old < new the increase counter is incremented,
// otherwise (including old == new) the decrease counter is incremented.
func RecordsWatchCacheCapacityChange(objType string, old, new int) {
	WatchCacheCapacity.WithLabelValues(objType).Set(float64(new))
	if old < new {
		// The resize must be counted on the increase counter. Calling Inc()
		// on the WatchCacheCapacity gauge here would leave the gauge at
		// new+1 and never advance the increase counter.
		watchCacheCapacityIncreaseTotal.WithLabelValues(objType).Inc()
		return
	}
	watchCacheCapacityDecreaseTotal.WithLabelValues(objType).Inc()
}

96
vendor/k8s.io/apiserver/pkg/storage/cacher/ready.go generated vendored Normal file
View File

@@ -0,0 +1,96 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cacher
import (
"fmt"
"sync"
)
// status is the state of a ready condition.
type status int

const (
	// Pending is the initial state: waiters block.
	Pending status = iota
	// Ready releases waiters without an error.
	Ready
	// Stopped releases waiters with an error and is never left again.
	Stopped
)

// ready is a three state condition variable that blocks until it is Ready,
// unless it is Stopped. Its initial state is Pending. The state is guarded
// by the locker of c, which is always a *sync.RWMutex (see newReady).
type ready struct {
	state status
	c     *sync.Cond
}

// newReady returns a ready in the Pending state.
func newReady() *ready {
	r := &ready{state: Pending}
	r.c = sync.NewCond(&sync.RWMutex{})
	return r
}

// wait blocks while the state is Pending. It returns nil once the state is
// Ready and an error when the state is Stopped (or any unexpected value).
func (r *ready) wait() error {
	r.c.L.Lock()
	defer r.c.L.Unlock()

	for r.state == Pending {
		r.c.Wait()
	}

	if r.state == Ready {
		return nil
	}
	if r.state == Stopped {
		return fmt.Errorf("apiserver cacher is stopped")
	}
	return fmt.Errorf("unexpected apiserver cache state: %v", r.state)
}

// check returns true only if the state is Ready. It never blocks on the
// condition and takes only the read side of the RWMutex.
func (r *ready) check() bool {
	// TODO: Make check() function more sophisticated, in particular
	// allow it to behave as "waitWithTimeout".
	mu := r.c.L.(*sync.RWMutex)
	mu.RLock()
	defer mu.RUnlock()
	return r.state == Ready
}

// set moves the state to Ready (ok == true) or Pending (ok == false) and
// wakes up all waiters. It has no effect once the state is Stopped.
func (r *ready) set(ok bool) {
	r.c.L.Lock()
	defer r.c.L.Unlock()

	if r.state == Stopped {
		return
	}
	next := Pending
	if ok {
		next = Ready
	}
	r.state = next
	r.c.Broadcast()
}

// stop moves the state to Stopped and wakes up all waiters. The transition
// is irreversible; calling stop again does nothing.
func (r *ready) stop() {
	r.c.L.Lock()
	defer r.c.L.Unlock()

	if r.state == Stopped {
		return
	}
	r.state = Stopped
	r.c.Broadcast()
}

View File

@@ -19,6 +19,8 @@ package cacher
import (
"sync"
"time"
"k8s.io/utils/clock"
)
const (
@@ -28,13 +30,14 @@ const (
// timeBudget implements a budget of time that you can use and is
// periodically being refreshed. The pattern to use it is:
// budget := newTimeBudget(...)
// ...
// timeout := budget.takeAvailable()
// // Now you can spend at most timeout on doing stuff
// ...
// // If you didn't use all timeout, return what you didn't use
// budget.returnUnused(<unused part of timeout>)
//
// budget := newTimeBudget(...)
// ...
// timeout := budget.takeAvailable()
// // Now you can spend at most timeout on doing stuff
// ...
// // If you didn't use all timeout, return what you didn't use
// budget.returnUnused(<unused part of timeout>)
//
// NOTE: It's not recommended to be used concurrently from multiple threads -
// if first user takes the whole timeout, the second one will get 0 timeout
@@ -46,42 +49,39 @@ type timeBudget interface {
type timeBudgetImpl struct {
sync.Mutex
budget time.Duration
refresh time.Duration
clock clock.Clock
budget time.Duration
maxBudget time.Duration
refresh time.Duration
// last store last access time
last time.Time
}
func newTimeBudget(stopCh <-chan struct{}) timeBudget {
func newTimeBudget() timeBudget {
result := &timeBudgetImpl{
clock: clock.RealClock{},
budget: time.Duration(0),
refresh: refreshPerSecond,
maxBudget: maxBudget,
}
go result.periodicallyRefresh(stopCh)
result.last = result.clock.Now()
return result
}
// periodicallyRefresh adds t.refresh to the budget once per second, capping
// the budget at t.maxBudget, until stopCh is closed or receives a value.
// The budget is updated while holding the mutex embedded in t. The function
// blocks, so it is meant to run in its own goroutine.
func (t *timeBudgetImpl) periodicallyRefresh(stopCh <-chan struct{}) {
	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()

	for {
		select {
		case <-stopCh:
			return
		case <-ticker.C:
			t.Lock()
			t.budget = t.budget + t.refresh
			if t.budget > t.maxBudget {
				t.budget = t.maxBudget
			}
			t.Unlock()
		}
	}
}
func (t *timeBudgetImpl) takeAvailable() time.Duration {
t.Lock()
defer t.Unlock()
// budget accumulated since last access
now := t.clock.Now()
acc := now.Sub(t.last).Seconds() * t.refresh.Seconds()
if acc < 0 {
acc = 0
}
// update current budget and store the current time
if t.budget = t.budget + time.Duration(acc*1e9); t.budget > t.maxBudget {
t.budget = t.maxBudget
}
t.last = now
result := t.budget
t.budget = time.Duration(0)
return result
@@ -94,6 +94,8 @@ func (t *timeBudgetImpl) returnUnused(unused time.Duration) {
// We used more than allowed.
return
}
// add the unused time directly to the budget
// takeAvailable() will take into account the elapsed time
if t.budget = t.budget + unused; t.budget > t.maxBudget {
t.budget = t.maxBudget
}

View File

@@ -18,6 +18,7 @@ package cacher
import (
"fmt"
"math"
"reflect"
"sort"
"sync"
@@ -27,11 +28,12 @@ import (
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/clock"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/cacher/metrics"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
utiltrace "k8s.io/utils/trace"
)
@@ -189,6 +191,9 @@ type watchCache struct {
// cacher's objectType.
objectType reflect.Type
// For testing cache interval invalidation.
indexValidator indexValidator
}
func newWatchCache(
@@ -217,8 +222,10 @@ func newWatchCache(
objectType: objectType,
}
objType := objectType.String()
watchCacheCapacity.WithLabelValues(objType).Set(float64(wc.capacity))
metrics.WatchCacheCapacity.WithLabelValues(objType).Set(float64(wc.capacity))
wc.cond = sync.NewCond(wc.RLocker())
wc.indexValidator = wc.isIndexValidLocked
return wc
}
@@ -380,7 +387,7 @@ func (w *watchCache) doCacheResizeLocked(capacity int) {
newCache[i%capacity] = w.cache[i%w.capacity]
}
w.cache = newCache
recordsWatchCacheCapacityChange(w.objectType.String(), w.capacity, capacity)
metrics.RecordsWatchCacheCapacityChange(w.objectType.String(), w.capacity, capacity)
w.capacity = capacity
}
@@ -420,17 +427,27 @@ func (w *watchCache) List() []interface{} {
// You HAVE TO explicitly call w.RUnlock() after this function.
func (w *watchCache) waitUntilFreshAndBlock(resourceVersion uint64, trace *utiltrace.Trace) error {
startTime := w.clock.Now()
go func() {
// Wake us up when the time limit has expired. The docs
// promise that time.After (well, NewTimer, which it calls)
// will wait *at least* the duration given. Since this go
// routine starts sometime after we record the start time, and
// it will wake up the loop below sometime after the broadcast,
// we don't need to worry about waking it up before the time
// has expired accidentally.
<-w.clock.After(blockTimeout)
w.cond.Broadcast()
}()
// In case resourceVersion is 0, we accept arbitrarily stale result.
// As a result, the condition in the below for loop will never be
// satisfied (w.resourceVersion is never negative), this call will
// never hit the w.cond.Wait().
// As a result - we can optimize the code by not firing the wakeup
// function (and avoid starting a goroutine), especially given that
// resourceVersion=0 is the most common case.
if resourceVersion > 0 {
go func() {
// Wake us up when the time limit has expired. The docs
// promise that time.After (well, NewTimer, which it calls)
// will wait *at least* the duration given. Since this go
// routine starts sometime after we record the start time, and
// it will wake up the loop below sometime after the broadcast,
// we don't need to worry about waking it up before the time
// has expired accidentally.
<-w.clock.After(blockTimeout)
w.cond.Broadcast()
}()
}
w.RLock()
if trace != nil {
@@ -449,12 +466,13 @@ func (w *watchCache) waitUntilFreshAndBlock(resourceVersion uint64, trace *utilt
return nil
}
// WaitUntilFreshAndList returns list of pointers to <storeElement> objects.
func (w *watchCache) WaitUntilFreshAndList(resourceVersion uint64, matchValues []storage.MatchValue, trace *utiltrace.Trace) ([]interface{}, uint64, error) {
// WaitUntilFreshAndList returns list of pointers to `storeElement` objects along
// with their ResourceVersion and the name of the index, if any, that was used.
func (w *watchCache) WaitUntilFreshAndList(resourceVersion uint64, matchValues []storage.MatchValue, trace *utiltrace.Trace) ([]interface{}, uint64, string, error) {
err := w.waitUntilFreshAndBlock(resourceVersion, trace)
defer w.RUnlock()
if err != nil {
return nil, 0, err
return nil, 0, "", err
}
// This isn't the place where we do "final filtering" - only some "prefiltering" is happening here. So the only
@@ -463,10 +481,10 @@ func (w *watchCache) WaitUntilFreshAndList(resourceVersion uint64, matchValues [
// TODO: if multiple indexes match, return the one with the fewest items, so as to do as much filtering as possible.
for _, matchValue := range matchValues {
if result, err := w.store.ByIndex(matchValue.IndexName, matchValue.Value); err == nil {
return result, w.resourceVersion, nil
return result, w.resourceVersion, matchValue.IndexName, nil
}
}
return w.store.List(), w.resourceVersion, nil
return w.store.List(), w.resourceVersion, "", nil
}
// WaitUntilFreshAndGet returns a pointers to <storeElement> object.
@@ -557,7 +575,74 @@ func (w *watchCache) SetOnReplace(onReplace func()) {
w.onReplace = onReplace
}
func (w *watchCache) GetAllEventsSinceThreadUnsafe(resourceVersion uint64) ([]*watchCacheEvent, error) {
// Resync is a no-op for watchCache: it does nothing and always returns nil.
func (w *watchCache) Resync() error {
// Nothing to do
return nil
}
// currentCapacity returns w.capacity. The value is read while holding the
// watchCache lock (the exclusive Lock, not RLock), so the caller must not
// already hold it.
func (w *watchCache) currentCapacity() int {
w.Lock()
defer w.Unlock()
return w.capacity
}
// Bounds applied by suggestedWatchChannelSize to the size of watch channels.
const (
// minWatchChanSize is the min size of channels used by the watch.
// We keep that set to 10 for "backward compatibility" until we
// convince ourselves based on some metrics that decreasing is safe.
minWatchChanSize = 10
// maxWatchChanSizeWithIndexAndTrigger is the max size of the channel
// used by the watch using the index and trigger selector.
maxWatchChanSizeWithIndexAndTrigger = 10
// maxWatchChanSizeWithIndexWithoutTrigger is the max size of the channel
// used by the watch using the index but without triggering selector.
// We keep that set to 1000 for "backward compatibility", until we
// convince ourselves based on some metrics that decreasing is safe.
maxWatchChanSizeWithIndexWithoutTrigger = 1000
// maxWatchChanSizeWithoutIndex is the max size of the channel
// used by the watch not using the index.
// TODO(wojtek-t): Figure out if the value shouldn't be higher.
maxWatchChanSizeWithoutIndex = 100
)
// suggestedWatchChannelSize returns the suggested size of a watch channel.
//
// The estimate follows the heuristic that a channel should roughly be able
// to keep one second of history: the current capacity of the cache is
// divided by the length of the history window (eventFreshDuration, in
// seconds) and rounded up. The estimate is then raised to minWatchChanSize
// if it is lower and, after that, capped by a maximum that depends on
// whether an index exists and whether a trigger is used.
func (w *watchCache) suggestedWatchChannelSize(indexExists, triggerUsed bool) int {
	eventsPerSecond := float64(w.currentCapacity()) / eventFreshDuration.Seconds()
	chanSize := int(math.Ceil(eventsPerSecond))

	// Avoid ending up with too low a value...
	if chanSize < minWatchChanSize {
		chanSize = minWatchChanSize
	}

	// ...or too large a value. The upper bound is applied last, so it wins
	// over the lower bound.
	upperBound := maxWatchChanSizeWithoutIndex
	if indexExists {
		upperBound = maxWatchChanSizeWithIndexWithoutTrigger
		if triggerUsed {
			upperBound = maxWatchChanSizeWithIndexAndTrigger
		}
	}
	if chanSize > upperBound {
		chanSize = upperBound
	}
	return chanSize
}
// isIndexValidLocked checks if a given index is still valid, i.e. if it is
// not smaller than w.startIndex.
// This assumes that the lock is held.
func (w *watchCache) isIndexValidLocked(index int) bool {
return index >= w.startIndex
}
// getAllEventsSinceLocked returns a watchCacheInterval that can be used to
// retrieve events since a certain resourceVersion. This function assumes to
// be called under the watchCache lock.
func (w *watchCache) getAllEventsSinceLocked(resourceVersion uint64) (*watchCacheInterval, error) {
size := w.endIndex - w.startIndex
var oldest uint64
switch {
@@ -583,27 +668,11 @@ func (w *watchCache) GetAllEventsSinceThreadUnsafe(resourceVersion uint64) ([]*w
// current state and only then start watching from that point.
//
// TODO: In v2 api, we should stop returning the current state - #13969.
allItems := w.store.List()
result := make([]*watchCacheEvent, len(allItems))
for i, item := range allItems {
elem, ok := item.(*storeElement)
if !ok {
return nil, fmt.Errorf("not a storeElement: %v", elem)
}
objLabels, objFields, err := w.getAttrsFunc(elem.Object)
if err != nil {
return nil, err
}
result[i] = &watchCacheEvent{
Type: watch.Added,
Object: elem.Object,
ObjLabels: objLabels,
ObjFields: objFields,
Key: elem.Key,
ResourceVersion: w.resourceVersion,
}
ci, err := newCacheIntervalFromStore(w.resourceVersion, w.store, w.getAttrsFunc)
if err != nil {
return nil, err
}
return result, nil
return ci, nil
}
if resourceVersion < oldest-1 {
return nil, errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d (%d)", resourceVersion, oldest-1))
@@ -614,20 +683,9 @@ func (w *watchCache) GetAllEventsSinceThreadUnsafe(resourceVersion uint64) ([]*w
return w.cache[(w.startIndex+i)%w.capacity].ResourceVersion > resourceVersion
}
first := sort.Search(size, f)
result := make([]*watchCacheEvent, size-first)
for i := 0; i < size-first; i++ {
result[i] = w.cache[(w.startIndex+first+i)%w.capacity]
indexerFunc := func(i int) *watchCacheEvent {
return w.cache[i%w.capacity]
}
return result, nil
}
func (w *watchCache) GetAllEventsSince(resourceVersion uint64) ([]*watchCacheEvent, error) {
w.RLock()
defer w.RUnlock()
return w.GetAllEventsSinceThreadUnsafe(resourceVersion)
}
func (w *watchCache) Resync() error {
// Nothing to do
return nil
ci := newCacheInterval(w.startIndex+first, w.endIndex, indexerFunc, w.indexValidator, &w.RWMutex)
return ci, nil
}

View File

@@ -0,0 +1,226 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cacher
import (
"fmt"
"sync"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/tools/cache"
)
// watchCacheInterval serves as an abstraction over a source
// of watchCacheEvents. It maintains a window of events over
// an underlying source and these events can be served using
// the exposed Next() API. The main intent for doing things
// this way is to introduce an upper bound of memory usage
// for starting a watch and reduce the maximum possible time
// interval for which the lock would be held while events are
// copied over.
//
// The source of events for the interval is typically either
// the watchCache circular buffer, if events being retrieved
// need to be for resource versions > 0 or the underlying
// implementation of Store, if resource version = 0.
//
// Furthermore, an interval can be either valid or invalid at
// any given point of time. The notion of validity makes sense
// only in cases where the window of events in the underlying
// source can change over time - i.e. for watchCache circular
// buffer. When the circular buffer is full and an event needs
// to be popped off, watchCache::startIndex is incremented. In
// this case, an interval tracking that popped event is valid
// only if it has already been copied to its internal buffer.
// However, for efficiency we perform that lazily and we mark
// an interval as invalid iff we need to copy events from the
// watchCache and we end up needing events that have already
// been popped off. This translates to the following condition:
//
// watchCacheInterval::startIndex >= watchCache::startIndex.
//
// When this condition becomes false, the interval is no longer
// valid and should not be used to retrieve and serve elements
// from the underlying source.
type watchCacheInterval struct {
// startIndex denotes the starting point of the interval
// being considered. The value is the index in the actual
// source of watchCacheEvents. If the source of events is
// the watchCache, then this must be used modulo capacity.
// It is advanced by fillBuffer as events are copied into
// the internal buffer.
startIndex int
// endIndex denotes the ending point of the interval being
// considered. The value is the index in the actual source
// of events. If the source of the events is the watchCache,
// then this should be used modulo capacity. The bound is
// exclusive: events are only copied while startIndex is
// strictly less than endIndex.
endIndex int
// indexer is meant to inject behaviour for how an event must
// be retrieved from the underlying source given an index.
// It is left nil by newCacheIntervalFromStore.
indexer indexerFunc
// indexValidator is used to check if a given index is still
// valid from the perspective of the underlying source. If it
// is deemed that the index is not valid, then this interval
// can no longer be used to serve events. Use of indexValidator
// is warranted only in cases where the window of events in the
// underlying source can change over time. Furthermore, an
// interval is invalid if its startIndex no longer coincides
// with the startIndex of underlying source.
// It is left nil by newCacheIntervalFromStore.
indexValidator indexValidator
// buffer holds watchCacheEvents that this interval returns on
// a call to Next(). This exists mainly to reduce acquiring the
// lock on each invocation of Next().
buffer *watchCacheIntervalBuffer
// lock effectively protects access to the underlying source
// of events through - indexer and indexValidator.
//
// Given that indexer and indexValidator only read state, if
// possible, Locker obtained through RLocker() is provided.
// It is left nil by newCacheIntervalFromStore.
lock sync.Locker
}
// attrFunc computes the label set and the field set of an object, or
// returns an error when they cannot be obtained.
type attrFunc func(runtime.Object) (labels.Set, fields.Set, error)
// indexerFunc returns the watchCacheEvent found at the given index of
// the underlying source. A nil result makes fillBuffer stop copying.
type indexerFunc func(int) *watchCacheEvent
// indexValidator reports whether the given index can still be served
// from the underlying source.
type indexValidator func(int) bool
// newCacheInterval creates an interval covering the indices
// [startIndex, endIndex) of an underlying event source.
//
// indexer fetches the event stored at an index, indexValidator tells
// whether an index can still be served, and locker guards both of
// those accesses. The interval starts with an empty internal buffer
// that has room for bufferSize events.
func newCacheInterval(startIndex, endIndex int, indexer indexerFunc, indexValidator indexValidator, locker sync.Locker) *watchCacheInterval {
	events := make([]*watchCacheEvent, bufferSize)

	ci := &watchCacheInterval{buffer: &watchCacheIntervalBuffer{buffer: events}}
	ci.startIndex, ci.endIndex = startIndex, endIndex
	ci.indexer = indexer
	ci.indexValidator = indexValidator
	ci.lock = locker
	return ci
}
// newCacheIntervalFromStore is meant to handle the case of rv=0, such that the events
// returned by Next() need to be events from a List() done on the underlying store of
// the watch cache.
//
// Every item listed from store is turned into a watch.Added event stamped with
// resourceVersion, with labels and fields computed by getAttrsFunc. All events
// are placed in the interval's buffer up front, so the returned interval has
// no indexer, indexValidator or lock.
//
// An error is returned if a listed item is not a *storeElement or if
// getAttrsFunc fails for one of the objects.
func newCacheIntervalFromStore(resourceVersion uint64, store cache.Indexer, getAttrsFunc attrFunc) (*watchCacheInterval, error) {
	allItems := store.List()
	buffer := &watchCacheIntervalBuffer{
		buffer: make([]*watchCacheEvent, len(allItems)),
	}
	for i, item := range allItems {
		elem, ok := item.(*storeElement)
		if !ok {
			// elem is a nil *storeElement when the assertion fails, so
			// report the offending item itself.
			return nil, fmt.Errorf("not a storeElement: %v", item)
		}
		objLabels, objFields, err := getAttrsFunc(elem.Object)
		if err != nil {
			return nil, err
		}
		buffer.buffer[i] = &watchCacheEvent{
			Type:            watch.Added,
			Object:          elem.Object,
			ObjLabels:       objLabels,
			ObjFields:       objFields,
			Key:             elem.Key,
			ResourceVersion: resourceVersion,
		}
		buffer.endIndex++
	}
	ci := &watchCacheInterval{
		startIndex: 0,
		// Simulate that we already have all the events we're looking for.
		// With startIndex == endIndex, Next() only drains the buffer and
		// never reaches the (nil) lock, indexer or indexValidator.
		endIndex: 0,
		buffer:   buffer,
	}
	return ci, nil
}
// Next hands out the events of the interval one at a time.
//
// Events already sitting in the internal buffer are returned without
// taking the lock. Once the buffer is drained and the interval still
// has indices left, the lock is taken, the current startIndex is
// checked with indexValidator and the buffer is refilled from the
// underlying source.
//
// It returns (nil, nil) when there is nothing more to serve, and an
// error when the interval has been invalidated.
func (wci *watchCacheInterval) Next() (*watchCacheEvent, error) {
	// Fast path: serve straight from the buffer.
	if buffered, ok := wci.buffer.next(); ok {
		return buffered, nil
	}

	// The buffer is drained; stop if the source range is exhausted too.
	if wci.endIndex <= wci.startIndex {
		return nil, nil
	}

	wci.lock.Lock()
	defer wci.lock.Unlock()

	if !wci.indexValidator(wci.startIndex) {
		return nil, fmt.Errorf("cache interval invalidated, interval startIndex: %d", wci.startIndex)
	}

	wci.fillBuffer()
	refilled, ok := wci.buffer.next()
	if !ok {
		return nil, nil
	}
	return refilled, nil
}
// fillBuffer resets the internal buffer and copies events into it from
// the underlying source, advancing startIndex for every event copied.
// Copying stops when the interval is exhausted, when the buffer is
// full, or when the indexer yields a nil event.
func (wci *watchCacheInterval) fillBuffer() {
	buf := wci.buffer
	buf.startIndex, buf.endIndex = 0, 0

	for {
		if wci.startIndex >= wci.endIndex || buf.isFull() {
			return
		}
		event := wci.indexer(wci.startIndex)
		if event == nil {
			return
		}
		buf.buffer[buf.endIndex] = event
		buf.endIndex++
		wci.startIndex++
	}
}
// bufferSize is the number of event slots allocated for the buffer of
// an interval created by newCacheInterval, and the fill level at which
// watchCacheIntervalBuffer.isFull reports true.
const bufferSize = 100
// watchCacheIntervalBuffer is used to reduce acquiring
// the lock on each invocation of watchCacheInterval.Next().
type watchCacheIntervalBuffer struct {
// buffer is used to hold watchCacheEvents that
// the interval returns on a call to Next().
buffer []*watchCacheEvent
// startIndex is the position in buffer of the event that the
// next call to next() returns; endIndex is the position just
// past the last buffered event. The buffer holds no pending
// events when the two are equal.
startIndex int
endIndex int
}
// next pops the oldest pending event off the interval buffer. The
// boolean result is false, and the event nil, when no event is pending.
func (wcib *watchCacheIntervalBuffer) next() (*watchCacheEvent, bool) {
	if !wcib.isEmpty() {
		head := wcib.startIndex
		event := wcib.buffer[head]
		wcib.startIndex = head + 1
		return event, true
	}
	return nil, false
}
// isFull reports whether the number of buffered events has reached
// bufferSize.
func (wcib *watchCacheIntervalBuffer) isFull() bool {
	return bufferSize <= wcib.endIndex
}
// isEmpty reports whether the buffer has no pending events, i.e.
// whether startIndex has caught up with endIndex.
func (wcib *watchCacheIntervalBuffer) isEmpty() bool {
	pending := wcib.endIndex - wcib.startIndex
	return pending == 0
}

93
vendor/k8s.io/apiserver/pkg/storage/continue.go generated vendored Normal file
View File

@@ -0,0 +1,93 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package storage
import (
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"path"
"strings"
)
// Errors reported by DecodeContinue, either directly or wrapped with
// additional detail.
var (
// ErrInvalidStartRV is returned when a meta.k8s.io/v1 continue token
// carries a resourceVersion of zero.
ErrInvalidStartRV = errors.New("continue key is not valid: incorrect encoded start resourceVersion (version meta.k8s.io/v1)")
// ErrEmptyStartKey is returned when a meta.k8s.io/v1 continue token
// carries an empty start key.
ErrEmptyStartKey = errors.New("continue key is not valid: encoded start key empty (version meta.k8s.io/v1)")
// ErrGenericInvalidKey is wrapped by the errors returned when a continue
// token cannot be base64- or JSON-decoded, or when its start key is not
// a clean path.
ErrGenericInvalidKey = errors.New("continue key is not valid")
// ErrUnrecognizedEncodedVersion is wrapped by the error returned when a
// continue token names a version other than meta.k8s.io/v1.
ErrUnrecognizedEncodedVersion = errors.New("continue key is not valid: server does not recognize this encoded version")
)
// continueToken is a simple structured object for encoding the state of a continue token.
// TODO: if we change the version of the encoded form, we can't start encoding the new version
// until all other servers are upgraded (i.e. we need to support rolling schema)
// This is a public API struct and cannot change.
type continueToken struct {
// APIVersion identifies the encoding of the token; DecodeContinue only
// accepts "meta.k8s.io/v1".
APIVersion string `json:"v"`
// ResourceVersion is the resource version carried by the token.
// DecodeContinue rejects a value of zero.
ResourceVersion int64 `json:"rv"`
// StartKey is the key to continue from, relative to the key prefix.
StartKey string `json:"start"`
}
// DecodeContinue parses an encoded continue token and returns the fully
// qualified key to continue from together with the resource version
// stored in the token.
//
// continueValue is the raw-URL-base64 encoded JSON token and keyPrefix is
// prepended to the start key found in it. Errors wrap, or are, one of
// ErrGenericInvalidKey, ErrInvalidStartRV, ErrEmptyStartKey or
// ErrUnrecognizedEncodedVersion.
// TODO: return a typed error that instructs clients that they must relist
func DecodeContinue(continueValue, keyPrefix string) (fromKey string, rv int64, err error) {
	raw, decodeErr := base64.RawURLEncoding.DecodeString(continueValue)
	if decodeErr != nil {
		return "", 0, fmt.Errorf("%w: %v", ErrGenericInvalidKey, decodeErr)
	}

	var token continueToken
	if jsonErr := json.Unmarshal(raw, &token); jsonErr != nil {
		return "", 0, fmt.Errorf("%w: %v", ErrGenericInvalidKey, jsonErr)
	}

	// Only a single token version is understood.
	if token.APIVersion != "meta.k8s.io/v1" {
		return "", 0, fmt.Errorf("%w %v", ErrUnrecognizedEncodedVersion, token.APIVersion)
	}
	if token.ResourceVersion == 0 {
		return "", 0, ErrInvalidStartRV
	}
	if token.StartKey == "" {
		return "", 0, ErrEmptyStartKey
	}

	// Defend against path traversal attacks by clients: the start key,
	// made absolute, must already be in the form path.Clean produces.
	// That guarantees it cannot climb to a higher level of the hierarchy,
	// so once the key prefix is prepended the result is fully qualified
	// and cannot range over anything less specific than keyPrefix.
	absKey := token.StartKey
	if !strings.HasPrefix(absKey, "/") {
		absKey = "/" + absKey
	}
	if path.Clean(absKey) != absKey {
		return "", 0, fmt.Errorf("%w: %v", ErrGenericInvalidKey, token.StartKey)
	}
	return keyPrefix + absKey[1:], token.ResourceVersion, nil
}
// EncodeContinue builds the continue token for resuming the current
// query at key. The token stores key with keyPrefix stripped, together
// with resourceVersion, as raw-URL-base64 encoded JSON.
//
// An error is returned when stripping keyPrefix leaves key unchanged,
// or when the token cannot be marshalled.
func EncodeContinue(key, keyPrefix string, resourceVersion int64) (string, error) {
	relativeKey := strings.TrimPrefix(key, keyPrefix)
	if relativeKey == key {
		return "", fmt.Errorf("unable to encode next field: the key and key prefix do not match")
	}

	token := continueToken{
		APIVersion:      "meta.k8s.io/v1",
		ResourceVersion: resourceVersion,
		StartKey:        relativeKey,
	}
	payload, err := json.Marshal(&token)
	if err != nil {
		return "", err
	}

	encoded := base64.RawURLEncoding.EncodeToString(payload)
	return encoded, nil
}

View File

@@ -97,8 +97,8 @@ func IsNotFound(err error) bool {
return isErrCode(err, ErrCodeKeyNotFound)
}
// IsNodeExist returns true if and only if err is an node already exist error.
func IsNodeExist(err error) bool {
// IsExist returns true if and only if err is "key" already exists error.
func IsExist(err error) bool {
return isErrCode(err, ErrCodeKeyExists)
}

View File

@@ -56,7 +56,7 @@ func InterpretGetError(err error, qualifiedResource schema.GroupResource, name s
// operation into the appropriate API error.
func InterpretCreateError(err error, qualifiedResource schema.GroupResource, name string) error {
switch {
case storage.IsNodeExist(err):
case storage.IsExist(err):
return errors.NewAlreadyExists(qualifiedResource, name)
case storage.IsUnreachable(err):
return errors.NewServerTimeout(qualifiedResource, "create", 2) // TODO: make configurable or handled at a higher level
@@ -71,7 +71,7 @@ func InterpretCreateError(err error, qualifiedResource schema.GroupResource, nam
// operation into the appropriate API error.
func InterpretUpdateError(err error, qualifiedResource schema.GroupResource, name string) error {
switch {
case storage.IsConflict(err), storage.IsNodeExist(err), storage.IsInvalidObj(err):
case storage.IsConflict(err), storage.IsExist(err), storage.IsInvalidObj(err):
return errors.NewConflict(qualifiedResource, name, err)
case storage.IsUnreachable(err):
return errors.NewServerTimeout(qualifiedResource, "update", 2) // TODO: make configurable or handled at a higher level
@@ -92,7 +92,7 @@ func InterpretDeleteError(err error, qualifiedResource schema.GroupResource, nam
return errors.NewNotFound(qualifiedResource, name)
case storage.IsUnreachable(err):
return errors.NewServerTimeout(qualifiedResource, "delete", 2) // TODO: make configurable or handled at a higher level
case storage.IsConflict(err), storage.IsNodeExist(err), storage.IsInvalidObj(err):
case storage.IsConflict(err), storage.IsExist(err), storage.IsInvalidObj(err):
return errors.NewConflict(qualifiedResource, name, err)
case storage.IsInternalError(err):
return errors.NewInternalError(err)

View File

@@ -1,6 +1,4 @@
# See the OWNERS docs at https://go.k8s.io/owners
reviewers:
- wojtek-t
- timothysc
- hongchaodeng
- wojtek-t

View File

@@ -22,7 +22,7 @@ import (
"sync"
"time"
"go.etcd.io/etcd/clientv3"
clientv3 "go.etcd.io/etcd/client/v3"
"k8s.io/klog/v2"
)

View File

@@ -18,8 +18,9 @@ package etcd3
import (
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apiserver/pkg/storage"
etcdrpc "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
etcdrpc "go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
)
@@ -60,7 +61,7 @@ func handleCompactedErrorForPaging(continueKey, keyPrefix string) error {
// continueToken.ResoureVersion=-1 means that the apiserver can
// continue the list at the latest resource version. We don't use rv=0
// for this purpose to distinguish from a bad token that has empty rv.
newToken, err := encodeContinue(continueKey, keyPrefix, -1)
newToken, err := storage.EncodeContinue(continueKey, keyPrefix, -1)
if err != nil {
utilruntime.HandleError(err)
return errors.NewResourceExpired(continueExpired)

View File

@@ -18,8 +18,8 @@ package etcd3
import (
"fmt"
"go.etcd.io/etcd/clientv3"
"go.etcd.io/etcd/mvcc/mvccpb"
"go.etcd.io/etcd/api/v3/mvccpb"
clientv3 "go.etcd.io/etcd/client/v3"
)
type event struct {

View File

@@ -0,0 +1,108 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package etcd3
import (
"context"
"time"
clientv3 "go.etcd.io/etcd/client/v3"
endpointsrequest "k8s.io/apiserver/pkg/endpoints/request"
)
// NewETCDLatencyTracker wraps the given 'delegate' clientv3.KV in a
// clientv3.KV that forwards every call to it and times the calls it
// decorates, so the latency incurred in etcd can be tracked.
func NewETCDLatencyTracker(delegate clientv3.KV) clientv3.KV {
	tracker := new(clientV3KVLatencyTracker)
	tracker.KV = delegate
	return tracker
}
// clientV3KVLatencyTracker decorates a clientv3.KV instance and times
// each call so we can track the latency an API request incurs in etcd
// round trips (the time it takes to send data to etcd and get the
// complete response back)
//
// If an API request involves N (N>=1) round trips to etcd, then we will sum
// up the latency incurred in each round trip.
// It uses the context associated with the request in flight, so there
// are no states shared among the requests in flight, and so there is no
// concurrency overhead.
// If the goroutine executing the request handler makes concurrent calls
// to the underlying storage layer, that is protected since the latency
// tracking function TrackStorageLatency is thread safe.
//
// NOTE: Compact is an asynchronous process and is not associated with
// any request, so we will not be tracking its latency.
type clientV3KVLatencyTracker struct {
// KV is the decorated instance; methods not overridden by this type
// are served by it directly through embedding.
clientv3.KV
}
// Put forwards to the decorated KV and, when the call returns, reports
// the elapsed time to TrackStorageLatency using the request context.
func (c *clientV3KVLatencyTracker) Put(ctx context.Context, key, val string, opts ...clientv3.OpOption) (*clientv3.PutResponse, error) {
	begin := time.Now()
	// The closure defers reading the clock until Put is returning.
	defer func() { endpointsrequest.TrackStorageLatency(ctx, time.Since(begin)) }()

	return c.KV.Put(ctx, key, val, opts...)
}
// Get forwards to the decorated KV and, when the call returns, reports
// the elapsed time to TrackStorageLatency using the request context.
func (c *clientV3KVLatencyTracker) Get(ctx context.Context, key string, opts ...clientv3.OpOption) (*clientv3.GetResponse, error) {
	begin := time.Now()
	// The closure defers reading the clock until Get is returning.
	defer func() { endpointsrequest.TrackStorageLatency(ctx, time.Since(begin)) }()

	return c.KV.Get(ctx, key, opts...)
}
// Delete forwards to the decorated KV and, when the call returns,
// reports the elapsed time to TrackStorageLatency using the request
// context.
func (c *clientV3KVLatencyTracker) Delete(ctx context.Context, key string, opts ...clientv3.OpOption) (*clientv3.DeleteResponse, error) {
	begin := time.Now()
	// The closure defers reading the clock until Delete is returning.
	defer func() { endpointsrequest.TrackStorageLatency(ctx, time.Since(begin)) }()

	return c.KV.Delete(ctx, key, opts...)
}
// Do forwards the operation to the decorated KV and, when the call
// returns, reports the elapsed time to TrackStorageLatency using the
// request context.
func (c *clientV3KVLatencyTracker) Do(ctx context.Context, op clientv3.Op) (clientv3.OpResponse, error) {
	begin := time.Now()
	// The closure defers reading the clock until Do is returning.
	defer func() { endpointsrequest.TrackStorageLatency(ctx, time.Since(begin)) }()

	return c.KV.Do(ctx, op)
}
// Txn starts a transaction on the decorated KV and wraps it, together
// with ctx, in a clientV3TxnTracker so that its Commit can be timed.
func (c *clientV3KVLatencyTracker) Txn(ctx context.Context) clientv3.Txn {
	inner := c.KV.Txn(ctx)
	return &clientV3TxnTracker{ctx: ctx, Txn: inner}
}
// clientV3TxnTracker decorates a clientv3.Txn so that the time spent
// in Commit is reported to TrackStorageLatency.
type clientV3TxnTracker struct {
// ctx is the context the transaction was created with; it is the
// context passed to TrackStorageLatency when Commit returns.
ctx context.Context
// Txn is the decorated transaction; methods other than Commit are
// served by it directly through embedding.
clientv3.Txn
}
// Commit commits the decorated transaction and, when the call returns,
// reports the elapsed time to TrackStorageLatency using the context
// the transaction was created with.
func (t *clientV3TxnTracker) Commit() (*clientv3.TxnResponse, error) {
	begin := time.Now()
	// The closure defers reading the clock until Commit is returning.
	defer func() { endpointsrequest.TrackStorageLatency(t.ctx, time.Since(begin)) }()

	return t.Txn.Commit()
}

View File

@@ -21,7 +21,7 @@ import (
"sync"
"time"
"go.etcd.io/etcd/clientv3"
clientv3 "go.etcd.io/etcd/client/v3"
"k8s.io/apiserver/pkg/storage/etcd3/metrics"
)

View File

@@ -19,12 +19,12 @@ package etcd3
import (
"fmt"
"go.etcd.io/etcd/clientv3"
"google.golang.org/grpc/grpclog"
"k8s.io/klog/v2"
)
func init() {
clientv3.SetLogger(klogWrapper{})
grpclog.SetLoggerV2(klogWrapper{})
}
type klogWrapper struct{}
@@ -32,15 +32,21 @@ type klogWrapper struct{}
const klogWrapperDepth = 4
func (klogWrapper) Info(args ...interface{}) {
klog.InfoDepth(klogWrapperDepth, args...)
if klogV := klog.V(5); klogV.Enabled() {
klogV.InfoSDepth(klogWrapperDepth, fmt.Sprint(args...))
}
}
func (klogWrapper) Infoln(args ...interface{}) {
klog.InfoDepth(klogWrapperDepth, fmt.Sprintln(args...))
if klogV := klog.V(5); klogV.Enabled() {
klogV.InfoSDepth(klogWrapperDepth, fmt.Sprintln(args...))
}
}
func (klogWrapper) Infof(format string, args ...interface{}) {
klog.InfoDepth(klogWrapperDepth, fmt.Sprintf(format, args...))
if klogV := klog.V(5); klogV.Enabled() {
klog.V(5).InfoSDepth(klogWrapperDepth, fmt.Sprintf(format, args...))
}
}
func (klogWrapper) Warning(args ...interface{}) {

View File

@@ -26,7 +26,7 @@ import (
/*
* By default, all the following metrics are defined as falling under
* ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/20190404-kubernetes-control-plane-metrics-stability.md#stability-classes)
* ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes)
*
* Promoting the stability level of the metric is a responsibility of the component owner, since it
* involves explicitly acknowledging support for the metric across multiple releases, in accordance with
@@ -38,20 +38,15 @@ var (
Name: "etcd_request_duration_seconds",
Help: "Etcd request latency in seconds for each operation and object type.",
// Etcd request latency in seconds for each operation and object type.
Buckets: []float64{0.005, 0.025, 0.1, 0.25, 0.5, 1.0, 2.0, 4.0, 15.0, 30.0, 60.0},
// This metric is used for verifying etcd api call latencies SLO
// keep consistent with apiserver metric 'requestLatencies' in
// staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go
Buckets: []float64{0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
4, 5, 6, 8, 10, 15, 20, 30, 45, 60},
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"operation", "type"},
)
etcdObjectCounts = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Name: "etcd_object_counts",
DeprecatedVersion: "1.22.0",
Help: "Number of stored objects at the time of last check split by kind. This metric is replaced by apiserver_storage_object_counts.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
objectCounts = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Name: "apiserver_storage_objects",
@@ -85,6 +80,38 @@ var (
},
[]string{},
)
listStorageCount = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Name: "apiserver_storage_list_total",
Help: "Number of LIST requests served from storage",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
listStorageNumFetched = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Name: "apiserver_storage_list_fetched_objects_total",
Help: "Number of objects read from storage in the course of serving a LIST request",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
listStorageNumSelectorEvals = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Name: "apiserver_storage_list_evaluated_objects_total",
Help: "Number of objects tested in the course of serving a LIST request from storage",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
listStorageNumReturned = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Name: "apiserver_storage_list_returned_objects_total",
Help: "Number of objects returned for a LIST request from storage",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
)
var registerMetrics sync.Once
@@ -95,17 +122,19 @@ func Register() {
registerMetrics.Do(func() {
legacyregistry.MustRegister(etcdRequestLatency)
legacyregistry.MustRegister(objectCounts)
legacyregistry.MustRegister(etcdObjectCounts)
legacyregistry.MustRegister(dbTotalSize)
legacyregistry.MustRegister(etcdBookmarkCounts)
legacyregistry.MustRegister(etcdLeaseObjectCounts)
legacyregistry.MustRegister(listStorageCount)
legacyregistry.MustRegister(listStorageNumFetched)
legacyregistry.MustRegister(listStorageNumSelectorEvals)
legacyregistry.MustRegister(listStorageNumReturned)
})
}
// UpdateObjectCount sets the apiserver_storage_object_counts and etcd_object_counts (deprecated) metric.
// UpdateObjectCount sets the apiserver_storage_object_counts metric.
func UpdateObjectCount(resourcePrefix string, count int64) {
objectCounts.WithLabelValues(resourcePrefix).Set(float64(count))
etcdObjectCounts.WithLabelValues(resourcePrefix).Set(float64(count))
}
// RecordEtcdRequestLatency sets the etcd_request_duration_seconds metrics.
@@ -139,3 +168,11 @@ func UpdateLeaseObjectCount(count int64) {
// See pkg/storage/etcd3/lease_manager.go
etcdLeaseObjectCounts.WithLabelValues().Observe(float64(count))
}
// RecordStorageListMetrics notes various metrics of the cost to serve a LIST request
// for the given resource: it counts the request itself and adds the
// number of objects fetched, evaluated and returned to the respective
// counters.
func RecordStorageListMetrics(resource string, numFetched, numEvald, numReturned int) {
	fetched, evaluated, returned := float64(numFetched), float64(numEvald), float64(numReturned)

	listStorageCount.WithLabelValues(resource).Inc()
	listStorageNumFetched.WithLabelValues(resource).Add(fetched)
	listStorageNumSelectorEvals.WithLabelValues(resource).Add(evaluated)
	listStorageNumReturned.WithLabelValues(resource).Add(returned)
}

View File

@@ -19,8 +19,6 @@ package etcd3
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"path"
@@ -28,16 +26,18 @@ import (
"strings"
"time"
"go.etcd.io/etcd/clientv3"
clientv3 "go.etcd.io/etcd/client/v3"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/conversion"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
endpointsrequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/etcd3/metrics"
@@ -47,6 +47,13 @@ import (
utiltrace "k8s.io/utils/trace"
)
const (
// maxLimit is a maximum page limit increase used when fetching objects from etcd.
// This limit is used only for increasing page size by kube-apiserver. If request
// specifies larger limit initially, it won't be changed.
maxLimit = 10000
)
// authenticatedDataString satisfies the value.Context interface. It uses the key to
// authenticate the stored data. This does not defend against reuse of previously
// encrypted values under the same key, but will prevent an attacker from using an
@@ -64,14 +71,16 @@ func (d authenticatedDataString) AuthenticatedData() []byte {
var _ value.Context = authenticatedDataString("")
type store struct {
client *clientv3.Client
codec runtime.Codec
versioner storage.Versioner
transformer value.Transformer
pathPrefix string
watcher *watcher
pagingEnabled bool
leaseManager *leaseManager
client *clientv3.Client
codec runtime.Codec
versioner storage.Versioner
transformer value.Transformer
pathPrefix string
groupResource schema.GroupResource
groupResourceString string
watcher *watcher
pagingEnabled bool
leaseManager *leaseManager
}
type objState struct {
@@ -83,12 +92,12 @@ type objState struct {
}
// New returns an etcd3 implementation of storage.Interface.
func New(c *clientv3.Client, codec runtime.Codec, newFunc func() runtime.Object, prefix string, transformer value.Transformer, pagingEnabled bool, leaseManagerConfig LeaseManagerConfig) storage.Interface {
return newStore(c, codec, newFunc, prefix, transformer, pagingEnabled, leaseManagerConfig)
func New(c *clientv3.Client, codec runtime.Codec, newFunc func() runtime.Object, prefix string, groupResource schema.GroupResource, transformer value.Transformer, pagingEnabled bool, leaseManagerConfig LeaseManagerConfig) storage.Interface {
return newStore(c, codec, newFunc, prefix, groupResource, transformer, pagingEnabled, leaseManagerConfig)
}
func newStore(c *clientv3.Client, codec runtime.Codec, newFunc func() runtime.Object, prefix string, transformer value.Transformer, pagingEnabled bool, leaseManagerConfig LeaseManagerConfig) *store {
versioner := APIObjectVersioner{}
func newStore(c *clientv3.Client, codec runtime.Codec, newFunc func() runtime.Object, prefix string, groupResource schema.GroupResource, transformer value.Transformer, pagingEnabled bool, leaseManagerConfig LeaseManagerConfig) *store {
versioner := storage.APIObjectVersioner{}
result := &store{
client: c,
codec: codec,
@@ -98,9 +107,11 @@ func newStore(c *clientv3.Client, codec runtime.Codec, newFunc func() runtime.Ob
// for compatibility with etcd2 impl.
// no-op for default prefix of '/registry'.
// keeps compatibility with etcd2 impl for custom prefixes that don't start with '/'
pathPrefix: path.Join("/", prefix),
watcher: newWatcher(c, codec, newFunc, versioner, transformer),
leaseManager: newDefaultLeaseManager(c, leaseManagerConfig),
pathPrefix: path.Join("/", prefix),
groupResource: groupResource,
groupResourceString: groupResource.String(),
watcher: newWatcher(c, codec, newFunc, versioner, transformer),
leaseManager: newDefaultLeaseManager(c, leaseManagerConfig),
}
return result
}
@@ -131,7 +142,7 @@ func (s *store) Get(ctx context.Context, key string, opts storage.GetOptions, ou
}
kv := getResp.Kvs[0]
data, _, err := s.transformer.TransformFromStorage(kv.Value, authenticatedDataString(key))
data, _, err := s.transformer.TransformFromStorage(ctx, kv.Value, authenticatedDataString(key))
if err != nil {
return storage.NewInternalError(err.Error())
}
@@ -141,13 +152,21 @@ func (s *store) Get(ctx context.Context, key string, opts storage.GetOptions, ou
// Create implements storage.Interface.Create.
func (s *store) Create(ctx context.Context, key string, obj, out runtime.Object, ttl uint64) error {
trace := utiltrace.New("Create etcd3",
utiltrace.Field{"audit-id", endpointsrequest.GetAuditIDTruncated(ctx)},
utiltrace.Field{"key", key},
utiltrace.Field{"type", getTypeName(obj)},
)
defer trace.LogIfLong(500 * time.Millisecond)
if version, err := s.versioner.ObjectResourceVersion(obj); err == nil && version != 0 {
return errors.New("resourceVersion should not be set on objects to be created")
}
if err := s.versioner.PrepareObjectForStorage(obj); err != nil {
return fmt.Errorf("PrepareObjectForStorage failed: %v", err)
}
trace.Step("About to Encode")
data, err := runtime.Encode(s.codec, obj)
trace.Step("Encode finished", utiltrace.Field{"len", len(data)}, utiltrace.Field{"err", err})
if err != nil {
return err
}
@@ -158,7 +177,8 @@ func (s *store) Create(ctx context.Context, key string, obj, out runtime.Object,
return err
}
newData, err := s.transformer.TransformToStorage(data, authenticatedDataString(key))
newData, err := s.transformer.TransformToStorage(ctx, data, authenticatedDataString(key))
trace.Step("TransformToStorage finished", utiltrace.Field{"err", err})
if err != nil {
return storage.NewInternalError(err.Error())
}
@@ -170,16 +190,20 @@ func (s *store) Create(ctx context.Context, key string, obj, out runtime.Object,
clientv3.OpPut(key, string(newData), opts...),
).Commit()
metrics.RecordEtcdRequestLatency("create", getTypeName(obj), startTime)
trace.Step("Txn call finished", utiltrace.Field{"err", err})
if err != nil {
return err
}
if !txnResp.Succeeded {
return storage.NewKeyExistsError(key, 0)
}
if out != nil {
putResp := txnResp.Responses[0].GetResponsePut()
return decode(s.codec, s.versioner, data, out, putResp.Header.Revision)
err = decode(s.codec, s.versioner, data, out, putResp.Header.Revision)
trace.Step("decode finished", utiltrace.Field{"len", len(data)}, utiltrace.Field{"err", err})
return err
}
return nil
}
@@ -206,7 +230,7 @@ func (s *store) conditionalDelete(
if err != nil {
return nil, err
}
return s.getState(getResp, key, v, false)
return s.getState(ctx, getResp, key, v, false)
}
var origState *objState
@@ -230,12 +254,22 @@ func (s *store) conditionalDelete(
}
// It's possible we're working with stale data.
// Remember the revision of the potentially stale data and the resulting update error
cachedRev := origState.rev
cachedUpdateErr := err
// Actually fetch
origState, err = getCurrentState()
if err != nil {
return err
}
origStateIsCurrent = true
// it turns out our cached data was not stale, return the error
if cachedRev == origState.rev {
return cachedUpdateErr
}
// Retry
continue
}
@@ -246,12 +280,22 @@ func (s *store) conditionalDelete(
}
// It's possible we're working with stale data.
// Remember the revision of the potentially stale data and the resulting update error
cachedRev := origState.rev
cachedUpdateErr := err
// Actually fetch
origState, err = getCurrentState()
if err != nil {
return err
}
origStateIsCurrent = true
// it turns out our cached data was not stale, return the error
if cachedRev == origState.rev {
return cachedUpdateErr
}
// Retry
continue
}
@@ -271,7 +315,7 @@ func (s *store) conditionalDelete(
if !txnResp.Succeeded {
getResp := (*clientv3.GetResponse)(txnResp.Responses[0].GetResponseRange())
klog.V(4).Infof("deletion of %s failed because of a conflict, going to retry", key)
origState, err = s.getState(getResp, key, v, false)
origState, err = s.getState(ctx, getResp, key, v, false)
if err != nil {
return err
}
@@ -284,12 +328,15 @@ func (s *store) conditionalDelete(
// GuaranteedUpdate implements storage.Interface.GuaranteedUpdate.
func (s *store) GuaranteedUpdate(
ctx context.Context, key string, out runtime.Object, ignoreNotFound bool,
ctx context.Context, key string, destination runtime.Object, ignoreNotFound bool,
preconditions *storage.Preconditions, tryUpdate storage.UpdateFunc, cachedExistingObject runtime.Object) error {
trace := utiltrace.New("GuaranteedUpdate etcd3", utiltrace.Field{"type", getTypeName(out)})
trace := utiltrace.New("GuaranteedUpdate etcd3",
utiltrace.Field{"audit-id", endpointsrequest.GetAuditIDTruncated(ctx)},
utiltrace.Field{"key", key},
utiltrace.Field{"type", getTypeName(destination)})
defer trace.LogIfLong(500 * time.Millisecond)
v, err := conversion.EnforcePtr(out)
v, err := conversion.EnforcePtr(destination)
if err != nil {
return fmt.Errorf("unable to convert output object to pointer: %v", err)
}
@@ -298,11 +345,11 @@ func (s *store) GuaranteedUpdate(
getCurrentState := func() (*objState, error) {
startTime := time.Now()
getResp, err := s.client.KV.Get(ctx, key)
metrics.RecordEtcdRequestLatency("get", getTypeName(out), startTime)
metrics.RecordEtcdRequestLatency("get", getTypeName(destination), startTime)
if err != nil {
return nil, err
}
return s.getState(getResp, key, v, ignoreNotFound)
return s.getState(ctx, getResp, key, v, ignoreNotFound)
}
var origState *objState
@@ -345,17 +392,29 @@ func (s *store) GuaranteedUpdate(
}
// It's possible we were working with stale data
// Remember the revision of the potentially stale data and the resulting update error
cachedRev := origState.rev
cachedUpdateErr := err
// Actually fetch
origState, err = getCurrentState()
if err != nil {
return err
}
origStateIsCurrent = true
// it turns out our cached data was not stale, return the error
if cachedRev == origState.rev {
return cachedUpdateErr
}
// Retry
continue
}
trace.Step("About to Encode")
data, err := runtime.Encode(s.codec, ret)
trace.Step("Encode finished", utiltrace.Field{"len", len(data)}, utiltrace.Field{"err", err})
if err != nil {
return err
}
@@ -376,11 +435,12 @@ func (s *store) GuaranteedUpdate(
}
// recheck that the data from etcd is not stale before short-circuiting a write
if !origState.stale {
return decode(s.codec, s.versioner, origState.data, out, origState.rev)
return decode(s.codec, s.versioner, origState.data, destination, origState.rev)
}
}
newData, err := s.transformer.TransformToStorage(data, transformContext)
newData, err := s.transformer.TransformToStorage(ctx, data, transformContext)
trace.Step("TransformToStorage finished", utiltrace.Field{"err", err})
if err != nil {
return storage.NewInternalError(err.Error())
}
@@ -399,7 +459,8 @@ func (s *store) GuaranteedUpdate(
).Else(
clientv3.OpGet(key),
).Commit()
metrics.RecordEtcdRequestLatency("update", getTypeName(out), startTime)
metrics.RecordEtcdRequestLatency("update", getTypeName(destination), startTime)
trace.Step("Txn call finished", utiltrace.Field{"err", err})
if err != nil {
return err
}
@@ -407,7 +468,7 @@ func (s *store) GuaranteedUpdate(
if !txnResp.Succeeded {
getResp := (*clientv3.GetResponse)(txnResp.Responses[0].GetResponseRange())
klog.V(4).Infof("GuaranteedUpdate of %s failed because of a conflict, going to retry", key)
origState, err = s.getState(getResp, key, v, ignoreNotFound)
origState, err = s.getState(ctx, getResp, key, v, ignoreNotFound)
if err != nil {
return err
}
@@ -417,64 +478,10 @@ func (s *store) GuaranteedUpdate(
}
putResp := txnResp.Responses[0].GetResponsePut()
return decode(s.codec, s.versioner, data, out, putResp.Header.Revision)
}
}
// GetToList implements storage.Interface.GetToList.
func (s *store) GetToList(ctx context.Context, key string, listOpts storage.ListOptions, listObj runtime.Object) error {
resourceVersion := listOpts.ResourceVersion
match := listOpts.ResourceVersionMatch
pred := listOpts.Predicate
trace := utiltrace.New("GetToList etcd3",
utiltrace.Field{"key", key},
utiltrace.Field{"resourceVersion", resourceVersion},
utiltrace.Field{"resourceVersionMatch", match},
utiltrace.Field{"limit", pred.Limit},
utiltrace.Field{"continue", pred.Continue})
defer trace.LogIfLong(500 * time.Millisecond)
listPtr, err := meta.GetItemsPtr(listObj)
if err != nil {
err = decode(s.codec, s.versioner, data, destination, putResp.Header.Revision)
trace.Step("decode finished", utiltrace.Field{"len", len(data)}, utiltrace.Field{"err", err})
return err
}
v, err := conversion.EnforcePtr(listPtr)
if err != nil || v.Kind() != reflect.Slice {
return fmt.Errorf("need ptr to slice: %v", err)
}
newItemFunc := getNewItemFunc(listObj, v)
key = path.Join(s.pathPrefix, key)
startTime := time.Now()
var opts []clientv3.OpOption
if len(resourceVersion) > 0 && match == metav1.ResourceVersionMatchExact {
rv, err := s.versioner.ParseResourceVersion(resourceVersion)
if err != nil {
return apierrors.NewBadRequest(fmt.Sprintf("invalid resource version: %v", err))
}
opts = append(opts, clientv3.WithRev(int64(rv)))
}
getResp, err := s.client.KV.Get(ctx, key, opts...)
metrics.RecordEtcdRequestLatency("get", getTypeName(listPtr), startTime)
if err != nil {
return err
}
if err = s.validateMinimumResourceVersion(resourceVersion, uint64(getResp.Header.Revision)); err != nil {
return err
}
if len(getResp.Kvs) > 0 {
data, _, err := s.transformer.TransformFromStorage(getResp.Kvs[0].Value, authenticatedDataString(key))
if err != nil {
return storage.NewInternalError(err.Error())
}
if err := appendListItem(v, data, uint64(getResp.Kvs[0].ModRevision), pred, s.codec, s.versioner, newItemFunc); err != nil {
return err
}
}
// update version with cluster level revision
return s.versioner.UpdateList(listObj, uint64(getResp.Header.Revision), "", nil)
}
func getNewItemFunc(listObj runtime.Object, v reflect.Value) func() runtime.Object {
@@ -513,72 +520,14 @@ func (s *store) Count(key string) (int64, error) {
return getResp.Count, nil
}
// continueToken is a simple structured object for encoding the state of a continue token.
// It is serialized as JSON using the short field names given in the struct tags.
// TODO: if we change the version of the encoded form, we can't start encoding the new version
// until all other servers are upgraded (i.e. we need to support rolling schema)
// This is a public API struct and cannot change.
type continueToken struct {
	// APIVersion identifies the token encoding; decodeContinue only accepts "meta.k8s.io/v1".
	APIVersion string `json:"v"`
	// ResourceVersion is the resource version carried by the token; decodeContinue rejects zero.
	ResourceVersion int64 `json:"rv"`
	// StartKey is the key to resume from, stored relative to the key prefix of the list.
	StartKey string `json:"start"`
}
// decodeContinue parses an opaque continue token, as produced by
// encodeContinue, back into the key to resume from and the resource version
// stored in the token.
//
// continueValue is the unpadded URL-safe base64 encoding of a JSON-serialized
// continueToken. keyPrefix is prepended to the token's relative start key to
// form the returned fromKey.
//
// An error is returned when the value is not valid base64 or JSON (the
// underlying error is wrapped and reachable through errors.Is/errors.As), when
// the token version is not recognized, when the resource version is zero, when
// the start key is empty, or when the start key is not already in its
// path.Clean form.
// TODO: return a typed error that instructs clients that they must relist
func decodeContinue(continueValue, keyPrefix string) (fromKey string, rv int64, err error) {
	data, err := base64.RawURLEncoding.DecodeString(continueValue)
	if err != nil {
		return "", 0, fmt.Errorf("continue key is not valid: %w", err)
	}
	var c continueToken
	if err := json.Unmarshal(data, &c); err != nil {
		return "", 0, fmt.Errorf("continue key is not valid: %w", err)
	}
	switch c.APIVersion {
	case "meta.k8s.io/v1":
		if c.ResourceVersion == 0 {
			return "", 0, fmt.Errorf("continue key is not valid: incorrect encoded start resourceVersion (version meta.k8s.io/v1)")
		}
		if len(c.StartKey) == 0 {
			return "", 0, fmt.Errorf("continue key is not valid: encoded start key empty (version meta.k8s.io/v1)")
		}
		// defend against path traversal attacks by clients - path.Clean will ensure that startKey cannot
		// be at a higher level of the hierarchy, and so when we append the key prefix we will end up with
		// continue start key that is fully qualified and cannot range over anything less specific than
		// keyPrefix.
		key := c.StartKey
		if !strings.HasPrefix(key, "/") {
			key = "/" + key
		}
		cleaned := path.Clean(key)
		if cleaned != key {
			return "", 0, fmt.Errorf("continue key is not valid: %s", c.StartKey)
		}
		// Drop the leading "/" that was only needed for path.Clean before joining with the prefix.
		return keyPrefix + cleaned[1:], c.ResourceVersion, nil
	default:
		return "", 0, fmt.Errorf("continue key is not valid: server does not recognize this encoded version %q", c.APIVersion)
	}
}
// encodeContinue builds the opaque continue token for a list that should
// resume at key. The token stores key relative to keyPrefix together with
// resourceVersion, serialized as JSON and then encoded as unpadded URL-safe
// base64. It fails when stripping keyPrefix from key would leave key unchanged.
func encodeContinue(key, keyPrefix string, resourceVersion int64) (string, error) {
	// Trimming leaves the key untouched exactly when the prefix is empty or absent.
	if keyPrefix == "" || !strings.HasPrefix(key, keyPrefix) {
		return "", fmt.Errorf("unable to encode next field: the key and key prefix do not match")
	}
	token := continueToken{
		APIVersion:      "meta.k8s.io/v1",
		ResourceVersion: resourceVersion,
		StartKey:        key[len(keyPrefix):],
	}
	serialized, err := json.Marshal(&token)
	if err != nil {
		return "", err
	}
	return base64.RawURLEncoding.EncodeToString(serialized), nil
}
// List implements storage.Interface.List.
func (s *store) List(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
// GetList implements storage.Interface.
func (s *store) GetList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
recursive := opts.Recursive
resourceVersion := opts.ResourceVersion
match := opts.ResourceVersionMatch
pred := opts.Predicate
trace := utiltrace.New("List etcd3",
trace := utiltrace.New(fmt.Sprintf("List(recursive=%v) etcd3", recursive),
utiltrace.Field{"audit-id", endpointsrequest.GetAuditIDTruncated(ctx)},
utiltrace.Field{"key", key},
utiltrace.Field{"resourceVersion", resourceVersion},
utiltrace.Field{"resourceVersionMatch", match},
@@ -593,24 +542,26 @@ func (s *store) List(ctx context.Context, key string, opts storage.ListOptions,
if err != nil || v.Kind() != reflect.Slice {
return fmt.Errorf("need ptr to slice: %v", err)
}
key = path.Join(s.pathPrefix, key)
if s.pathPrefix != "" {
key = path.Join(s.pathPrefix, key)
}
// We need to make sure the key ended with "/" so that we only get children "directories".
// e.g. if we have key "/a", "/a/b", "/ab", getting keys with prefix "/a" will return all three,
// while with prefix "/a/" will return only "/a/b" which is the correct answer.
if !strings.HasSuffix(key, "/") {
// For recursive lists, we need to make sure the key ended with "/" so that we only
// get children "directories". e.g. if we have key "/a", "/a/b", "/ab", getting keys
// with prefix "/a" will return all three, while with prefix "/a/" will return only
// "/a/b" which is the correct answer.
if recursive && !strings.HasSuffix(key, "/") {
key += "/"
}
keyPrefix := key
// set the appropriate clientv3 options to filter the returned data set
var limitOption *clientv3.OpOption
limit := pred.Limit
var paging bool
options := make([]clientv3.OpOption, 0, 4)
if s.pagingEnabled && pred.Limit > 0 {
paging = true
options = append(options, clientv3.WithLimit(pred.Limit))
options = append(options, clientv3.WithLimit(limit))
limitOption = &options[len(options)-1]
}
newItemFunc := getNewItemFunc(listObj, v)
@@ -627,8 +578,8 @@ func (s *store) List(ctx context.Context, key string, opts storage.ListOptions,
var returnedRV, continueRV, withRev int64
var continueKey string
switch {
case s.pagingEnabled && len(pred.Continue) > 0:
continueKey, continueRV, err = decodeContinue(pred.Continue, keyPrefix)
case recursive && s.pagingEnabled && len(pred.Continue) > 0:
continueKey, continueRV, err = storage.DecodeContinue(pred.Continue, keyPrefix)
if err != nil {
return apierrors.NewBadRequest(fmt.Sprintf("invalid continue token: %v", err))
}
@@ -648,7 +599,7 @@ func (s *store) List(ctx context.Context, key string, opts storage.ListOptions,
withRev = continueRV
returnedRV = continueRV
}
case s.pagingEnabled && pred.Limit > 0:
case recursive && s.pagingEnabled && pred.Limit > 0:
if fromRV != nil {
switch match {
case metav1.ResourceVersionMatchNotOlderThan:
@@ -684,7 +635,9 @@ func (s *store) List(ctx context.Context, key string, opts storage.ListOptions,
}
}
options = append(options, clientv3.WithPrefix())
if recursive {
options = append(options, clientv3.WithPrefix())
}
}
if withRev != 0 {
options = append(options, clientv3.WithRev(withRev))
@@ -694,13 +647,26 @@ func (s *store) List(ctx context.Context, key string, opts storage.ListOptions,
var lastKey []byte
var hasMore bool
var getResp *clientv3.GetResponse
var numFetched int
var numEvald int
// Because these metrics are for understanding the costs of handling LIST requests,
// get them recorded even in error cases.
defer func() {
numReturn := v.Len()
metrics.RecordStorageListMetrics(s.groupResourceString, numFetched, numEvald, numReturn)
}()
for {
startTime := time.Now()
getResp, err = s.client.KV.Get(ctx, key, options...)
metrics.RecordEtcdRequestLatency("list", getTypeName(listPtr), startTime)
if recursive {
metrics.RecordEtcdRequestLatency("list", getTypeName(listPtr), startTime)
} else {
metrics.RecordEtcdRequestLatency("get", getTypeName(listPtr), startTime)
}
if err != nil {
return interpretListError(err, len(pred.Continue) > 0, continueKey, keyPrefix)
}
numFetched += len(getResp.Kvs)
if err = s.validateMinimumResourceVersion(resourceVersion, uint64(getResp.Header.Revision)); err != nil {
return err
}
@@ -719,14 +685,14 @@ func (s *store) List(ctx context.Context, key string, opts storage.ListOptions,
}
// take items from the response until the bucket is full, filtering as we go
for _, kv := range getResp.Kvs {
for i, kv := range getResp.Kvs {
if paging && int64(v.Len()) >= pred.Limit {
hasMore = true
break
}
lastKey = kv.Key
data, _, err := s.transformer.TransformFromStorage(kv.Value, authenticatedDataString(kv.Key))
data, _, err := s.transformer.TransformFromStorage(ctx, kv.Value, authenticatedDataString(kv.Key))
if err != nil {
return storage.NewInternalErrorf("unable to transform key %q: %v", kv.Key, err)
}
@@ -734,6 +700,10 @@ func (s *store) List(ctx context.Context, key string, opts storage.ListOptions,
if err := appendListItem(v, data, uint64(kv.ModRevision), pred, s.codec, s.versioner, newItemFunc); err != nil {
return err
}
numEvald++
// free kv early. Long lists can take O(seconds) to decode.
getResp.Kvs[i] = nil
}
// indicate to the client which resource version was returned
@@ -749,6 +719,16 @@ func (s *store) List(ctx context.Context, key string, opts storage.ListOptions,
if int64(v.Len()) >= pred.Limit {
break
}
if limit < maxLimit {
// We got incomplete result due to field/label selector dropping the object.
// Double page size to reduce total number of calls to etcd.
limit *= 2
if limit > maxLimit {
limit = maxLimit
}
*limitOption = clientv3.WithLimit(limit)
}
key = string(lastKey) + "\x00"
if withRev == 0 {
withRev = returnedRV
@@ -760,7 +740,7 @@ func (s *store) List(ctx context.Context, key string, opts storage.ListOptions,
// we never return a key that the client wouldn't be allowed to see
if hasMore {
// we want to start immediately after the last key
next, err := encodeContinue(string(lastKey)+"\x00", keyPrefix, returnedRV)
next, err := storage.EncodeContinue(string(lastKey)+"\x00", keyPrefix, returnedRV)
if err != nil {
return err
}
@@ -803,7 +783,7 @@ func growSlice(v reflect.Value, maxCapacity int, sizes ...int) {
return
}
if v.Len() > 0 {
extra := reflect.MakeSlice(v.Type(), 0, max)
extra := reflect.MakeSlice(v.Type(), v.Len(), max)
reflect.Copy(extra, v)
v.Set(extra)
} else {
@@ -814,24 +794,15 @@ func growSlice(v reflect.Value, maxCapacity int, sizes ...int) {
// Watch implements storage.Interface.Watch.
func (s *store) Watch(ctx context.Context, key string, opts storage.ListOptions) (watch.Interface, error) {
return s.watch(ctx, key, opts, false)
}
// WatchList implements storage.Interface.WatchList.
func (s *store) WatchList(ctx context.Context, key string, opts storage.ListOptions) (watch.Interface, error) {
return s.watch(ctx, key, opts, true)
}
func (s *store) watch(ctx context.Context, key string, opts storage.ListOptions, recursive bool) (watch.Interface, error) {
rev, err := s.versioner.ParseResourceVersion(opts.ResourceVersion)
if err != nil {
return nil, err
}
key = path.Join(s.pathPrefix, key)
return s.watcher.Watch(ctx, key, int64(rev), recursive, opts.ProgressNotify, opts.Predicate)
return s.watcher.Watch(ctx, key, int64(rev), opts.Recursive, opts.ProgressNotify, opts.Predicate)
}
func (s *store) getState(getResp *clientv3.GetResponse, key string, v reflect.Value, ignoreNotFound bool) (*objState, error) {
func (s *store) getState(ctx context.Context, getResp *clientv3.GetResponse, key string, v reflect.Value, ignoreNotFound bool) (*objState, error) {
state := &objState{
meta: &storage.ResponseMeta{},
}
@@ -850,7 +821,7 @@ func (s *store) getState(getResp *clientv3.GetResponse, key string, v reflect.Va
return nil, err
}
} else {
data, stale, err := s.transformer.TransformFromStorage(getResp.Kvs[0].Value, authenticatedDataString(key))
data, stale, err := s.transformer.TransformFromStorage(ctx, getResp.Kvs[0].Value, authenticatedDataString(key))
if err != nil {
return nil, storage.NewInternalError(err.Error())
}

View File

@@ -32,8 +32,9 @@ import (
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/etcd3/metrics"
"k8s.io/apiserver/pkg/storage/value"
utilflowcontrol "k8s.io/apiserver/pkg/util/flowcontrol"
"go.etcd.io/etcd/clientv3"
clientv3 "go.etcd.io/etcd/client/v3"
"k8s.io/klog/v2"
)
@@ -120,6 +121,14 @@ func (w *watcher) Watch(ctx context.Context, key string, rev int64, recursive, p
}
wc := w.createWatchChan(ctx, key, rev, recursive, progressNotify, pred)
go wc.run()
// For etcd watch we don't have an easy way to answer whether the watch
// has already caught up. So in the initial version (given that watchcache
// is by default enabled for all resources but Events), we just deliver
// the initialization signal immediately. Improving this will be explored
// in the future.
utilflowcontrol.WatchInitialized(ctx)
return wc, nil
}
@@ -283,7 +292,7 @@ func (wc *watchChan) processEvent(wg *sync.WaitGroup) {
continue
}
if len(wc.resultChan) == outgoingBufSize {
klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow dispatching events to watchers", "outgoingEvents", outgoingBufSize)
klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow dispatching events to watchers", "outgoingEvents", outgoingBufSize, "objectType", wc.watcher.objectType)
}
// If user couldn't receive results fast enough, we also block incoming events from watcher.
// Because storing events in local will cause more memory usage.
@@ -402,7 +411,7 @@ func (wc *watchChan) sendError(err error) {
func (wc *watchChan) sendEvent(e *event) {
if len(wc.incomingEventChan) == incomingBufSize {
klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow decoding, user not receiving fast, or other processing logic", "incomingEvents", incomingBufSize)
klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow decoding, user not receiving fast, or other processing logic", "incomingEvents", incomingBufSize, "objectType", wc.watcher.objectType)
}
select {
case wc.incomingEventChan <- e:
@@ -417,7 +426,7 @@ func (wc *watchChan) prepareObjs(e *event) (curObj runtime.Object, oldObj runtim
}
if !e.isDeleted {
data, _, err := wc.watcher.transformer.TransformFromStorage(e.value, authenticatedDataString(e.key))
data, _, err := wc.watcher.transformer.TransformFromStorage(wc.ctx, e.value, authenticatedDataString(e.key))
if err != nil {
return nil, nil, err
}
@@ -432,7 +441,7 @@ func (wc *watchChan) prepareObjs(e *event) (curObj runtime.Object, oldObj runtim
// we need the object only to compute whether it was filtered out
// before).
if len(e.prevValue) > 0 && (e.isDeleted || !wc.acceptAll()) {
data, _, err := wc.watcher.transformer.TransformFromStorage(e.prevValue, authenticatedDataString(e.key))
data, _, err := wc.watcher.transformer.TransformFromStorage(wc.ctx, e.prevValue, authenticatedDataString(e.key))
if err != nil {
return nil, nil, err
}

View File

@@ -75,11 +75,11 @@ type ResponseMeta struct {
}
// IndexerFunc is a function that for a given object computes
// <value of an index> for a particular <index>.
// `<value of an index>` for a particular `<index>`.
type IndexerFunc func(obj runtime.Object) string
// IndexerFuncs is a mapping from <index name> to function that
// for a given object computes <value for that index>.
// IndexerFuncs is a mapping from `<index name>` to function that
// for a given object computes `<value for that index>`.
type IndexerFuncs map[string]IndexerFunc
// Everything accepts all objects.
@@ -88,7 +88,7 @@ var Everything = SelectionPredicate{
Field: fields.Everything(),
}
// MatchValue defines a pair (<index name>, <value for that index>).
// MatchValue defines a pair (`<index name>`, `<value for that index>`).
type MatchValue struct {
IndexName string
Value string
@@ -183,43 +183,31 @@ type Interface interface {
// and send it in an "ADDED" event, before watch starts.
Watch(ctx context.Context, key string, opts ListOptions) (watch.Interface, error)
// WatchList begins watching the specified key's items. Items are decoded into API
// objects and any item selected by 'p' are sent down to returned watch.Interface.
// resourceVersion may be used to specify what version to begin watching,
// which should be the current resourceVersion, and no longer rv+1
// (e.g. reconnecting without missing any updates).
// If resource version is "0", this interface will list current objects directory defined by key
// and send them in "ADDED" events, before watch starts.
WatchList(ctx context.Context, key string, opts ListOptions) (watch.Interface, error)
// Get unmarshals json found at key into objPtr. On a not found error, will either
// Get unmarshals object found at key into objPtr. On a not found error, will either
// return a zero object of the requested type, or an error, depending on 'opts.ignoreNotFound'.
// Treats empty responses and nil response nodes exactly like a not found error.
// The returned contents may be delayed, but it is guaranteed that they will
// match 'opts.ResourceVersion' according 'opts.ResourceVersionMatch'.
Get(ctx context.Context, key string, opts GetOptions, objPtr runtime.Object) error
// GetToList unmarshals json found at key and opaque it into *List api object
// (an object that satisfies the runtime.IsList definition).
// GetList unmarshalls objects found at key into a *List api object (an object
// that satisfies runtime.IsList definition).
// If 'opts.Recursive' is false, 'key' is used as an exact match. If 'opts.Recursive'
// is true, 'key' is used as a prefix.
// The returned contents may be delayed, but it is guaranteed that they will
// match 'opts.ResourceVersion' according 'opts.ResourceVersionMatch'.
GetToList(ctx context.Context, key string, opts ListOptions, listObj runtime.Object) error
GetList(ctx context.Context, key string, opts ListOptions, listObj runtime.Object) error
// List unmarshalls jsons found at directory defined by key and opaque them
// into *List api object (an object that satisfies runtime.IsList definition).
// The returned contents may be delayed, but it is guaranteed that they will
// match 'opts.ResourceVersion' according 'opts.ResourceVersionMatch'.
List(ctx context.Context, key string, opts ListOptions, listObj runtime.Object) error
// GuaranteedUpdate keeps calling 'tryUpdate()' to update key 'key' (of type 'ptrToType')
// GuaranteedUpdate keeps calling 'tryUpdate()' to update key 'key' (of type 'destination')
// retrying the update until success if there is index conflict.
// Note that object passed to tryUpdate may change across invocations of tryUpdate() if
// other writers are simultaneously updating it, so tryUpdate() needs to take into account
// the current contents of the object when deciding how the update object should look.
// If the key doesn't exist, it will return NotFound storage error if ignoreNotFound=false
// or zero value in 'ptrToType' parameter otherwise.
// If the object to update has the same value as previous, it won't do any update
// but will return the object in 'ptrToType' parameter.
// else `destination` will be set to the zero value of its type.
// If the eventual successful invocation of `tryUpdate` returns an output with the same serialized
// contents as the input, it won't perform any update, but instead set `destination` to an object with those
// contents.
// If 'cachedExistingObject' is non-nil, it can be used as a suggestion about the
// current version of the object to avoid read operation from storage to get it.
// However, the implementations have to retry in case suggestion is stale.
@@ -228,7 +216,7 @@ type Interface interface {
//
// s := /* implementation of Interface */
// err := s.GuaranteedUpdate(
// "myKey", &MyType{}, true,
// "myKey", &MyType{}, true, preconditions,
// func(input runtime.Object, res ResponseMeta) (runtime.Object, *uint64, error) {
// // Before each invocation of the user defined function, "input" is reset to
// // current contents for "myKey" in database.
@@ -240,11 +228,11 @@ type Interface interface {
// // Return the modified object - return an error to stop iterating. Return
// // a uint64 to alter the TTL on the object, or nil to keep it the same value.
// return cur, nil, nil
// },
// }, cachedExistingObject
// )
GuaranteedUpdate(
ctx context.Context, key string, ptrToType runtime.Object, ignoreNotFound bool,
precondtions *Preconditions, tryUpdate UpdateFunc, cachedExistingObject runtime.Object) error
ctx context.Context, key string, destination runtime.Object, ignoreNotFound bool,
preconditions *Preconditions, tryUpdate UpdateFunc, cachedExistingObject runtime.Object) error
// Count returns number of different entries under the key (generally being path prefix).
Count(key string) (int64, error)
@@ -274,6 +262,9 @@ type ListOptions struct {
ResourceVersionMatch metav1.ResourceVersionMatch
// Predicate provides the selection rules for the list operation.
Predicate SelectionPredicate
// Recursive determines whether the list or watch is defined for a single object located at the
// given key, or for the whole set of objects with the given key as a prefix.
Recursive bool
// ProgressNotify determines whether storage-originated bookmark (progress notify) events should
// be delivered to the users. The option is ignored for non-watch requests.
ProgressNotify bool

View File

@@ -43,12 +43,12 @@ const (
// TODO: make this flexible for non-core resources with alternate naming rules.
maxNameLength = 63
randomLength = 5
maxGeneratedNameLength = maxNameLength - randomLength
MaxGeneratedNameLength = maxNameLength - randomLength
)
func (simpleNameGenerator) GenerateName(base string) string {
if len(base) > maxGeneratedNameLength {
base = base[:maxGeneratedNameLength]
if len(base) > MaxGeneratedNameLength {
base = base[:MaxGeneratedNameLength]
}
return fmt.Sprintf("%s%s", base, utilrand.String(randomLength))
}

View File

@@ -1,8 +1,6 @@
# See the OWNERS docs at https://go.k8s.io/owners
reviewers:
- lavalamp
- smarterclayton
- wojtek-t
- timothysc
- hongchaodeng
- lavalamp
- smarterclayton
- wojtek-t

View File

@@ -19,19 +19,25 @@ package storagebackend
import (
"time"
oteltrace "go.opentelemetry.io/otel/trace"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apiserver/pkg/server/egressselector"
"k8s.io/apiserver/pkg/storage/etcd3"
"k8s.io/apiserver/pkg/storage/value"
flowcontrolrequest "k8s.io/apiserver/pkg/util/flowcontrol/request"
)
const (
StorageTypeUnset = ""
StorageTypeETCD2 = "etcd2"
StorageTypeETCD3 = "etcd3"
DefaultCompactInterval = 5 * time.Minute
DefaultDBMetricPollInterval = 30 * time.Second
DefaultHealthcheckTimeout = 2 * time.Second
DefaultReadinessTimeout = 2 * time.Second
)
// TransportConfig holds all connection related info, i.e. equal TransportConfig means equal servers we talk to.
@@ -44,6 +50,8 @@ type TransportConfig struct {
TrustedCAFile string
// function to determine the egress dialer. (i.e. konnectivity server dialer)
EgressLookup egressselector.Lookup
// The TracerProvider can add tracing to the connection
TracerProvider oteltrace.TracerProvider
}
// Config is configuration for creating a storage backend.
@@ -78,8 +86,31 @@ type Config struct {
DBMetricPollInterval time.Duration
// HealthcheckTimeout specifies the timeout used when checking health
HealthcheckTimeout time.Duration
// ReadycheckTimeout specifies the timeout used when checking readiness
ReadycheckTimeout time.Duration
LeaseManagerConfig etcd3.LeaseManagerConfig
// StorageObjectCountTracker is used to keep track of the total
// number of objects in the storage per resource.
StorageObjectCountTracker flowcontrolrequest.StorageObjectCountTracker
}
// ConfigForResource is a Config specialized to a particular `schema.GroupResource`.
// Config is embedded, so its fields are also reachable directly on a
// ConfigForResource value.
type ConfigForResource struct {
	// Config is the resource-independent configuration
	Config

	// GroupResource is the group and resource this configuration is specialized for
	GroupResource schema.GroupResource
}
// ForResource returns a newly allocated ConfigForResource that holds a copy
// of config (taken by dereferencing the receiver) together with the given
// resource.
func (config *Config) ForResource(resource schema.GroupResource) *ConfigForResource {
	specialized := new(ConfigForResource)
	specialized.Config = *config
	specialized.GroupResource = resource
	return specialized
}
func NewDefaultConfig(prefix string, codec runtime.Codec) *Config {
@@ -90,6 +121,8 @@ func NewDefaultConfig(prefix string, codec runtime.Codec) *Config {
CompactionInterval: DefaultCompactInterval,
DBMetricPollInterval: DefaultDBMetricPollInterval,
HealthcheckTimeout: DefaultHealthcheckTimeout,
ReadycheckTimeout: DefaultReadinessTimeout,
LeaseManagerConfig: etcd3.NewDefaultLeaseManagerConfig(),
Transport: TransportConfig{TracerProvider: oteltrace.NewNoopTracerProvider()},
}
}

View File

@@ -19,28 +19,38 @@ package factory
import (
"context"
"fmt"
"log"
"net"
"net/url"
"os"
"path"
"strings"
"sync"
"sync/atomic"
"time"
grpcprom "github.com/grpc-ecosystem/go-grpc-prometheus"
"go.etcd.io/etcd/clientv3"
"go.etcd.io/etcd/pkg/transport"
"go.etcd.io/etcd/client/pkg/v3/logutil"
"go.etcd.io/etcd/client/pkg/v3/transport"
clientv3 "go.etcd.io/etcd/client/v3"
"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
"google.golang.org/grpc"
"k8s.io/apimachinery/pkg/runtime"
utilnet "k8s.io/apimachinery/pkg/util/net"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
genericfeatures "k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/server/egressselector"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/etcd3"
"k8s.io/apiserver/pkg/storage/etcd3/metrics"
"k8s.io/apiserver/pkg/storage/storagebackend"
"k8s.io/apiserver/pkg/storage/value"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/component-base/metrics/legacyregistry"
tracing "k8s.io/component-base/tracing"
"k8s.io/klog/v2"
)
@@ -58,6 +68,14 @@ const (
dbMetricsMonitorJitter = 0.5
)
// TODO(negz): Stop using a package scoped logger. At the time of writing we're
// creating an etcd client for each CRD. We need to pass each etcd client a
// logger or each client will create its own, which comes with a significant
// memory cost (around 20% of the API server's memory when hundreds of CRDs are
// present). The correct fix here is to not create a client per CRD. See
// https://github.com/kubernetes/kubernetes/issues/111476 for more.
var etcd3ClientLogger *zap.Logger
func init() {
// grpcprom auto-registers (via an init function) their client metrics, since we are opting out of
// using the global prometheus registry and using our own wrapped global registry,
@@ -65,49 +83,115 @@ func init() {
// For reference: https://github.com/kubernetes/kubernetes/pull/81387
legacyregistry.RawMustRegister(grpcprom.DefaultClientMetrics)
dbMetricsMonitors = make(map[string]struct{})
l, err := logutil.CreateDefaultZapLogger(etcdClientDebugLevel())
if err != nil {
l = zap.NewNop()
}
etcd3ClientLogger = l.Named("etcd-client")
}
func newETCD3HealthCheck(c storagebackend.Config) (func() error, error) {
// etcdClientDebugLevel translates ETCD_CLIENT_DEBUG into zap log level.
//
// An unset variable, or the value "true", selects zapcore.InfoLevel. Any
// other value is parsed as a zap level name; when it parses, that level is
// returned, and when it does not, a notice is logged and zapcore.InfoLevel is
// returned instead.
//
// NOTE(negz): This is a copy of a private etcd client function:
// https://github.com/etcd-io/etcd/blob/v3.5.4/client/v3/logger.go#L47
func etcdClientDebugLevel() zapcore.Level {
	envLevel := os.Getenv("ETCD_CLIENT_DEBUG")
	if envLevel == "" || envLevel == "true" {
		return zapcore.InfoLevel
	}
	var l zapcore.Level
	// Set reports an error for names it does not recognize; only then do we
	// fall back to the default. A successfully parsed level must be honored,
	// otherwise the environment variable could never change the level.
	if err := l.Set(envLevel); err != nil {
		log.Print("Invalid value for environment variable 'ETCD_CLIENT_DEBUG'. Using default level: 'info'")
		return zapcore.InfoLevel
	}
	return l
}
// newETCD3HealthCheck builds the etcd health check. The check uses
// c.HealthcheckTimeout when it is set and
// storagebackend.DefaultHealthcheckTimeout otherwise; stopCh is forwarded to
// newETCD3Check unchanged.
func newETCD3HealthCheck(c storagebackend.Config, stopCh <-chan struct{}) (func() error, error) {
	if c.HealthcheckTimeout == time.Duration(0) {
		return newETCD3Check(c, storagebackend.DefaultHealthcheckTimeout, stopCh)
	}
	return newETCD3Check(c, c.HealthcheckTimeout, stopCh)
}
// newETCD3ReadyCheck builds the etcd readiness check. The check uses
// c.ReadycheckTimeout when it is set and
// storagebackend.DefaultReadinessTimeout otherwise; stopCh is forwarded to
// newETCD3Check unchanged.
func newETCD3ReadyCheck(c storagebackend.Config, stopCh <-chan struct{}) (func() error, error) {
	if c.ReadycheckTimeout == time.Duration(0) {
		return newETCD3Check(c, storagebackend.DefaultReadinessTimeout, stopCh)
	}
	return newETCD3Check(c, c.ReadycheckTimeout, stopCh)
}
func newETCD3Check(c storagebackend.Config, timeout time.Duration, stopCh <-chan struct{}) (func() error, error) {
// constructing the etcd v3 client blocks and times out if etcd is not available.
// retry in a loop in the background until we successfully create the client, storing the client or error encountered
clientValue := &atomic.Value{}
clientErrMsg := &atomic.Value{}
clientErrMsg.Store("etcd client connection not yet established")
lock := sync.Mutex{}
var client *clientv3.Client
clientErr := fmt.Errorf("etcd client connection not yet established")
go wait.PollUntil(time.Second, func() (bool, error) {
client, err := newETCD3Client(c.Transport)
newClient, err := newETCD3Client(c.Transport)
lock.Lock()
defer lock.Unlock()
// Ensure that the server is not already shutting down.
select {
case <-stopCh:
if err == nil {
newClient.Close()
}
return true, nil
default:
}
if err != nil {
clientErrMsg.Store(err.Error())
clientErr = err
return false, nil
}
clientValue.Store(client)
clientErrMsg.Store("")
client = newClient
clientErr = nil
return true, nil
}, wait.NeverStop)
}, stopCh)
// Close the client on shutdown.
go func() {
defer utilruntime.HandleCrash()
<-stopCh
lock.Lock()
defer lock.Unlock()
if client != nil {
client.Close()
clientErr = fmt.Errorf("server is shutting down")
}
}()
return func() error {
if errMsg := clientErrMsg.Load().(string); len(errMsg) > 0 {
return fmt.Errorf(errMsg)
// Given that client is closed on shutdown we hold the lock for
// the entire period of healthcheck call to ensure that client will
// not be closed during healthcheck.
// Given that healthchecks has a 2s timeout, worst case of blocking
// shutdown for additional 2s seems acceptable.
lock.Lock()
defer lock.Unlock()
if clientErr != nil {
return clientErr
}
client := clientValue.Load().(*clientv3.Client)
healthcheckTimeout := storagebackend.DefaultHealthcheckTimeout
if c.HealthcheckTimeout != time.Duration(0) {
healthcheckTimeout = c.HealthcheckTimeout
}
ctx, cancel := context.WithTimeout(context.Background(), healthcheckTimeout)
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
// See https://github.com/etcd-io/etcd/blob/c57f8b3af865d1b531b979889c602ba14377420e/etcdctl/ctlv3/command/ep_command.go#L118
_, err := client.Get(ctx, path.Join("/", c.Prefix, "health"))
if err == nil {
return nil
}
return fmt.Errorf("error getting data from etcd: %v", err)
return fmt.Errorf("error getting data from etcd: %w", err)
}, nil
}
func newETCD3Client(c storagebackend.TransportConfig) (*clientv3.Client, error) {
var newETCD3Client = func(c storagebackend.TransportConfig) (*clientv3.Client, error) {
tlsInfo := transport.TLSInfo{
CertFile: c.CertFile,
KeyFile: c.KeyFile,
@@ -132,19 +216,40 @@ func newETCD3Client(c storagebackend.TransportConfig) (*clientv3.Client, error)
}
dialOptions := []grpc.DialOption{
grpc.WithBlock(), // block until the underlying connection is up
grpc.WithUnaryInterceptor(grpcprom.UnaryClientInterceptor),
grpc.WithStreamInterceptor(grpcprom.StreamClientInterceptor),
// use chained interceptors so that the default (retry and backoff) interceptors are added.
// otherwise they will be overwritten by the metric interceptor.
//
// these optional interceptors will be placed after the default ones.
// which seems to be what we want as the metrics will be collected on each attempt (retry)
grpc.WithChainUnaryInterceptor(grpcprom.UnaryClientInterceptor),
grpc.WithChainStreamInterceptor(grpcprom.StreamClientInterceptor),
}
if utilfeature.DefaultFeatureGate.Enabled(genericfeatures.APIServerTracing) {
tracingOpts := []otelgrpc.Option{
otelgrpc.WithPropagators(tracing.Propagators()),
otelgrpc.WithTracerProvider(c.TracerProvider),
}
// Even with Noop TracerProvider, the otelgrpc still handles context propagation.
// See https://github.com/open-telemetry/opentelemetry-go/tree/main/example/passthrough
dialOptions = append(dialOptions,
grpc.WithUnaryInterceptor(otelgrpc.UnaryClientInterceptor(tracingOpts...)),
grpc.WithStreamInterceptor(otelgrpc.StreamClientInterceptor(tracingOpts...)))
}
if egressDialer != nil {
dialer := func(ctx context.Context, addr string) (net.Conn, error) {
u, err := url.Parse(addr)
if err != nil {
return nil, err
if strings.Contains(addr, "//") {
// etcd client prior to 3.5 passed URLs to dialer, normalize to address
u, err := url.Parse(addr)
if err != nil {
return nil, err
}
addr = u.Host
}
return egressDialer(ctx, "tcp", u.Host)
return egressDialer(ctx, "tcp", addr)
}
dialOptions = append(dialOptions, grpc.WithContextDialer(dialer))
}
cfg := clientv3.Config{
DialTimeout: dialTimeout,
DialKeepAliveTime: keepaliveTime,
@@ -152,6 +257,7 @@ func newETCD3Client(c storagebackend.TransportConfig) (*clientv3.Client, error)
DialOptions: dialOptions,
Endpoints: c.ServerList,
TLS: tlsConfig,
Logger: etcd3ClientLogger,
}
return clientv3.New(cfg)
@@ -222,7 +328,7 @@ func startCompactorOnce(c storagebackend.TransportConfig, interval time.Duration
}, nil
}
func newETCD3Storage(c storagebackend.Config, newFunc func() runtime.Object) (storage.Interface, DestroyFunc, error) {
func newETCD3Storage(c storagebackend.ConfigForResource, newFunc func() runtime.Object) (storage.Interface, DestroyFunc, error) {
stopCompactor, err := startCompactorOnce(c.Transport, c.CompactionInterval)
if err != nil {
return nil, nil, err
@@ -234,6 +340,9 @@ func newETCD3Storage(c storagebackend.Config, newFunc func() runtime.Object) (st
return nil, nil, err
}
// decorate the KV instance so we can track etcd latency per request.
client.KV = etcd3.NewETCDLatencyTracker(client.KV)
stopDBSizeMonitor, err := startDBSizeMonitorPerEndpoint(client, c.DBMetricPollInterval)
if err != nil {
return nil, nil, err
@@ -254,7 +363,7 @@ func newETCD3Storage(c storagebackend.Config, newFunc func() runtime.Object) (st
if transformer == nil {
transformer = value.IdentityTransformer
}
return etcd3.New(client, c.Codec, newFunc, c.Prefix, transformer, c.Paging, c.LeaseManagerConfig), destroyFunc, nil
return etcd3.New(client, c.Codec, newFunc, c.Prefix, c.GroupResource, transformer, c.Paging, c.LeaseManagerConfig), destroyFunc, nil
}
// startDBSizeMonitorPerEndpoint starts a loop to monitor etcd database size and update the

View File

@@ -28,10 +28,10 @@ import (
type DestroyFunc func()
// Create creates a storage backend based on given config.
func Create(c storagebackend.Config, newFunc func() runtime.Object) (storage.Interface, DestroyFunc, error) {
func Create(c storagebackend.ConfigForResource, newFunc func() runtime.Object) (storage.Interface, DestroyFunc, error) {
switch c.Type {
case "etcd2":
return nil, nil, fmt.Errorf("%v is no longer a supported storage backend", c.Type)
case storagebackend.StorageTypeETCD2:
return nil, nil, fmt.Errorf("%s is no longer a supported storage backend", c.Type)
case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3:
return newETCD3Storage(c, newFunc)
default:
@@ -40,12 +40,23 @@ func Create(c storagebackend.Config, newFunc func() runtime.Object) (storage.Int
}
// CreateHealthCheck creates a healthcheck function based on given config.
func CreateHealthCheck(c storagebackend.Config) (func() error, error) {
func CreateHealthCheck(c storagebackend.Config, stopCh <-chan struct{}) (func() error, error) {
switch c.Type {
case "etcd2":
return nil, fmt.Errorf("%v is no longer a supported storage backend", c.Type)
case storagebackend.StorageTypeETCD2:
return nil, fmt.Errorf("%s is no longer a supported storage backend", c.Type)
case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3:
return newETCD3HealthCheck(c)
return newETCD3HealthCheck(c, stopCh)
default:
return nil, fmt.Errorf("unknown storage type: %s", c.Type)
}
}
func CreateReadyCheck(c storagebackend.Config, stopCh <-chan struct{}) (func() error, error) {
switch c.Type {
case storagebackend.StorageTypeETCD2:
return nil, fmt.Errorf("%s is no longer a supported storage backend", c.Type)
case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3:
return newETCD3ReadyCheck(c, stopCh)
default:
return nil, fmt.Errorf("unknown storage type: %s", c.Type)
}

View File

@@ -19,6 +19,7 @@ package aes
import (
"bytes"
"context"
"crypto/aes"
"crypto/cipher"
"crypto/rand"
@@ -52,7 +53,7 @@ func NewGCMTransformer(block cipher.Block) value.Transformer {
return &gcm{block: block}
}
func (t *gcm) TransformFromStorage(data []byte, context value.Context) ([]byte, bool, error) {
func (t *gcm) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
aead, err := cipher.NewGCM(t.block)
if err != nil {
return nil, false, err
@@ -61,11 +62,11 @@ func (t *gcm) TransformFromStorage(data []byte, context value.Context) ([]byte,
if len(data) < nonceSize {
return nil, false, fmt.Errorf("the stored data was shorter than the required size")
}
result, err := aead.Open(nil, data[:nonceSize], data[nonceSize:], context.AuthenticatedData())
result, err := aead.Open(nil, data[:nonceSize], data[nonceSize:], dataCtx.AuthenticatedData())
return result, false, err
}
func (t *gcm) TransformToStorage(data []byte, context value.Context) ([]byte, error) {
func (t *gcm) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
aead, err := cipher.NewGCM(t.block)
if err != nil {
return nil, err
@@ -79,7 +80,7 @@ func (t *gcm) TransformToStorage(data []byte, context value.Context) ([]byte, er
if n != nonceSize {
return nil, fmt.Errorf("unable to read sufficient random bytes")
}
cipherText := aead.Seal(result[nonceSize:nonceSize], result[:nonceSize], data, context.AuthenticatedData())
cipherText := aead.Seal(result[nonceSize:nonceSize], result[:nonceSize], data, dataCtx.AuthenticatedData())
return result[:nonceSize+len(cipherText)], nil
}
@@ -95,12 +96,12 @@ func NewCBCTransformer(block cipher.Block) value.Transformer {
}
var (
errInvalidBlockSize = fmt.Errorf("the stored data is not a multiple of the block size")
ErrInvalidBlockSize = fmt.Errorf("the stored data is not a multiple of the block size")
errInvalidPKCS7Data = errors.New("invalid PKCS7 data (empty or not padded)")
errInvalidPKCS7Padding = errors.New("invalid padding on input")
)
func (t *cbc) TransformFromStorage(data []byte, context value.Context) ([]byte, bool, error) {
func (t *cbc) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
blockSize := aes.BlockSize
if len(data) < blockSize {
return nil, false, fmt.Errorf("the stored data was shorter than the required size")
@@ -109,7 +110,7 @@ func (t *cbc) TransformFromStorage(data []byte, context value.Context) ([]byte,
data = data[blockSize:]
if len(data)%blockSize != 0 {
return nil, false, errInvalidBlockSize
return nil, false, ErrInvalidBlockSize
}
result := make([]byte, len(data))
@@ -133,7 +134,7 @@ func (t *cbc) TransformFromStorage(data []byte, context value.Context) ([]byte,
return result[:size], false, nil
}
func (t *cbc) TransformToStorage(data []byte, context value.Context) ([]byte, error) {
func (t *cbc) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
blockSize := aes.BlockSize
paddingSize := blockSize - (len(data) % blockSize)
result := make([]byte, blockSize+len(data)+paddingSize)

View File

@@ -18,6 +18,7 @@ limitations under the License.
package envelope
import (
"context"
"crypto/aes"
"crypto/cipher"
"crypto/rand"
@@ -26,14 +27,15 @@ import (
"time"
"k8s.io/apiserver/pkg/storage/value"
"k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics"
"k8s.io/utils/lru"
lru "github.com/hashicorp/golang-lru"
"golang.org/x/crypto/cryptobyte"
)
func init() {
value.RegisterMetrics()
registerMetrics()
metrics.RegisterMetrics()
}
// Service allows encrypting and decrypting data using an external Key Management Service.
@@ -64,14 +66,10 @@ type envelopeTransformer struct {
func NewEnvelopeTransformer(envelopeService Service, cacheSize int, baseTransformerFunc func(cipher.Block) value.Transformer) (value.Transformer, error) {
var (
cache *lru.Cache
err error
)
if cacheSize > 0 {
cache, err = lru.New(cacheSize)
if err != nil {
return nil, err
}
cache = lru.New(cacheSize)
}
return &envelopeTransformer{
envelopeService: envelopeService,
@@ -83,8 +81,8 @@ func NewEnvelopeTransformer(envelopeService Service, cacheSize int, baseTransfor
}
// TransformFromStorage decrypts data encrypted by this transformer using envelope encryption.
func (t *envelopeTransformer) TransformFromStorage(data []byte, context value.Context) ([]byte, bool, error) {
recordArrival(fromStorageLabel, time.Now())
func (t *envelopeTransformer) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
metrics.RecordArrival(metrics.FromStorageLabel, time.Now())
// Read the 16 bit length-of-DEK encoded at the start of the encrypted DEK. 16 bits can
// represent a maximum key length of 65536 bytes. We are using a 256 bit key, whose
@@ -117,12 +115,12 @@ func (t *envelopeTransformer) TransformFromStorage(data []byte, context value.Co
}
}
return transformer.TransformFromStorage(encData, context)
return transformer.TransformFromStorage(ctx, encData, dataCtx)
}
// TransformToStorage encrypts data to be written to disk using envelope encryption.
func (t *envelopeTransformer) TransformToStorage(data []byte, context value.Context) ([]byte, error) {
recordArrival(toStorageLabel, time.Now())
func (t *envelopeTransformer) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
metrics.RecordArrival(metrics.ToStorageLabel, time.Now())
newKey, err := generateKey(32)
if err != nil {
return nil, err
@@ -141,7 +139,7 @@ func (t *envelopeTransformer) TransformToStorage(data []byte, context value.Cont
return nil, err
}
result, err := transformer.TransformToStorage(data, context)
result, err := transformer.TransformToStorage(ctx, data, dataCtx)
if err != nil {
return nil, err
}
@@ -168,7 +166,7 @@ func (t *envelopeTransformer) addTransformer(encKey []byte, key []byte) (value.T
// cannot hash []uint8.
if t.cacheEnabled {
t.transformers.Add(base64.StdEncoding.EncodeToString(encKey), transformer)
dekCacheFillPercent.Set(float64(t.transformers.Len()) / float64(t.cacheSize))
metrics.RecordDekCacheFillPercent(float64(t.transformers.Len()) / float64(t.cacheSize))
}
return transformer, nil
}

View File

@@ -21,22 +21,21 @@ import (
"context"
"fmt"
"net"
"net/url"
"strings"
"sync"
"time"
"k8s.io/klog/v2"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
"k8s.io/apiserver/pkg/storage/value/encrypt/envelope/util"
kmsapi "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/v1beta1"
)
const (
// Now only supported unix domain socket.
// unixProtocol is the only supported protocol for remote KMS provider.
unixProtocol = "unix"
// Current version for the protocol interface definition.
kmsapiVersion = "v1beta1"
@@ -56,7 +55,7 @@ type gRPCService struct {
func NewGRPCService(endpoint string, callTimeout time.Duration) (Service, error) {
klog.V(4).Infof("Configure KMS provider with endpoint: %s", endpoint)
addr, err := parseEndpoint(endpoint)
addr, err := util.ParseEndpoint(endpoint)
if err != nil {
return nil, err
}
@@ -64,7 +63,7 @@ func NewGRPCService(endpoint string, callTimeout time.Duration) (Service, error)
s := &gRPCService{callTimeout: callTimeout}
s.connection, err = grpc.Dial(
addr,
grpc.WithInsecure(),
grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithUnaryInterceptor(s.interceptor),
grpc.WithDefaultCallOptions(grpc.WaitForReady(true)),
grpc.WithContextDialer(
@@ -88,32 +87,6 @@ func NewGRPCService(endpoint string, callTimeout time.Duration) (Service, error)
return s, nil
}
// Parse the endpoint to extract schema, host or path.
func parseEndpoint(endpoint string) (string, error) {
if len(endpoint) == 0 {
return "", fmt.Errorf("remote KMS provider can't use empty string as endpoint")
}
u, err := url.Parse(endpoint)
if err != nil {
return "", fmt.Errorf("invalid endpoint %q for remote KMS provider, error: %v", endpoint, err)
}
if u.Scheme != unixProtocol {
return "", fmt.Errorf("unsupported scheme %q for remote KMS provider", u.Scheme)
}
// Linux abstract namespace socket - no physical file required
// Warning: Linux Abstract sockets have not concept of ACL (unlike traditional file based sockets).
// However, Linux Abstract sockets are subject to Linux networking namespace, so will only be accessible to
// containers within the same pod (unless host networking is used).
if strings.HasPrefix(u.Path, "/@") {
return strings.TrimPrefix(u.Path, "/"), nil
}
return u.Path, nil
}
func (g *gRPCService) checkAPIVersion(ctx context.Context) error {
g.mux.Lock()
defer g.mux.Unlock()

View File

@@ -0,0 +1,246 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package kmsv2 transforms values for storage at rest using an Envelope v2 provider
package kmsv2
import (
"context"
"crypto/aes"
"crypto/cipher"
"crypto/rand"
"encoding/base64"
"fmt"
"time"
"github.com/gogo/protobuf/proto"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/apiserver/pkg/storage/value"
kmstypes "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/v2alpha1"
"k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics"
"k8s.io/utils/lru"
)
const (
// KMSAPIVersion is the version of the KMS API.
KMSAPIVersion = "v2alpha1"
)
// Service allows encrypting and decrypting data using an external Key Management Service.
// The envelope transformer in this package calls it to encrypt and decrypt DEKs,
// passing a freshly generated UUID as uid on every call.
type Service interface {
// Decrypt a given bytearray to obtain the original data as bytes.
// req carries the ciphertext together with the key ID and annotations that
// were stored with it.
Decrypt(ctx context.Context, uid string, req *DecryptRequest) ([]byte, error)
// Encrypt bytes to a ciphertext.
// The response also carries a key ID and annotations, which the envelope
// transformer stores with the object and passes back to Decrypt.
Encrypt(ctx context.Context, uid string, data []byte) (*EncryptResponse, error)
// Status returns the status of the KMS.
Status(ctx context.Context) (*StatusResponse, error)
}
// envelopeTransformer is the value.Transformer built by NewEnvelopeTransformer.
// It encrypts each object with a newly generated DEK and uses envelopeService
// to encrypt and decrypt that DEK.
type envelopeTransformer struct {
// envelopeService encrypts and decrypts DEKs.
envelopeService Service
// transformers is a thread-safe LRU cache which caches decrypted DEKs indexed by their encrypted form.
// It is nil when cacheEnabled is false.
transformers *lru.Cache
// baseTransformerFunc creates a new transformer for encrypting the data with the DEK.
baseTransformerFunc func(cipher.Block) value.Transformer
// cacheSize is the capacity requested for transformers; it is the divisor
// used when reporting the DEK cache fill metric.
cacheSize int
// cacheEnabled is true when cacheSize is greater than zero.
cacheEnabled bool
// pluginName is not set or read anywhere in this file.
// NOTE(review): confirm whether another file in the package uses it.
pluginName string
}
// EncryptResponse is the response from the Envelope service when encrypting data.
type EncryptResponse struct {
// Ciphertext is the encrypted form of the data passed to Encrypt.
Ciphertext []byte
// KeyID is the key ID the service reported for this encryption.
KeyID string
// Annotations is additional metadata returned by the service. The envelope
// transformer stores it with the object and sends it back on Decrypt.
Annotations map[string][]byte
}
// DecryptRequest is the request to the Envelope service when decrypting data.
// The envelope transformer fills every field from the stored EncryptedObject.
type DecryptRequest struct {
// Ciphertext is the encrypted data to decrypt (the encrypted DEK when sent
// by the envelope transformer).
Ciphertext []byte
// KeyID is the key ID that was stored with the ciphertext.
KeyID string
// Annotations is the metadata that was stored with the ciphertext.
Annotations map[string][]byte
}
// StatusResponse is the response from the Envelope service when getting the status of the service.
// The gRPC implementation copies all three fields verbatim from the
// provider's status reply.
type StatusResponse struct {
// Version is the version string reported by the provider.
Version string
// Healthz is the health string reported by the provider.
Healthz string
// KeyID is the key ID reported by the provider.
KeyID string
}
// NewEnvelopeTransformer returns a transformer which implements a KEK-DEK based envelope encryption scheme.
// DEKs are encrypted and decrypted through envelopeService, and the encrypted DEK is stored
// together with the data item it protects. When cacheSize is positive, an LRU cache of that
// size keeps recently used decrypted DEKs in memory; otherwise caching is disabled.
// The returned error is always nil.
func NewEnvelopeTransformer(envelopeService Service, cacheSize int, baseTransformerFunc func(cipher.Block) value.Transformer) (value.Transformer, error) {
	t := &envelopeTransformer{
		envelopeService:     envelopeService,
		baseTransformerFunc: baseTransformerFunc,
		cacheEnabled:        cacheSize > 0,
		cacheSize:           cacheSize,
	}
	if t.cacheEnabled {
		// TODO(aramase): Switch to using expiring cache: kubernetes/kubernetes/staging/src/k8s.io/apimachinery/pkg/util/cache/expiring.go.
		// It handles scans a lot better, doesn't have to be right sized, and doesn't have a global lock on reads.
		t.transformers = lru.New(cacheSize)
	}
	return t, nil
}
// TransformFromStorage decrypts data encrypted by this transformer using envelope encryption.
// data must be a serialized EncryptedObject. The DEK transformer is taken from the cache when
// possible; otherwise the DEK is decrypted through the envelope service and cached for later reads.
// The second return value is whatever the DEK transformer reports.
func (t *envelopeTransformer) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
	metrics.RecordArrival(metrics.FromStorageLabel, time.Now())

	// Deserialize and validate the stored envelope.
	obj, err := t.doDecode(data)
	if err != nil {
		return nil, false, err
	}

	// Fast path: the decrypted DEK is already cached.
	dekTransformer := t.getTransformer(obj.EncryptedDEK)
	if dekTransformer != nil {
		return dekTransformer.TransformFromStorage(ctx, obj.EncryptedData, dataCtx)
	}

	// Slow path: ask the envelope service for the DEK. A miss is only
	// recorded when there is a cache to miss.
	if t.cacheEnabled {
		value.RecordCacheMiss()
	}
	uid := string(uuid.NewUUID())
	request := &DecryptRequest{
		Ciphertext:  obj.EncryptedDEK,
		KeyID:       obj.KeyID,
		Annotations: obj.Annotations,
	}
	dek, err := t.envelopeService.Decrypt(ctx, uid, request)
	if err != nil {
		return nil, false, fmt.Errorf("failed to decrypt DEK, error: %w", err)
	}

	dekTransformer, err = t.addTransformer(obj.EncryptedDEK, dek)
	if err != nil {
		return nil, false, err
	}
	return dekTransformer.TransformFromStorage(ctx, obj.EncryptedData, dataCtx)
}
// TransformToStorage encrypts data to be written to disk using envelope encryption.
// A new 32-byte DEK is generated for every call, encrypted through the envelope service,
// and used to encrypt data. The result is a serialized EncryptedObject holding the
// encrypted data, the encrypted DEK, and the key ID and annotations the service returned.
func (t *envelopeTransformer) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
	metrics.RecordArrival(metrics.ToStorageLabel, time.Now())

	dek, err := generateKey(32)
	if err != nil {
		return nil, err
	}

	uid := string(uuid.NewUUID())
	encrypted, err := t.envelopeService.Encrypt(ctx, uid, dek)
	if err != nil {
		return nil, fmt.Errorf("failed to encrypt DEK, error: %w", err)
	}

	// Register the DEK under its encrypted form so later reads can skip the
	// envelope service.
	dekTransformer, err := t.addTransformer(encrypted.Ciphertext, dek)
	if err != nil {
		return nil, err
	}

	ciphertext, err := dekTransformer.TransformToStorage(ctx, data, dataCtx)
	if err != nil {
		return nil, err
	}

	// Serialize the EncryptedObject to a byte array.
	return t.doEncode(&kmstypes.EncryptedObject{
		KeyID:         encrypted.KeyID,
		EncryptedDEK:  encrypted.Ciphertext,
		EncryptedData: ciphertext,
		Annotations:   encrypted.Annotations,
	})
}
// addTransformer builds the transformer for the decrypted DEK key and, when caching is
// enabled, stores it in the DEK cache under encKey for future reads.
// It returns an error if key is not a valid AES key.
func (t *envelopeTransformer) addTransformer(encKey []byte, key []byte) (value.Transformer, error) {
	block, err := aes.NewCipher(key)
	if err != nil {
		return nil, err
	}
	transformer := t.baseTransformerFunc(block)
	if !t.cacheEnabled {
		return transformer, nil
	}

	// The cache is keyed by the base64 form of encKey because a []byte is
	// not usable as a cache key directly.
	cacheKey := base64.StdEncoding.EncodeToString(encKey)
	t.transformers.Add(cacheKey, transformer)
	fill := float64(t.transformers.Len()) / float64(t.cacheSize)
	metrics.RecordDekCacheFillPercent(fill)
	return transformer, nil
}
// getTransformer returns the cached transformer for encKey, or nil when caching is
// disabled or no entry exists.
func (t *envelopeTransformer) getTransformer(encKey []byte) value.Transformer {
	if t.cacheEnabled {
		cacheKey := base64.StdEncoding.EncodeToString(encKey)
		if cached, ok := t.transformers.Get(cacheKey); ok {
			return cached.(value.Transformer)
		}
	}
	return nil
}
// doEncode encodes the EncryptedObject to a byte array.
// It returns the protobuf serialization of request, or the marshalling error.
func (t *envelopeTransformer) doEncode(request *kmstypes.EncryptedObject) ([]byte, error) {
return proto.Marshal(request)
}
// doDecode decodes the byte array to an EncryptedObject and validates it.
// It fails if the data cannot be unmarshalled, or if the encrypted data, the
// key ID, or the encrypted DEK is missing.
func (t *envelopeTransformer) doDecode(originalData []byte) (*kmstypes.EncryptedObject, error) {
	var decoded kmstypes.EncryptedObject
	if err := proto.Unmarshal(originalData, &decoded); err != nil {
		return nil, err
	}

	// The first missing field, in this order, decides the error.
	switch {
	case decoded.EncryptedData == nil:
		return nil, fmt.Errorf("encrypted data is nil after unmarshal")
	case decoded.KeyID == "":
		return nil, fmt.Errorf("keyID is empty after unmarshal")
	case decoded.EncryptedDEK == nil:
		return nil, fmt.Errorf("encrypted dek is nil after unmarshal")
	}
	return &decoded, nil
}
// generateKey returns length random bytes read from the system random source.
// The duration and outcome of every call are reported through
// value.RecordDataKeyGeneration.
func generateKey(length int) (key []byte, err error) {
	start := time.Now()
	// err is a named result so the deferred call sees the final error value.
	defer func() {
		value.RecordDataKeyGeneration(start, err)
	}()

	buf := make([]byte, length)
	if _, err = rand.Read(buf); err != nil {
		return nil, err
	}
	return buf, nil
}

View File

@@ -0,0 +1,130 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package kmsv2 transforms values for storage at rest using an Envelope provider
package kmsv2
import (
"context"
"fmt"
"net"
"time"
"k8s.io/klog/v2"
"google.golang.org/grpc"
"k8s.io/apiserver/pkg/storage/value/encrypt/envelope/util"
kmsapi "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/v2alpha1"
)
const (
// unixProtocol is the only supported protocol for remote KMS provider.
unixProtocol = "unix"
)
// The gRPC implementation for envelope.Service.
type gRPCService struct {
// kmsClient is the KMS API client created on top of connection.
kmsClient kmsapi.KeyManagementServiceClient
// connection is the client connection dialed by NewGRPCService.
connection *grpc.ClientConn
// callTimeout bounds each Decrypt, Encrypt and Status call.
callTimeout time.Duration
}
// NewGRPCService returns an envelope.Service which uses gRPC to communicate with the remote KMS provider.
//
// endpoint is parsed with util.ParseEndpoint and the resulting address is dialed as a
// unix domain socket. callTimeout is applied to every Decrypt, Encrypt and Status call
// made through the returned service. An error is returned when the endpoint cannot be
// parsed or the client connection cannot be created; the dial error is wrapped so that
// callers can inspect it with errors.Is / errors.As.
func NewGRPCService(endpoint string, callTimeout time.Duration) (Service, error) {
	klog.V(4).Infof("Configure KMS provider with endpoint: %s", endpoint)

	addr, err := util.ParseEndpoint(endpoint)
	if err != nil {
		return nil, err
	}

	s := &gRPCService{callTimeout: callTimeout}
	s.connection, err = grpc.Dial(
		addr,
		// NOTE(review): grpc.WithInsecure is deprecated in grpc-go in favor of
		// grpc.WithTransportCredentials(insecure.NewCredentials()). Switching
		// requires importing google.golang.org/grpc/credentials/insecure.
		grpc.WithInsecure(),
		grpc.WithDefaultCallOptions(grpc.WaitForReady(true)),
		grpc.WithContextDialer(
			func(context.Context, string) (net.Conn, error) {
				// Ignoring addr and timeout arguments:
				// addr - comes from the closure
				c, err := net.DialUnix(unixProtocol, nil, &net.UnixAddr{Name: addr})
				if err != nil {
					klog.Errorf("failed to create connection to unix socket: %s, error: %v", addr, err)
				} else {
					klog.V(4).Infof("Successfully dialed Unix socket %v", addr)
				}
				return c, err
			}))

	if err != nil {
		// %w keeps the underlying error in the chain; the message text is unchanged.
		return nil, fmt.Errorf("failed to create connection to %s, error: %w", endpoint, err)
	}

	s.kmsClient = kmsapi.NewKeyManagementServiceClient(s.connection)
	return s, nil
}
// Decrypt sends req to the KMS provider and returns the plaintext it replies with.
// uid is forwarded as the request's Uid, and the call is bounded by callTimeout.
func (g *gRPCService) Decrypt(ctx context.Context, uid string, req *DecryptRequest) ([]byte, error) {
	callCtx, cancel := context.WithTimeout(ctx, g.callTimeout)
	defer cancel()

	resp, err := g.kmsClient.Decrypt(callCtx, &kmsapi.DecryptRequest{
		Ciphertext:  req.Ciphertext,
		Uid:         uid,
		KeyId:       req.KeyID,
		Annotations: req.Annotations,
	})
	if err != nil {
		return nil, err
	}
	return resp.Plaintext, nil
}
// Encrypt sends plaintext to the KMS provider and returns the ciphertext, key ID and
// annotations from its reply. uid is forwarded as the request's Uid, and the call is
// bounded by callTimeout.
func (g *gRPCService) Encrypt(ctx context.Context, uid string, plaintext []byte) (*EncryptResponse, error) {
	callCtx, cancel := context.WithTimeout(ctx, g.callTimeout)
	defer cancel()

	resp, err := g.kmsClient.Encrypt(callCtx, &kmsapi.EncryptRequest{
		Plaintext: plaintext,
		Uid:       uid,
	})
	if err != nil {
		return nil, err
	}

	result := &EncryptResponse{
		Ciphertext:  resp.Ciphertext,
		KeyID:       resp.KeyId,
		Annotations: resp.Annotations,
	}
	return result, nil
}
// Status queries the KMSv2 provider and returns the version, health and key ID from
// its reply. The call is bounded by callTimeout.
func (g *gRPCService) Status(ctx context.Context) (*StatusResponse, error) {
	callCtx, cancel := context.WithTimeout(ctx, g.callTimeout)
	defer cancel()

	resp, err := g.kmsClient.Status(callCtx, &kmsapi.StatusRequest{})
	if err != nil {
		return nil, err
	}

	status := &StatusResponse{
		Version: resp.Version,
		Healthz: resp.Healthz,
		KeyID:   resp.KeyId,
	}
	return status, nil
}

View File

@@ -0,0 +1,9 @@
# See the OWNERS docs at https://go.k8s.io/owners
# Disable inheritance as this is an api owners file
options:
no_parent_owners: true
approvers:
- api-approvers
reviewers:
- sig-auth-api-reviewers

View File

@@ -0,0 +1,128 @@
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by protoc-gen-gogo. DO NOT EDIT.
// source: api.proto
package v2alpha1
import (
fmt "fmt"
proto "github.com/gogo/protobuf/proto"
math "math"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = fmt.Errorf
var _ = math.Inf
// This is a compile-time assertion to ensure that this generated file
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package
// EncryptedObject is the representation of data stored in etcd after envelope encryption.
type EncryptedObject struct {
// EncryptedData is the encrypted data.
EncryptedData []byte `protobuf:"bytes,1,opt,name=encryptedData,proto3" json:"encryptedData,omitempty"`
// KeyID is the KMS key ID used for encryption operations.
KeyID string `protobuf:"bytes,2,opt,name=keyID,proto3" json:"keyID,omitempty"`
// EncryptedDEK is the encrypted DEK.
EncryptedDEK []byte `protobuf:"bytes,3,opt,name=encryptedDEK,proto3" json:"encryptedDEK,omitempty"`
// Annotations is additional metadata that was provided by the KMS plugin.
Annotations map[string][]byte `protobuf:"bytes,4,rep,name=annotations,proto3" json:"annotations,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
// The XXX_ fields below are generator bookkeeping; their `json:"-"` tags keep them out of JSON output.
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
// Reset replaces *m with an empty EncryptedObject.
func (m *EncryptedObject) Reset() { *m = EncryptedObject{} }
// String renders m through proto.CompactTextString.
func (m *EncryptedObject) String() string { return proto.CompactTextString(m) }
// ProtoMessage has an empty body; presumably a marker identifying the type as a protobuf message.
func (*EncryptedObject) ProtoMessage() {}
// Descriptor returns the gzipped api.proto descriptor bytes and the index path {0}
// (presumably this message's position within that file).
func (*EncryptedObject) Descriptor() ([]byte, []int) {
return fileDescriptor_00212fb1f9d3bf1c, []int{0}
}
// The XXX_ methods below forward to xxx_messageInfo_EncryptedObject.
func (m *EncryptedObject) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_EncryptedObject.Unmarshal(m, b)
}
func (m *EncryptedObject) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_EncryptedObject.Marshal(b, m, deterministic)
}
func (m *EncryptedObject) XXX_Merge(src proto.Message) {
xxx_messageInfo_EncryptedObject.Merge(m, src)
}
func (m *EncryptedObject) XXX_Size() int {
return xxx_messageInfo_EncryptedObject.Size(m)
}
func (m *EncryptedObject) XXX_DiscardUnknown() {
xxx_messageInfo_EncryptedObject.DiscardUnknown(m)
}
// xxx_messageInfo_EncryptedObject is the proto.InternalMessageInfo the XXX_ methods above delegate to.
var xxx_messageInfo_EncryptedObject proto.InternalMessageInfo
// GetEncryptedData returns m.EncryptedData, or nil when m is nil.
func (m *EncryptedObject) GetEncryptedData() []byte {
if m != nil {
return m.EncryptedData
}
return nil
}
// GetKeyID returns m.KeyID, or the empty string when m is nil.
func (m *EncryptedObject) GetKeyID() string {
if m != nil {
return m.KeyID
}
return ""
}
// GetEncryptedDEK returns m.EncryptedDEK, or nil when m is nil.
func (m *EncryptedObject) GetEncryptedDEK() []byte {
if m != nil {
return m.EncryptedDEK
}
return nil
}
// GetAnnotations returns m.Annotations, or nil when m is nil.
func (m *EncryptedObject) GetAnnotations() map[string][]byte {
if m != nil {
return m.Annotations
}
return nil
}
// init registers EncryptedObject as "v2alpha1.EncryptedObject" and its annotations
// map type as "v2alpha1.EncryptedObject.AnnotationsEntry" with the proto package.
func init() {
proto.RegisterType((*EncryptedObject)(nil), "v2alpha1.EncryptedObject")
proto.RegisterMapType((map[string][]byte)(nil), "v2alpha1.EncryptedObject.AnnotationsEntry")
}
// init registers the descriptor bytes of "api.proto" with the proto package.
func init() { proto.RegisterFile("api.proto", fileDescriptor_00212fb1f9d3bf1c) }
var fileDescriptor_00212fb1f9d3bf1c = []byte{
// 200 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0xe2, 0x4c, 0x2c, 0xc8, 0xd4,
0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0xe2, 0x28, 0x33, 0x4a, 0xcc, 0x29, 0xc8, 0x48, 0x34, 0x54,
0xfa, 0xcf, 0xc8, 0xc5, 0xef, 0x9a, 0x97, 0x5c, 0x54, 0x59, 0x50, 0x92, 0x9a, 0xe2, 0x9f, 0x94,
0x95, 0x9a, 0x5c, 0x22, 0xa4, 0xc2, 0xc5, 0x9b, 0x0a, 0x13, 0x72, 0x49, 0x2c, 0x49, 0x94, 0x60,
0x54, 0x60, 0xd4, 0xe0, 0x09, 0x42, 0x15, 0x14, 0x12, 0xe1, 0x62, 0xcd, 0x4e, 0xad, 0xf4, 0x74,
0x91, 0x60, 0x52, 0x60, 0xd4, 0xe0, 0x0c, 0x82, 0x70, 0x84, 0x94, 0xb8, 0x78, 0x10, 0xca, 0x5c,
0xbd, 0x25, 0x98, 0xc1, 0x5a, 0x51, 0xc4, 0x84, 0x7c, 0xb8, 0xb8, 0x13, 0xf3, 0xf2, 0xf2, 0x4b,
0x12, 0x4b, 0x32, 0xf3, 0xf3, 0x8a, 0x25, 0x58, 0x14, 0x98, 0x35, 0xb8, 0x8d, 0xb4, 0xf4, 0x60,
0x6e, 0xd2, 0x43, 0x73, 0x8f, 0x9e, 0x23, 0x42, 0xb1, 0x6b, 0x5e, 0x49, 0x51, 0x65, 0x10, 0xb2,
0x76, 0x29, 0x3b, 0x2e, 0x01, 0x74, 0x05, 0x42, 0x02, 0x5c, 0xcc, 0xd9, 0xa9, 0x95, 0x60, 0x77,
0x73, 0x06, 0x81, 0x98, 0x20, 0xd7, 0x96, 0x25, 0xe6, 0x94, 0xa6, 0x82, 0x5d, 0xcb, 0x13, 0x04,
0xe1, 0x58, 0x31, 0x59, 0x30, 0x26, 0xb1, 0x81, 0x83, 0xc4, 0x18, 0x10, 0x00, 0x00, 0xff, 0xff,
0x88, 0x8c, 0xbb, 0x4e, 0x1f, 0x01, 0x00, 0x00,
}

View File

@@ -0,0 +1,35 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// To regenerate api.pb.go run hack/update-generated-kms.sh
syntax = "proto3";
package v2alpha1;
// EncryptedObject is the representation of data stored in etcd after envelope encryption.
// NOTE(review): since this message is persisted, field numbers 1-4 presumably must stay
// stable across versions; confirm before renumbering or reusing any of them.
message EncryptedObject {
// EncryptedData is the encrypted data.
bytes encryptedData = 1;
// KeyID is the KMS key ID used for encryption operations.
string keyID = 2;
// EncryptedDEK is the encrypted DEK.
bytes encryptedDEK = 3;
// Annotations is additional metadata that was provided by the KMS plugin.
map<string, bytes> annotations = 4;
}

View File

@@ -0,0 +1,18 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package v2alpha1 contains the definitions of the kms-plugin's serialized types.
package v2alpha1

View File

@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package envelope
package metrics
import (
"sync"
@@ -27,13 +27,13 @@ import (
const (
namespace = "apiserver"
subsystem = "envelope_encryption"
fromStorageLabel = "from_storage"
toStorageLabel = "to_storage"
FromStorageLabel = "from_storage"
ToStorageLabel = "to_storage"
)
/*
* By default, all the following metrics are defined as falling under
* ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/20190404-kubernetes-control-plane-metrics-stability.md#stability-classes)
* ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes)
*
* Promoting the stability level of the metric is a responsibility of the component owner, since it
* involves explicitly acknowledging support for the metric across multiple releases, in accordance with
@@ -71,16 +71,16 @@ var (
var registerMetricsFunc sync.Once
func registerMetrics() {
func RegisterMetrics() {
registerMetricsFunc.Do(func() {
legacyregistry.MustRegister(dekCacheFillPercent)
legacyregistry.MustRegister(dekCacheInterArrivals)
})
}
func recordArrival(transformationType string, start time.Time) {
func RecordArrival(transformationType string, start time.Time) {
switch transformationType {
case fromStorageLabel:
case FromStorageLabel:
lockLastFromStorage.Lock()
defer lockLastFromStorage.Unlock()
@@ -89,7 +89,7 @@ func recordArrival(transformationType string, start time.Time) {
}
dekCacheInterArrivals.WithLabelValues(transformationType).Observe(start.Sub(lastFromStorage).Seconds())
lastFromStorage = start
case toStorageLabel:
case ToStorageLabel:
lockLastToStorage.Lock()
defer lockLastToStorage.Unlock()
@@ -100,3 +100,7 @@ func recordArrival(transformationType string, start time.Time) {
lastToStorage = start
}
}
func RecordDekCacheFillPercent(percent float64) {
dekCacheFillPercent.Set(percent)
}

View File

@@ -0,0 +1,54 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"fmt"
"net/url"
"strings"
)
const (
	// unixProtocol is the only supported protocol for remote KMS provider.
	unixProtocol = "unix"
)

// ParseEndpoint validates a remote KMS provider endpoint and returns the
// socket address extracted from it.
//
// Only the "unix" scheme is accepted. For a file-based socket the URL path is
// returned unchanged (for example "unix:///tmp/kms.sock" yields
// "/tmp/kms.sock"). For a Linux abstract namespace socket, written as
// "unix:///@name", the leading slash is dropped so "@name" is returned.
//
// An error is returned when endpoint is empty, cannot be parsed as a URL, or
// uses a scheme other than "unix". The URL parse failure is wrapped, so the
// underlying error remains reachable through errors.Is / errors.As.
func ParseEndpoint(endpoint string) (string, error) {
	if len(endpoint) == 0 {
		return "", fmt.Errorf("remote KMS provider can't use empty string as endpoint")
	}

	u, err := url.Parse(endpoint)
	if err != nil {
		// %w renders the same text as %v but keeps the cause in the error chain.
		return "", fmt.Errorf("invalid endpoint %q for remote KMS provider, error: %w", endpoint, err)
	}

	if u.Scheme != unixProtocol {
		return "", fmt.Errorf("unsupported scheme %q for remote KMS provider", u.Scheme)
	}

	// Linux abstract namespace socket - no physical file required.
	// Warning: Linux abstract sockets have no concept of ACL (unlike traditional file-based sockets).
	// However, Linux abstract sockets are subject to the Linux networking namespace, so they will only be
	// accessible to containers within the same pod (unless host networking is used).
	if strings.HasPrefix(u.Path, "/@") {
		return strings.TrimPrefix(u.Path, "/"), nil
	}

	return u.Path, nil
}

View File

@@ -15,7 +15,7 @@ limitations under the License.
*/
// Code generated by protoc-gen-gogo. DO NOT EDIT.
// source: service.proto
// source: api.proto
package v1beta1
@@ -52,7 +52,7 @@ func (m *VersionRequest) Reset() { *m = VersionRequest{} }
func (m *VersionRequest) String() string { return proto.CompactTextString(m) }
func (*VersionRequest) ProtoMessage() {}
func (*VersionRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_a0b84a42fa06f626, []int{0}
return fileDescriptor_00212fb1f9d3bf1c, []int{0}
}
func (m *VersionRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_VersionRequest.Unmarshal(m, b)
@@ -95,7 +95,7 @@ func (m *VersionResponse) Reset() { *m = VersionResponse{} }
func (m *VersionResponse) String() string { return proto.CompactTextString(m) }
func (*VersionResponse) ProtoMessage() {}
func (*VersionResponse) Descriptor() ([]byte, []int) {
return fileDescriptor_a0b84a42fa06f626, []int{1}
return fileDescriptor_00212fb1f9d3bf1c, []int{1}
}
func (m *VersionResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_VersionResponse.Unmarshal(m, b)
@@ -150,7 +150,7 @@ func (m *DecryptRequest) Reset() { *m = DecryptRequest{} }
func (m *DecryptRequest) String() string { return proto.CompactTextString(m) }
func (*DecryptRequest) ProtoMessage() {}
func (*DecryptRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_a0b84a42fa06f626, []int{2}
return fileDescriptor_00212fb1f9d3bf1c, []int{2}
}
func (m *DecryptRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_DecryptRequest.Unmarshal(m, b)
@@ -196,7 +196,7 @@ func (m *DecryptResponse) Reset() { *m = DecryptResponse{} }
func (m *DecryptResponse) String() string { return proto.CompactTextString(m) }
func (*DecryptResponse) ProtoMessage() {}
func (*DecryptResponse) Descriptor() ([]byte, []int) {
return fileDescriptor_a0b84a42fa06f626, []int{3}
return fileDescriptor_00212fb1f9d3bf1c, []int{3}
}
func (m *DecryptResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_DecryptResponse.Unmarshal(m, b)
@@ -237,7 +237,7 @@ func (m *EncryptRequest) Reset() { *m = EncryptRequest{} }
func (m *EncryptRequest) String() string { return proto.CompactTextString(m) }
func (*EncryptRequest) ProtoMessage() {}
func (*EncryptRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_a0b84a42fa06f626, []int{4}
return fileDescriptor_00212fb1f9d3bf1c, []int{4}
}
func (m *EncryptRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_EncryptRequest.Unmarshal(m, b)
@@ -283,7 +283,7 @@ func (m *EncryptResponse) Reset() { *m = EncryptResponse{} }
func (m *EncryptResponse) String() string { return proto.CompactTextString(m) }
func (*EncryptResponse) ProtoMessage() {}
func (*EncryptResponse) Descriptor() ([]byte, []int) {
return fileDescriptor_a0b84a42fa06f626, []int{5}
return fileDescriptor_00212fb1f9d3bf1c, []int{5}
}
func (m *EncryptResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_EncryptResponse.Unmarshal(m, b)
@@ -319,28 +319,28 @@ func init() {
proto.RegisterType((*EncryptResponse)(nil), "v1beta1.EncryptResponse")
}
func init() { proto.RegisterFile("service.proto", fileDescriptor_a0b84a42fa06f626) }
func init() { proto.RegisterFile("api.proto", fileDescriptor_00212fb1f9d3bf1c) }
var fileDescriptor_a0b84a42fa06f626 = []byte{
// 287 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x52, 0xcd, 0x4a, 0xc4, 0x30,
0x10, 0xde, 0xae, 0xb8, 0xc5, 0xb1, 0xb6, 0x10, 0x16, 0x2d, 0x9e, 0x34, 0x97, 0x55, 0x0f, 0x85,
0xd5, 0xbb, 0x88, 0xe8, 0x49, 0xf4, 0x50, 0xc1, 0xab, 0x64, 0xcb, 0xa0, 0x05, 0x9b, 0xc6, 0x24,
0x5b, 0xd9, 0x17, 0xf5, 0x79, 0xc4, 0x66, 0x5a, 0xd3, 0x15, 0x71, 0x8f, 0x33, 0x99, 0xef, 0x6f,
0x26, 0xb0, 0x67, 0x50, 0x37, 0x65, 0x81, 0x99, 0xd2, 0xb5, 0xad, 0x59, 0xd8, 0xcc, 0x17, 0x68,
0xc5, 0x9c, 0x9f, 0x41, 0xfc, 0x84, 0xda, 0x94, 0xb5, 0xcc, 0xf1, 0x7d, 0x89, 0xc6, 0xb2, 0x14,
0xc2, 0xc6, 0x75, 0xd2, 0xe0, 0x28, 0x38, 0xd9, 0xc9, 0xbb, 0x92, 0x7f, 0x40, 0xd2, 0xcf, 0x1a,
0x55, 0x4b, 0x83, 0x7f, 0x0f, 0xb3, 0x63, 0x88, 0xf4, 0x52, 0xda, 0xb2, 0xc2, 0x67, 0x29, 0x2a,
0x4c, 0xc7, 0xed, 0xf3, 0x2e, 0xf5, 0x1e, 0x44, 0x85, 0x6c, 0x06, 0x49, 0x37, 0xd2, 0x91, 0x6c,
0xb5, 0x53, 0x31, 0xb5, 0x49, 0x8d, 0x5f, 0x43, 0x7c, 0x83, 0x85, 0x5e, 0x29, 0xfb, 0xaf, 0x49,
0xb6, 0x0f, 0x93, 0xa2, 0x54, 0xaf, 0xa8, 0x5b, 0xc5, 0x28, 0xa7, 0x8a, 0xcf, 0x20, 0xe9, 0x39,
0xc8, 0xfc, 0x14, 0xb6, 0xd5, 0x9b, 0x28, 0x1d, 0x45, 0x94, 0xbb, 0x82, 0x5f, 0x41, 0x7c, 0x2b,
0x37, 0x14, 0xeb, 0x19, 0xc6, 0x3e, 0xc3, 0x29, 0x24, 0x3d, 0x03, 0x49, 0xfd, 0xb8, 0x0a, 0x7c,
0x57, 0xe7, 0x9f, 0x01, 0x4c, 0xef, 0x70, 0x75, 0x2f, 0xa4, 0x78, 0xc1, 0x0a, 0xa5, 0x7d, 0x74,
0x67, 0x62, 0x97, 0x10, 0x52, 0x7a, 0x76, 0x90, 0xd1, 0xb1, 0xb2, 0xe1, 0xa5, 0x0e, 0xd3, 0xdf,
0x0f, 0x4e, 0x8e, 0x8f, 0xbe, 0xf1, 0x14, 0xd7, 0xc3, 0x0f, 0x97, 0xe8, 0xe1, 0xd7, 0x36, 0xe3,
0xf0, 0x94, 0xc1, 0xc3, 0x0f, 0xf7, 0xe2, 0xe1, 0xd7, 0xe2, 0xf2, 0xd1, 0x62, 0xd2, 0xfe, 0xb3,
0x8b, 0xaf, 0x00, 0x00, 0x00, 0xff, 0xff, 0x33, 0x8d, 0x09, 0xe1, 0x78, 0x02, 0x00, 0x00,
var fileDescriptor_00212fb1f9d3bf1c = []byte{
// 286 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x52, 0x4f, 0x4b, 0xc3, 0x30,
0x14, 0x5f, 0x27, 0xae, 0xec, 0x59, 0x5a, 0x08, 0x43, 0x8b, 0x27, 0xcd, 0x65, 0xea, 0xa1, 0x30,
0xbd, 0x8b, 0x88, 0x9e, 0x44, 0x0f, 0x15, 0xbc, 0x4a, 0x56, 0x1e, 0x1a, 0xb0, 0x69, 0x4c, 0xb3,
0xca, 0xbe, 0xa8, 0x9f, 0x47, 0x6c, 0x5e, 0x6b, 0x3a, 0x11, 0x77, 0x7c, 0x2f, 0xef, 0xf7, 0xef,
0xbd, 0xc0, 0x54, 0x68, 0x99, 0x69, 0x53, 0xd9, 0x8a, 0x85, 0xcd, 0x62, 0x89, 0x56, 0x2c, 0xf8,
0x19, 0xc4, 0x4f, 0x68, 0x6a, 0x59, 0xa9, 0x1c, 0xdf, 0x57, 0x58, 0x5b, 0x96, 0x42, 0xd8, 0xb8,
0x4e, 0x1a, 0x1c, 0x05, 0x27, 0xd3, 0xbc, 0x2b, 0xf9, 0x07, 0x24, 0xfd, 0x6c, 0xad, 0x2b, 0x55,
0xe3, 0xdf, 0xc3, 0xec, 0x18, 0x22, 0xb3, 0x52, 0x56, 0x96, 0xf8, 0xac, 0x44, 0x89, 0xe9, 0xb8,
0x7d, 0xde, 0xa3, 0xde, 0x83, 0x28, 0x91, 0xcd, 0x21, 0xe9, 0x46, 0x3a, 0x92, 0x9d, 0x76, 0x2a,
0xa6, 0x36, 0xa9, 0xf1, 0x6b, 0x88, 0x6f, 0xb0, 0x30, 0x6b, 0x6d, 0xff, 0x35, 0xc9, 0xf6, 0x61,
0x52, 0x48, 0xfd, 0x8a, 0xa6, 0x55, 0x8c, 0x72, 0xaa, 0xf8, 0x1c, 0x92, 0x9e, 0x83, 0xcc, 0xcf,
0x60, 0x57, 0xbf, 0x09, 0xe9, 0x28, 0xa2, 0xdc, 0x15, 0xfc, 0x0a, 0xe2, 0x5b, 0xb5, 0xa5, 0x58,
0xcf, 0x30, 0xf6, 0x19, 0x4e, 0x21, 0xe9, 0x19, 0x48, 0xea, 0xc7, 0x55, 0xe0, 0xbb, 0x3a, 0xff,
0x0c, 0x60, 0x76, 0x87, 0xeb, 0x7b, 0xa1, 0xc4, 0x0b, 0x96, 0xa8, 0xec, 0x23, 0x9a, 0x46, 0x16,
0xc8, 0x2e, 0x21, 0xa4, 0xf4, 0xec, 0x20, 0xa3, 0x63, 0x65, 0xc3, 0x4b, 0x1d, 0xa6, 0xbf, 0x1f,
0x9c, 0x1c, 0x1f, 0x7d, 0xe3, 0x29, 0xae, 0x87, 0x1f, 0x2e, 0xd1, 0xc3, 0x6f, 0x6c, 0xc6, 0xe1,
0x29, 0x83, 0x87, 0x1f, 0xee, 0xc5, 0xc3, 0x6f, 0xc4, 0xe5, 0xa3, 0xe5, 0xa4, 0xfd, 0x67, 0x17,
0x5f, 0x01, 0x00, 0x00, 0xff, 0xff, 0x57, 0xc8, 0x65, 0x5a, 0x74, 0x02, 0x00, 0x00,
}
// Reference imports to suppress errors if they are not otherwise used.
@@ -498,5 +498,5 @@ var _KeyManagementService_serviceDesc = grpc.ServiceDesc{
},
},
Streams: []grpc.StreamDesc{},
Metadata: "service.proto",
Metadata: "api.proto",
}

View File

@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
// To regenerate service.pb.go run hack/update-generated-kms.sh
// To regenerate api.pb.go run hack/update-generated-kms.sh
syntax = "proto3";
package v1beta1;

View File

@@ -0,0 +1,542 @@
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by protoc-gen-gogo. DO NOT EDIT.
// source: api.proto
package v2alpha1
import (
context "context"
fmt "fmt"
proto "github.com/gogo/protobuf/proto"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
math "math"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = fmt.Errorf
var _ = math.Inf
// This is a compile-time assertion to ensure that this generated file
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package
// StatusRequest is the request message of the KeyManagementService Status RPC.
// It declares no payload fields, only the generator's XXX_ bookkeeping fields.
type StatusRequest struct {
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
// Reset replaces *m with an empty StatusRequest.
func (m *StatusRequest) Reset() { *m = StatusRequest{} }
// String renders m through proto.CompactTextString.
func (m *StatusRequest) String() string { return proto.CompactTextString(m) }
// ProtoMessage has an empty body; presumably a marker identifying the type as a protobuf message.
func (*StatusRequest) ProtoMessage() {}
// Descriptor returns the gzipped api.proto descriptor bytes and the index path {0}
// (presumably this message's position within that file).
func (*StatusRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_00212fb1f9d3bf1c, []int{0}
}
// The XXX_ methods below forward to xxx_messageInfo_StatusRequest.
func (m *StatusRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StatusRequest.Unmarshal(m, b)
}
func (m *StatusRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StatusRequest.Marshal(b, m, deterministic)
}
func (m *StatusRequest) XXX_Merge(src proto.Message) {
xxx_messageInfo_StatusRequest.Merge(m, src)
}
func (m *StatusRequest) XXX_Size() int {
return xxx_messageInfo_StatusRequest.Size(m)
}
func (m *StatusRequest) XXX_DiscardUnknown() {
xxx_messageInfo_StatusRequest.DiscardUnknown(m)
}
// xxx_messageInfo_StatusRequest is the proto.InternalMessageInfo the XXX_ methods above delegate to.
var xxx_messageInfo_StatusRequest proto.InternalMessageInfo
// StatusResponse is the response message of the KeyManagementService Status RPC.
type StatusResponse struct {
// Version of the KMS plugin API. Must match the configured .resources[].providers[].kms.apiVersion
Version string `protobuf:"bytes,1,opt,name=version,proto3" json:"version,omitempty"`
// Any value other than "ok" is failing healthz. On failure, the associated API server healthz endpoint will contain this value as part of the error message.
Healthz string `protobuf:"bytes,2,opt,name=healthz,proto3" json:"healthz,omitempty"`
// the current write key, used to determine staleness of data updated via value.Transformer.TransformFromStorage.
KeyId string `protobuf:"bytes,3,opt,name=key_id,json=keyId,proto3" json:"key_id,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
// Reset replaces *m with an empty StatusResponse.
func (m *StatusResponse) Reset() { *m = StatusResponse{} }
// String renders m through proto.CompactTextString.
func (m *StatusResponse) String() string { return proto.CompactTextString(m) }
// ProtoMessage has an empty body; presumably a marker identifying the type as a protobuf message.
func (*StatusResponse) ProtoMessage() {}
// Descriptor returns the gzipped api.proto descriptor bytes and the index path {1}
// (presumably this message's position within that file).
func (*StatusResponse) Descriptor() ([]byte, []int) {
return fileDescriptor_00212fb1f9d3bf1c, []int{1}
}
// The XXX_ methods below forward to xxx_messageInfo_StatusResponse.
func (m *StatusResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StatusResponse.Unmarshal(m, b)
}
func (m *StatusResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StatusResponse.Marshal(b, m, deterministic)
}
func (m *StatusResponse) XXX_Merge(src proto.Message) {
xxx_messageInfo_StatusResponse.Merge(m, src)
}
func (m *StatusResponse) XXX_Size() int {
return xxx_messageInfo_StatusResponse.Size(m)
}
func (m *StatusResponse) XXX_DiscardUnknown() {
xxx_messageInfo_StatusResponse.DiscardUnknown(m)
}
// xxx_messageInfo_StatusResponse is the proto.InternalMessageInfo the XXX_ methods above delegate to.
var xxx_messageInfo_StatusResponse proto.InternalMessageInfo
// GetVersion returns m.Version, or the empty string when m is nil.
func (m *StatusResponse) GetVersion() string {
if m != nil {
return m.Version
}
return ""
}
// GetHealthz returns m.Healthz, or the empty string when m is nil.
func (m *StatusResponse) GetHealthz() string {
if m != nil {
return m.Healthz
}
return ""
}
// GetKeyId returns m.KeyId, or the empty string when m is nil.
func (m *StatusResponse) GetKeyId() string {
if m != nil {
return m.KeyId
}
return ""
}
// DecryptRequest is the request message of the KeyManagementService Decrypt RPC.
type DecryptRequest struct {
// The data to be decrypted.
Ciphertext []byte `protobuf:"bytes,1,opt,name=ciphertext,proto3" json:"ciphertext,omitempty"`
// UID is a unique identifier for the request.
Uid string `protobuf:"bytes,2,opt,name=uid,proto3" json:"uid,omitempty"`
// The keyID that was provided to the apiserver during encryption.
// This represents the KMS KEK that was used to encrypt the data.
KeyId string `protobuf:"bytes,3,opt,name=key_id,json=keyId,proto3" json:"key_id,omitempty"`
// Additional metadata that was sent by the KMS plugin during encryption.
Annotations map[string][]byte `protobuf:"bytes,4,rep,name=annotations,proto3" json:"annotations,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
// Reset replaces *m with an empty DecryptRequest.
func (m *DecryptRequest) Reset() { *m = DecryptRequest{} }
// String renders m through proto.CompactTextString.
func (m *DecryptRequest) String() string { return proto.CompactTextString(m) }
// ProtoMessage has an empty body; presumably a marker identifying the type as a protobuf message.
func (*DecryptRequest) ProtoMessage() {}
// Descriptor returns the gzipped api.proto descriptor bytes and the index path {2}
// (presumably this message's position within that file).
func (*DecryptRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_00212fb1f9d3bf1c, []int{2}
}
// The XXX_ methods below forward to xxx_messageInfo_DecryptRequest.
func (m *DecryptRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_DecryptRequest.Unmarshal(m, b)
}
func (m *DecryptRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_DecryptRequest.Marshal(b, m, deterministic)
}
func (m *DecryptRequest) XXX_Merge(src proto.Message) {
xxx_messageInfo_DecryptRequest.Merge(m, src)
}
func (m *DecryptRequest) XXX_Size() int {
return xxx_messageInfo_DecryptRequest.Size(m)
}
func (m *DecryptRequest) XXX_DiscardUnknown() {
xxx_messageInfo_DecryptRequest.DiscardUnknown(m)
}
// xxx_messageInfo_DecryptRequest is the proto.InternalMessageInfo the XXX_ methods above delegate to.
var xxx_messageInfo_DecryptRequest proto.InternalMessageInfo
// GetCiphertext returns m.Ciphertext, or nil when m is nil.
func (m *DecryptRequest) GetCiphertext() []byte {
if m != nil {
return m.Ciphertext
}
return nil
}
// GetUid returns m.Uid, or the empty string when m is nil.
func (m *DecryptRequest) GetUid() string {
if m != nil {
return m.Uid
}
return ""
}
// GetKeyId returns m.KeyId, or the empty string when m is nil.
func (m *DecryptRequest) GetKeyId() string {
if m != nil {
return m.KeyId
}
return ""
}
// GetAnnotations returns m.Annotations, or nil when m is nil.
func (m *DecryptRequest) GetAnnotations() map[string][]byte {
if m != nil {
return m.Annotations
}
return nil
}
// DecryptResponse is the response message of the KeyManagementService Decrypt RPC.
type DecryptResponse struct {
// The decrypted data.
Plaintext []byte `protobuf:"bytes,1,opt,name=plaintext,proto3" json:"plaintext,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
// Reset replaces *m with an empty DecryptResponse.
func (m *DecryptResponse) Reset() { *m = DecryptResponse{} }
// String renders m through proto.CompactTextString.
func (m *DecryptResponse) String() string { return proto.CompactTextString(m) }
// ProtoMessage has an empty body; presumably a marker identifying the type as a protobuf message.
func (*DecryptResponse) ProtoMessage() {}
// Descriptor returns the gzipped api.proto descriptor bytes and the index path {3}
// (presumably this message's position within that file).
func (*DecryptResponse) Descriptor() ([]byte, []int) {
return fileDescriptor_00212fb1f9d3bf1c, []int{3}
}
// The XXX_ methods below forward to xxx_messageInfo_DecryptResponse.
func (m *DecryptResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_DecryptResponse.Unmarshal(m, b)
}
func (m *DecryptResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_DecryptResponse.Marshal(b, m, deterministic)
}
func (m *DecryptResponse) XXX_Merge(src proto.Message) {
xxx_messageInfo_DecryptResponse.Merge(m, src)
}
func (m *DecryptResponse) XXX_Size() int {
return xxx_messageInfo_DecryptResponse.Size(m)
}
func (m *DecryptResponse) XXX_DiscardUnknown() {
xxx_messageInfo_DecryptResponse.DiscardUnknown(m)
}
// xxx_messageInfo_DecryptResponse is the proto.InternalMessageInfo the XXX_ methods above delegate to.
var xxx_messageInfo_DecryptResponse proto.InternalMessageInfo
// GetPlaintext returns m.Plaintext, or nil when m is nil.
func (m *DecryptResponse) GetPlaintext() []byte {
if m != nil {
return m.Plaintext
}
return nil
}
// EncryptRequest is the request message of the KeyManagementService Encrypt RPC.
type EncryptRequest struct {
// The data to be encrypted.
Plaintext []byte `protobuf:"bytes,1,opt,name=plaintext,proto3" json:"plaintext,omitempty"`
// UID is a unique identifier for the request.
Uid string `protobuf:"bytes,2,opt,name=uid,proto3" json:"uid,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
// Reset replaces *m with an empty EncryptRequest.
func (m *EncryptRequest) Reset() { *m = EncryptRequest{} }
// String renders m through proto.CompactTextString.
func (m *EncryptRequest) String() string { return proto.CompactTextString(m) }
// ProtoMessage has an empty body; presumably a marker identifying the type as a protobuf message.
func (*EncryptRequest) ProtoMessage() {}
// Descriptor returns the gzipped api.proto descriptor bytes and the index path {4}
// (presumably this message's position within that file).
func (*EncryptRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_00212fb1f9d3bf1c, []int{4}
}
// The XXX_ methods below forward to xxx_messageInfo_EncryptRequest.
func (m *EncryptRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_EncryptRequest.Unmarshal(m, b)
}
func (m *EncryptRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_EncryptRequest.Marshal(b, m, deterministic)
}
func (m *EncryptRequest) XXX_Merge(src proto.Message) {
xxx_messageInfo_EncryptRequest.Merge(m, src)
}
func (m *EncryptRequest) XXX_Size() int {
return xxx_messageInfo_EncryptRequest.Size(m)
}
func (m *EncryptRequest) XXX_DiscardUnknown() {
xxx_messageInfo_EncryptRequest.DiscardUnknown(m)
}
// xxx_messageInfo_EncryptRequest is the proto.InternalMessageInfo the XXX_ methods above delegate to.
var xxx_messageInfo_EncryptRequest proto.InternalMessageInfo
// GetPlaintext returns m.Plaintext, or nil when m is nil.
func (m *EncryptRequest) GetPlaintext() []byte {
if m != nil {
return m.Plaintext
}
return nil
}
// GetUid returns m.Uid, or the empty string when m is nil.
func (m *EncryptRequest) GetUid() string {
if m != nil {
return m.Uid
}
return ""
}
// EncryptResponse is the response message of the KeyManagementService Encrypt RPC.
type EncryptResponse struct {
// The encrypted data.
Ciphertext []byte `protobuf:"bytes,1,opt,name=ciphertext,proto3" json:"ciphertext,omitempty"`
// The KMS key ID used to encrypt the data. This must always refer to the KMS KEK and not any local KEKs that may be in use.
// This can be used to inform staleness of data updated via value.Transformer.TransformFromStorage.
KeyId string `protobuf:"bytes,2,opt,name=key_id,json=keyId,proto3" json:"key_id,omitempty"`
// Additional metadata to be stored with the encrypted data.
// This metadata can contain the encrypted local KEK that was used to encrypt the DEK.
// This data is stored in plaintext in etcd. KMS plugin implementations are responsible for pre-encrypting any sensitive data.
Annotations map[string][]byte `protobuf:"bytes,3,rep,name=annotations,proto3" json:"annotations,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
// Reset replaces *m with an empty EncryptResponse.
func (m *EncryptResponse) Reset() { *m = EncryptResponse{} }
// String renders m through proto.CompactTextString.
func (m *EncryptResponse) String() string { return proto.CompactTextString(m) }
// ProtoMessage has an empty body; presumably a marker identifying the type as a protobuf message.
func (*EncryptResponse) ProtoMessage() {}
// Descriptor returns the gzipped api.proto descriptor bytes and the index path {5}
// (presumably this message's position within that file).
func (*EncryptResponse) Descriptor() ([]byte, []int) {
return fileDescriptor_00212fb1f9d3bf1c, []int{5}
}
// The XXX_ methods below forward to xxx_messageInfo_EncryptResponse.
func (m *EncryptResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_EncryptResponse.Unmarshal(m, b)
}
func (m *EncryptResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_EncryptResponse.Marshal(b, m, deterministic)
}
func (m *EncryptResponse) XXX_Merge(src proto.Message) {
xxx_messageInfo_EncryptResponse.Merge(m, src)
}
func (m *EncryptResponse) XXX_Size() int {
return xxx_messageInfo_EncryptResponse.Size(m)
}
func (m *EncryptResponse) XXX_DiscardUnknown() {
xxx_messageInfo_EncryptResponse.DiscardUnknown(m)
}
// xxx_messageInfo_EncryptResponse is the proto.InternalMessageInfo the XXX_ methods above delegate to.
var xxx_messageInfo_EncryptResponse proto.InternalMessageInfo
// GetCiphertext returns m.Ciphertext, or nil when m is nil.
func (m *EncryptResponse) GetCiphertext() []byte {
if m != nil {
return m.Ciphertext
}
return nil
}
// GetKeyId returns m.KeyId, or the empty string when m is nil.
func (m *EncryptResponse) GetKeyId() string {
if m != nil {
return m.KeyId
}
return ""
}
// GetAnnotations returns m.Annotations, or nil when m is nil.
func (m *EncryptResponse) GetAnnotations() map[string][]byte {
if m != nil {
return m.Annotations
}
return nil
}
// init registers every message type of this file, plus the annotations map types of
// DecryptRequest and EncryptResponse, with the proto package under their "v2alpha1." names.
func init() {
proto.RegisterType((*StatusRequest)(nil), "v2alpha1.StatusRequest")
proto.RegisterType((*StatusResponse)(nil), "v2alpha1.StatusResponse")
proto.RegisterType((*DecryptRequest)(nil), "v2alpha1.DecryptRequest")
proto.RegisterMapType((map[string][]byte)(nil), "v2alpha1.DecryptRequest.AnnotationsEntry")
proto.RegisterType((*DecryptResponse)(nil), "v2alpha1.DecryptResponse")
proto.RegisterType((*EncryptRequest)(nil), "v2alpha1.EncryptRequest")
proto.RegisterType((*EncryptResponse)(nil), "v2alpha1.EncryptResponse")
proto.RegisterMapType((map[string][]byte)(nil), "v2alpha1.EncryptResponse.AnnotationsEntry")
}
// init registers the descriptor bytes of "api.proto" with the proto package.
func init() { proto.RegisterFile("api.proto", fileDescriptor_00212fb1f9d3bf1c) }
var fileDescriptor_00212fb1f9d3bf1c = []byte{
// 391 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xb4, 0x93, 0xcf, 0x4f, 0xe2, 0x40,
0x14, 0xc7, 0x29, 0x5d, 0x60, 0x79, 0xb0, 0x40, 0x26, 0x6c, 0xb6, 0x4b, 0x36, 0x1b, 0x32, 0x27,
0x76, 0x0f, 0xdd, 0x2c, 0x5e, 0x8c, 0x89, 0x06, 0x13, 0x39, 0x18, 0xf4, 0x52, 0x8e, 0x1e, 0xcc,
0x08, 0x2f, 0x76, 0x42, 0x9d, 0xd6, 0x76, 0xda, 0x58, 0xff, 0x50, 0x13, 0xff, 0x01, 0xff, 0x0e,
0xd3, 0x76, 0xa0, 0x2d, 0x88, 0x9e, 0xbc, 0xcd, 0xfb, 0xd1, 0xef, 0xf7, 0xcd, 0x67, 0x5e, 0xa1,
0xc9, 0x3c, 0x6e, 0x7a, 0xbe, 0x2b, 0x5d, 0xf2, 0x35, 0x1a, 0x33, 0xc7, 0xb3, 0xd9, 0x7f, 0xda,
0x85, 0x6f, 0x73, 0xc9, 0x64, 0x18, 0x58, 0x78, 0x1f, 0x62, 0x20, 0xe9, 0x15, 0x74, 0xd6, 0x89,
0xc0, 0x73, 0x45, 0x80, 0xc4, 0x80, 0x46, 0x84, 0x7e, 0xc0, 0x5d, 0x61, 0x68, 0x43, 0x6d, 0xd4,
0xb4, 0xd6, 0x61, 0x52, 0xb1, 0x91, 0x39, 0xd2, 0x7e, 0x34, 0xaa, 0x59, 0x45, 0x85, 0xe4, 0x3b,
0xd4, 0x57, 0x18, 0x5f, 0xf3, 0xa5, 0xa1, 0xa7, 0x85, 0xda, 0x0a, 0xe3, 0xf3, 0x25, 0x7d, 0xd1,
0xa0, 0x73, 0x86, 0x0b, 0x3f, 0xf6, 0xa4, 0xf2, 0x23, 0xbf, 0x01, 0x16, 0xdc, 0xb3, 0xd1, 0x97,
0xf8, 0x20, 0x53, 0x83, 0xb6, 0x55, 0xc8, 0x90, 0x1e, 0xe8, 0x21, 0x5f, 0x2a, 0xfd, 0xe4, 0xb8,
0x47, 0x9b, 0xcc, 0xa0, 0xc5, 0x84, 0x70, 0x25, 0x93, 0xdc, 0x15, 0x81, 0xf1, 0x65, 0xa8, 0x8f,
0x5a, 0xe3, 0x3f, 0xe6, 0xfa, 0xa6, 0x66, 0xd9, 0xd7, 0x3c, 0xcd, 0x7b, 0xa7, 0x42, 0xfa, 0xb1,
0x55, 0xfc, 0x7a, 0x70, 0x02, 0xbd, 0xed, 0x86, 0x64, 0x92, 0x15, 0xc6, 0x8a, 0x41, 0x72, 0x24,
0x7d, 0xa8, 0x45, 0xcc, 0x09, 0x31, 0x9d, 0xae, 0x6d, 0x65, 0xc1, 0x51, 0xf5, 0x50, 0xa3, 0xff,
0xa0, 0xbb, 0xf1, 0x53, 0x18, 0x7f, 0x41, 0xd3, 0x73, 0x18, 0x17, 0x85, 0x7b, 0xe6, 0x09, 0x3a,
0x81, 0xce, 0x54, 0x94, 0xc0, 0xbc, 0xdb, 0xbf, 0x8b, 0x85, 0x3e, 0x69, 0xd0, 0xdd, 0x48, 0x28,
0xcf, 0x8f, 0xe0, 0xe6, 0x28, 0xab, 0x45, 0x94, 0x17, 0x65, 0x94, 0x7a, 0x8a, 0xf2, 0x6f, 0x8e,
0x72, 0xcb, 0xe6, 0x73, 0x59, 0x8e, 0x9f, 0x35, 0xe8, 0xcf, 0x30, 0xbe, 0x64, 0x82, 0xdd, 0xe2,
0x1d, 0x0a, 0x39, 0x47, 0x3f, 0xe2, 0x0b, 0x24, 0xc7, 0x50, 0xcf, 0x56, 0x95, 0xfc, 0xc8, 0x67,
0x2b, 0x6d, 0xf3, 0xc0, 0xd8, 0x2d, 0x64, 0x33, 0xd3, 0x0a, 0x99, 0x40, 0x43, 0xbd, 0x11, 0x31,
0xf6, 0xad, 0xc9, 0xe0, 0xe7, 0x1b, 0x95, 0xa2, 0x82, 0x42, 0x51, 0x54, 0x28, 0xbf, 0x63, 0x51,
0x61, 0x8b, 0x1b, 0xad, 0xdc, 0xd4, 0xd3, 0xff, 0xf1, 0xe0, 0x35, 0x00, 0x00, 0xff, 0xff, 0xa7,
0xdd, 0xa1, 0x79, 0x9c, 0x03, 0x00, 0x00,
}
// Reference imports to suppress errors if they are not otherwise used.
var _ context.Context
var _ grpc.ClientConn
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
const _ = grpc.SupportPackageIsVersion4
// KeyManagementServiceClient is the client API for KeyManagementService service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream.
type KeyManagementServiceClient interface {
// Status returns a StatusResponse (version, healthz and key ID fields).
// this API is meant to be polled
Status(ctx context.Context, in *StatusRequest, opts ...grpc.CallOption) (*StatusResponse, error)
// Execute decryption operation in KMS provider.
Decrypt(ctx context.Context, in *DecryptRequest, opts ...grpc.CallOption) (*DecryptResponse, error)
// Execute encryption operation in KMS provider.
Encrypt(ctx context.Context, in *EncryptRequest, opts ...grpc.CallOption) (*EncryptResponse, error)
}
// keyManagementServiceClient holds the *grpc.ClientConn its methods invoke RPCs on.
type keyManagementServiceClient struct {
cc *grpc.ClientConn
}
// NewKeyManagementServiceClient returns a KeyManagementServiceClient backed by cc.
func NewKeyManagementServiceClient(cc *grpc.ClientConn) KeyManagementServiceClient {
return &keyManagementServiceClient{cc}
}
// Status invokes "/v2alpha1.KeyManagementService/Status" on the client connection
// and returns the decoded StatusResponse, or nil and the error when Invoke fails.
func (c *keyManagementServiceClient) Status(ctx context.Context, in *StatusRequest, opts ...grpc.CallOption) (*StatusResponse, error) {
out := new(StatusResponse)
err := c.cc.Invoke(ctx, "/v2alpha1.KeyManagementService/Status", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
// Decrypt invokes "/v2alpha1.KeyManagementService/Decrypt" on the client connection
// and returns the decoded DecryptResponse, or nil and the error when Invoke fails.
func (c *keyManagementServiceClient) Decrypt(ctx context.Context, in *DecryptRequest, opts ...grpc.CallOption) (*DecryptResponse, error) {
out := new(DecryptResponse)
err := c.cc.Invoke(ctx, "/v2alpha1.KeyManagementService/Decrypt", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
// Encrypt invokes "/v2alpha1.KeyManagementService/Encrypt" on the client connection
// and returns the decoded EncryptResponse, or nil and the error when Invoke fails.
func (c *keyManagementServiceClient) Encrypt(ctx context.Context, in *EncryptRequest, opts ...grpc.CallOption) (*EncryptResponse, error) {
out := new(EncryptResponse)
err := c.cc.Invoke(ctx, "/v2alpha1.KeyManagementService/Encrypt", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
// KeyManagementServiceServer is the server API for KeyManagementService service.
type KeyManagementServiceServer interface {
// Status returns a StatusResponse (version, healthz and key ID fields).
// this API is meant to be polled
Status(context.Context, *StatusRequest) (*StatusResponse, error)
// Execute decryption operation in KMS provider.
Decrypt(context.Context, *DecryptRequest) (*DecryptResponse, error)
// Execute encryption operation in KMS provider.
Encrypt(context.Context, *EncryptRequest) (*EncryptResponse, error)
}
// UnimplementedKeyManagementServiceServer can be embedded to have forward
// compatible implementations. Each of its methods returns a
// codes.Unimplemented error.
type UnimplementedKeyManagementServiceServer struct {
}
// Status is the placeholder implementation: it ignores its arguments and
// always returns a nil response with a codes.Unimplemented error.
func (*UnimplementedKeyManagementServiceServer) Status(ctx context.Context, req *StatusRequest) (*StatusResponse, error) {
	unimplemented := status.Errorf(codes.Unimplemented, "method Status not implemented")
	return nil, unimplemented
}
// Decrypt is the placeholder implementation: it ignores its arguments and
// always returns a nil response with a codes.Unimplemented error.
func (*UnimplementedKeyManagementServiceServer) Decrypt(ctx context.Context, req *DecryptRequest) (*DecryptResponse, error) {
	unimplemented := status.Errorf(codes.Unimplemented, "method Decrypt not implemented")
	return nil, unimplemented
}
// Encrypt is the placeholder implementation: it ignores its arguments and
// always returns a nil response with a codes.Unimplemented error.
func (*UnimplementedKeyManagementServiceServer) Encrypt(ctx context.Context, req *EncryptRequest) (*EncryptResponse, error) {
	unimplemented := status.Errorf(codes.Unimplemented, "method Encrypt not implemented")
	return nil, unimplemented
}
// RegisterKeyManagementServiceServer registers srv on s as the implementation
// of the service described by _KeyManagementService_serviceDesc.
func RegisterKeyManagementServiceServer(s *grpc.Server, srv KeyManagementServiceServer) {
	desc := &_KeyManagementService_serviceDesc
	s.RegisterService(desc, srv)
}
// _KeyManagementService_Status_Handler is the handler listed for the "Status"
// method in the service descriptor. It decodes the request with dec and
// dispatches to srv's Status method, routing the call through interceptor
// when one is supplied.
func _KeyManagementService_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(StatusRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// Give the interceptor the call description and a continuation that
		// performs the real dispatch; srv is only type-asserted once the
		// continuation actually runs.
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: "/v2alpha1.KeyManagementService/Status",
		}
		invoke := func(ctx context.Context, req interface{}) (interface{}, error) {
			return srv.(KeyManagementServiceServer).Status(ctx, req.(*StatusRequest))
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(KeyManagementServiceServer).Status(ctx, request)
}
// _KeyManagementService_Decrypt_Handler is the handler listed for the
// "Decrypt" method in the service descriptor. It decodes the request with dec
// and dispatches to srv's Decrypt method, routing the call through
// interceptor when one is supplied.
func _KeyManagementService_Decrypt_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(DecryptRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// Give the interceptor the call description and a continuation that
		// performs the real dispatch; srv is only type-asserted once the
		// continuation actually runs.
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: "/v2alpha1.KeyManagementService/Decrypt",
		}
		invoke := func(ctx context.Context, req interface{}) (interface{}, error) {
			return srv.(KeyManagementServiceServer).Decrypt(ctx, req.(*DecryptRequest))
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(KeyManagementServiceServer).Decrypt(ctx, request)
}
// _KeyManagementService_Encrypt_Handler is the handler listed for the
// "Encrypt" method in the service descriptor. It decodes the request with dec
// and dispatches to srv's Encrypt method, routing the call through
// interceptor when one is supplied.
func _KeyManagementService_Encrypt_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(EncryptRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// Give the interceptor the call description and a continuation that
		// performs the real dispatch; srv is only type-asserted once the
		// continuation actually runs.
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: "/v2alpha1.KeyManagementService/Encrypt",
		}
		invoke := func(ctx context.Context, req interface{}) (interface{}, error) {
			return srv.(KeyManagementServiceServer).Encrypt(ctx, req.(*EncryptRequest))
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(KeyManagementServiceServer).Encrypt(ctx, request)
}
// _KeyManagementService_serviceDesc is the descriptor that
// RegisterKeyManagementServiceServer passes to grpc.Server.RegisterService.
// It pairs each method name of "v2alpha1.KeyManagementService" with its
// handler function and declares no streams.
var _KeyManagementService_serviceDesc = grpc.ServiceDesc{
ServiceName: "v2alpha1.KeyManagementService",
// HandlerType is a typed nil pointer to the server interface.
HandlerType: (*KeyManagementServiceServer)(nil),
Methods: []grpc.MethodDesc{
{
MethodName: "Status",
Handler: _KeyManagementService_Status_Handler,
},
{
MethodName: "Decrypt",
Handler: _KeyManagementService_Decrypt_Handler,
},
{
MethodName: "Encrypt",
Handler: _KeyManagementService_Encrypt_Handler,
},
},
// No streaming methods are declared for this service.
Streams: []grpc.StreamDesc{},
// NOTE(review): presumably the name of the .proto file this code was generated from — confirm.
Metadata: "api.proto",
}

View File

@@ -0,0 +1,78 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// To regenerate api.pb.go run hack/update-generated-kms.sh
syntax = "proto3";
package v2alpha1;
// KeyManagementService defines the public APIs for a remote KMS provider.
service KeyManagementService {
// Status returns a StatusResponse (version, healthz and key_id).
// This API is meant to be polled.
rpc Status(StatusRequest) returns (StatusResponse) {}
// Execute decryption operation in KMS provider.
rpc Decrypt(DecryptRequest) returns (DecryptResponse) {}
// Execute encryption operation in KMS provider.
rpc Encrypt(EncryptRequest) returns (EncryptResponse) {}
}
// StatusRequest is the request message of the Status RPC; it has no fields.
message StatusRequest {}
// StatusResponse is the response message of the Status RPC.
message StatusResponse {
// Version of the KMS plugin API. Must match the configured .resources[].providers[].kms.apiVersion
string version = 1;
// Any value other than "ok" is failing healthz. On failure, the associated API server healthz endpoint will contain this value as part of the error message.
string healthz = 2;
// The current write key, used to determine staleness of data updated via value.Transformer.TransformFromStorage.
string key_id = 3;
}
// DecryptRequest is the request message of the Decrypt RPC.
message DecryptRequest {
// The data to be decrypted.
bytes ciphertext = 1;
// UID is a unique identifier for the request.
string uid = 2;
// The keyID that was provided to the apiserver during encryption.
// This represents the KMS KEK that was used to encrypt the data.
string key_id = 3;
// Additional metadata that was sent by the KMS plugin during encryption.
map<string, bytes> annotations = 4;
}
// DecryptResponse is the response message of the Decrypt RPC.
message DecryptResponse {
// The decrypted data.
bytes plaintext = 1;
}
// EncryptRequest is the request message of the Encrypt RPC.
message EncryptRequest {
// The data to be encrypted.
bytes plaintext = 1;
// UID is a unique identifier for the request.
string uid = 2;
}
// EncryptResponse is the response message of the Encrypt RPC.
message EncryptResponse {
// The encrypted data.
bytes ciphertext = 1;
// The KMS key ID used to encrypt the data. This must always refer to the KMS KEK and not any local KEKs that may be in use.
// This can be used to inform staleness of data updated via value.Transformer.TransformFromStorage.
string key_id = 2;
// Additional metadata to be stored with the encrypted data.
// This metadata can contain the encrypted local KEK that was used to encrypt the DEK.
// This data is stored in plaintext in etcd. KMS plugin implementations are responsible for pre-encrypting any sensitive data.
map<string, bytes> annotations = 3;
}

View File

@@ -0,0 +1,18 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package v2alpha1 contains the definition of the kms-plugin's gRPC service.
package v2alpha1

View File

@@ -18,6 +18,7 @@ package identity
import (
"bytes"
"context"
"fmt"
"k8s.io/apiserver/pkg/storage/value"
@@ -34,17 +35,17 @@ func NewEncryptCheckTransformer() value.Transformer {
}
// TransformFromStorage returns the input bytes if the data is not encrypted
func (identityTransformer) TransformFromStorage(b []byte, context value.Context) ([]byte, bool, error) {
func (identityTransformer) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
// identityTransformer has to return an error if the data is encoded using another transformer.
// JSON data starts with '{'. Protobuf data has a prefix 'k8s[\x00-\xFF]'.
// Prefix 'k8s:enc:' is reserved for encrypted data on disk.
if bytes.HasPrefix(b, []byte("k8s:enc:")) {
if bytes.HasPrefix(data, []byte("k8s:enc:")) {
return []byte{}, false, fmt.Errorf("identity transformer tried to read encrypted data")
}
return b, false, nil
return data, false, nil
}
// TransformToStorage implements the Transformer interface for identityTransformer
func (identityTransformer) TransformToStorage(b []byte, context value.Context) ([]byte, error) {
return b, nil
func (identityTransformer) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
return data, nil
}

View File

@@ -18,6 +18,7 @@ limitations under the License.
package secretbox
import (
"context"
"crypto/rand"
"fmt"
@@ -41,7 +42,7 @@ func NewSecretboxTransformer(key [32]byte) value.Transformer {
return &secretboxTransformer{key: key}
}
func (t *secretboxTransformer) TransformFromStorage(data []byte, context value.Context) ([]byte, bool, error) {
func (t *secretboxTransformer) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
if len(data) < (secretbox.Overhead + nonceSize) {
return nil, false, fmt.Errorf("the stored data was shorter than the required size")
}
@@ -56,7 +57,7 @@ func (t *secretboxTransformer) TransformFromStorage(data []byte, context value.C
return result, false, nil
}
func (t *secretboxTransformer) TransformToStorage(data []byte, context value.Context) ([]byte, error) {
func (t *secretboxTransformer) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
var nonce [nonceSize]byte
n, err := rand.Read(nonce[:])
if err != nil {

View File

@@ -33,7 +33,7 @@ const (
/*
* By default, all the following metrics are defined as falling under
* ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/20190404-kubernetes-control-plane-metrics-stability.md#stability-classes)
* ALPHA stability level (https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes)
*
* Promoting the stability level of the metric is a responsibility of the component owner, since it
* involves explicitly acknowledging support for the metric across multiple releases, in accordance with
@@ -47,8 +47,8 @@ var (
Name: "transformation_duration_seconds",
Help: "Latencies in seconds of value transformation operations.",
// In-process transformations (ex. AES CBC) complete on the order of 20 microseconds. However, when
// external KMS is involved latencies may climb into milliseconds.
Buckets: metrics.ExponentialBuckets(5e-6, 2, 14),
// external KMS is involved latencies may climb into hundreds of milliseconds.
Buckets: metrics.ExponentialBuckets(5e-6, 2, 25),
StabilityLevel: metrics.ALPHA,
},
[]string{"transformation_type"},

View File

@@ -19,6 +19,7 @@ package value
import (
"bytes"
"context"
"fmt"
"sync"
"time"
@@ -45,9 +46,9 @@ type Transformer interface {
// TransformFromStorage may transform the provided data from its underlying storage representation or return an error.
// Stale is true if the object on disk is stale and a write to etcd should be issued, even if the contents of the object
// have not changed.
TransformFromStorage(data []byte, context Context) (out []byte, stale bool, err error)
TransformFromStorage(ctx context.Context, data []byte, dataCtx Context) (out []byte, stale bool, err error)
// TransformToStorage may transform the provided data into the appropriate form in storage or return an error.
TransformToStorage(data []byte, context Context) (out []byte, err error)
TransformToStorage(ctx context.Context, data []byte, dataCtx Context) (out []byte, err error)
}
type identityTransformer struct{}
@@ -55,11 +56,11 @@ type identityTransformer struct{}
// IdentityTransformer performs no transformation of the provided data.
var IdentityTransformer Transformer = identityTransformer{}
func (identityTransformer) TransformFromStorage(b []byte, ctx Context) ([]byte, bool, error) {
return b, false, nil
func (identityTransformer) TransformFromStorage(ctx context.Context, data []byte, dataCtx Context) ([]byte, bool, error) {
return data, false, nil
}
func (identityTransformer) TransformToStorage(b []byte, ctx Context) ([]byte, error) {
return b, nil
func (identityTransformer) TransformToStorage(ctx context.Context, data []byte, dataCtx Context) ([]byte, error) {
return data, nil
}
// DefaultContext is a simple implementation of Context for a slice of bytes.
@@ -86,17 +87,17 @@ func (t *MutableTransformer) Set(transformer Transformer) {
t.lock.Unlock()
}
func (t *MutableTransformer) TransformFromStorage(data []byte, context Context) (out []byte, stale bool, err error) {
func (t *MutableTransformer) TransformFromStorage(ctx context.Context, data []byte, dataCtx Context) (out []byte, stale bool, err error) {
t.lock.RLock()
transformer := t.transformer
t.lock.RUnlock()
return transformer.TransformFromStorage(data, context)
return transformer.TransformFromStorage(ctx, data, dataCtx)
}
func (t *MutableTransformer) TransformToStorage(data []byte, context Context) (out []byte, err error) {
func (t *MutableTransformer) TransformToStorage(ctx context.Context, data []byte, dataCtx Context) (out []byte, err error) {
t.lock.RLock()
transformer := t.transformer
t.lock.RUnlock()
return transformer.TransformToStorage(data, context)
return transformer.TransformToStorage(ctx, data, dataCtx)
}
// PrefixTransformer holds a transformer interface and the prefix that the transformation is located under.
@@ -129,12 +130,12 @@ func NewPrefixTransformers(err error, transformers ...PrefixTransformer) Transfo
// TransformFromStorage finds the first transformer with a prefix matching the provided data and returns
// the result of transforming the value. It will always mark any transformation as stale that is not using
// the first transformer.
func (t *prefixTransformers) TransformFromStorage(data []byte, context Context) ([]byte, bool, error) {
func (t *prefixTransformers) TransformFromStorage(ctx context.Context, data []byte, dataCtx Context) ([]byte, bool, error) {
start := time.Now()
var errs []error
for i, transformer := range t.transformers {
if bytes.HasPrefix(data, transformer.Prefix) {
result, stale, err := transformer.Transformer.TransformFromStorage(data[len(transformer.Prefix):], context)
result, stale, err := transformer.Transformer.TransformFromStorage(ctx, data[len(transformer.Prefix):], dataCtx)
// To migrate away from encryption, user can specify an identity transformer higher up
// (in the config file) than the encryption transformer. In that scenario, the identity transformer needs to
// identify (during reads from disk) whether the data being read is encrypted or not. If the data is encrypted,
@@ -194,12 +195,12 @@ func (t *prefixTransformers) TransformFromStorage(data []byte, context Context)
}
// TransformToStorage uses the first transformer and adds its prefix to the data.
func (t *prefixTransformers) TransformToStorage(data []byte, context Context) ([]byte, error) {
func (t *prefixTransformers) TransformToStorage(ctx context.Context, data []byte, dataCtx Context) ([]byte, error) {
start := time.Now()
transformer := t.transformers[0]
prefixedData := make([]byte, len(transformer.Prefix), len(data)+len(transformer.Prefix))
copy(prefixedData, transformer.Prefix)
result, err := transformer.Transformer.TransformToStorage(data, context)
result, err := transformer.Transformer.TransformToStorage(ctx, data, dataCtx)
RecordTransformation("to_storage", string(transformer.Prefix), start, err)
if err != nil {
return nil, err