Upgrade k8s package version (#5358)

* upgrade k8s package version

Signed-off-by: hongzhouzi <hongzhouzi@kubesphere.io>

* Script upgrade and code formatting.

Signed-off-by: hongzhouzi <hongzhouzi@kubesphere.io>

Signed-off-by: hongzhouzi <hongzhouzi@kubesphere.io>
hongzhouzi, 2022-11-15 14:56:38 +08:00, committed by GitHub
parent 5f91c1663a
commit 44167aa47a
3106 changed files with 321340 additions and 172080 deletions


@@ -31,20 +31,23 @@ import (
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/clock"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/watch"
endpointsrequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/cacher/metrics"
utilfeature "k8s.io/apiserver/pkg/util/feature"
utilflowcontrol "k8s.io/apiserver/pkg/util/flowcontrol"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
utiltrace "k8s.io/utils/trace"
)
var (
emptyFunc = func() {}
emptyFunc = func(bool) {}
)
const (
@@ -144,6 +147,10 @@ func (i *indexedWatchers) deleteWatcher(number int, value string, supported bool
}
func (i *indexedWatchers) terminateAll(objectType reflect.Type, done func(*cacheWatcher)) {
// note that we don't have to call setDrainInputBufferLocked method on the watchers
// because we take advantage of the default value - stop immediately
// also watchers that have already had their draining strategy set
// are no longer available (they were removed from the allWatchers and the valueWatchers maps)
if len(i.allWatchers) > 0 || len(i.valueWatchers) > 0 {
klog.Warningf("Terminating all watchers from cacher %v", objectType)
}
@@ -180,6 +187,10 @@ func newTimeBucketWatchers(clock clock.Clock, bookmarkFrequency time.Duration) *
// adds a watcher to the bucket, if the deadline is before the start, it will be
// added to the first one.
func (t *watcherBookmarkTimeBuckets) addWatcher(w *cacheWatcher) bool {
// note that the returned time can be before t.createTime,
// especially in cases when the nextBookmarkTime method
// gives us the zero value of type Time
// so bucketID can hold a negative value
nextTime, ok := w.nextBookmarkTime(t.clock.Now(), t.bookmarkFrequency)
if !ok {
return false
@@ -190,7 +201,7 @@ func (t *watcherBookmarkTimeBuckets) addWatcher(w *cacheWatcher) bool {
if bucketID < t.startBucketID {
bucketID = t.startBucketID
}
watchers, _ := t.watchersBuckets[bucketID]
watchers := t.watchersBuckets[bucketID]
t.watchersBuckets[bucketID] = append(watchers, w)
return true
}
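For intuition, a minimal sketch of the clamping described in the comment above. bucketFor, createTime and bucketInterval are hypothetical stand-ins for the corresponding watcherBookmarkTimeBuckets fields, not the vendored implementation:

func bucketFor(nextTime, createTime time.Time, bucketInterval time.Duration, startBucketID int64) int64 {
	// nextBookmarkTime may return a time before createTime (e.g. the zero
	// time.Time), which makes the raw ID negative; clamp it into the first
	// non-expired bucket.
	id := int64(nextTime.Sub(createTime) / bucketInterval)
	if id < startBucketID {
		id = startBucketID
	}
	return id
}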
@@ -230,6 +241,8 @@ type Cacher struct {
// Incoming events that should be dispatched to watchers.
incoming chan watchCacheEvent
resourcePrefix string
sync.RWMutex
// Before accessing the cacher's cache, wait for the ready to be ok.
@@ -292,6 +305,8 @@ type Cacher struct {
watchersToStop []*cacheWatcher
// Maintain a timeout queue to send the bookmark event before the watcher times out.
bookmarkWatchers *watcherBookmarkTimeBuckets
// expiredBookmarkWatchers is a list of watchers that have expired and need to be scheduled for the next bookmark event
expiredBookmarkWatchers []*cacheWatcher
}
// NewCacherFromConfig creates a new Cacher responsible for servicing WATCH and LIST requests from
@@ -328,6 +343,7 @@ func NewCacherFromConfig(config Config) (*Cacher, error) {
}
objType := reflect.TypeOf(obj)
cacher := &Cacher{
resourcePrefix: config.ResourcePrefix,
ready: newReady(),
storage: config.Storage,
objectType: objType,
@@ -341,7 +357,7 @@ func NewCacherFromConfig(config Config) (*Cacher, error) {
},
// TODO: Figure out the correct value for the buffer size.
incoming: make(chan watchCacheEvent, 100),
dispatchTimeoutBudget: newTimeBudget(stopCh),
dispatchTimeoutBudget: newTimeBudget(),
// We need to (potentially) stop both:
// - wait.Until go-routine
// - reflector.ListAndWatch
@@ -369,6 +385,10 @@ func NewCacherFromConfig(config Config) (*Cacher, error) {
// Configure reflector's pager to use an appropriate pagination chunk size for fetching data from
// storage. The pager falls back to full list if paginated list calls fail due to an "Expired" error.
reflector.WatchListPageSize = storageWatchListPageSize
// When etcd loses its leader for 3 cycles, it returns a "no leader" error.
// We don't want to terminate all watchers, as recreating all watchers puts a high load on the api-server.
// In most cases, the leader is reelected within a few cycles.
reflector.MaxInternalErrorRetryDuration = time.Second * 30
cacher.watchCache = watchCache
cacher.reflector = reflector
@@ -403,6 +423,7 @@ func (c *Cacher) startCaching(stopChannel <-chan struct{}) {
successfulList = true
c.ready.set(true)
klog.V(1).Infof("cacher (%v): initialized", c.objectType.String())
metrics.WatchCacheInitializations.WithLabelValues(c.objectType.String()).Inc()
})
defer func() {
if successfulList {
@@ -456,7 +477,9 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
return nil, err
}
c.ready.wait()
if err := c.ready.wait(); err != nil {
return nil, errors.NewServiceUnavailable(err.Error())
}
triggerValue, triggerSupported := "", false
if c.indexedTrigger != nil {
@@ -469,21 +492,12 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
}
}
// If there is indexedTrigger defined, but triggerSupported is false,
// we can't narrow the amount of events significantly at this point.
//
// That said, currently indexedTrigger is defined only for couple resources:
// Pods, Nodes, Secrets and ConfigMaps and there is only a constant
// number of watchers for which triggerSupported is false (excluding those
// issued explicitly by users).
// Thus, to reduce the risk of those watchers blocking all watchers of a
// given resource in the system, we increase the sizes of buffers for them.
chanSize := 10
if c.indexedTrigger != nil && !triggerSupported {
// TODO: We should tune this value and ideally make it dependent on the
// number of objects of a given type and/or their churn.
chanSize = 1000
}
// It boils down to a tradeoff between:
// - having it as small as possible to reduce memory usage
// - having it large enough to ensure that watchers that need to process
// a bunch of changes have enough buffer to avoid blocking other
// watchers while our watcher has a processing hiccup
chanSize := c.watchCache.suggestedWatchChannelSize(c.indexedTrigger != nil, triggerSupported)
// Determine watch timeout('0' means deadline is not set, ignore checking)
deadline, _ := ctx.Deadline()
@@ -503,7 +517,7 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
// underlying watchCache is calling processEvent under its lock.
c.watchCache.RLock()
defer c.watchCache.RUnlock()
initEvents, err := c.watchCache.GetAllEventsSinceThreadUnsafe(watchRV)
cacheInterval, err := c.watchCache.getAllEventsSinceLocked(watchRV)
if err != nil {
// To match the uncached watch implementation, once we have passed authn/authz/admission,
// and successfully parsed a resource version, other errors must fail with a watch event of type ERROR,
@@ -511,18 +525,11 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
return newErrWatcher(err), nil
}
// With some events already sent, update resourceVersion so that
// events that were buffered and not yet processed won't be delivered
// to this watcher second time causing going back in time.
if len(initEvents) > 0 {
watchRV = initEvents[len(initEvents)-1].ResourceVersion
}
func() {
c.Lock()
defer c.Unlock()
// Update watcher.forget function once we can compute it.
watcher.forget = forgetWatcher(c, c.watcherIdx, triggerValue, triggerSupported)
watcher.forget = forgetWatcher(c, watcher, c.watcherIdx, triggerValue, triggerSupported)
c.watchers.addWatcher(watcher, c.watcherIdx, triggerValue, triggerSupported)
// Add it to the queue only when the client supports watch bookmarks.
@@ -532,15 +539,10 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
c.watcherIdx++
}()
go watcher.process(ctx, initEvents, watchRV)
go watcher.processInterval(ctx, cacheInterval, watchRV)
return watcher, nil
}
// WatchList implements storage.Interface.
func (c *Cacher) WatchList(ctx context.Context, key string, opts storage.ListOptions) (watch.Interface, error) {
return c.Watch(ctx, key, opts)
}
// Get implements storage.Interface.
func (c *Cacher) Get(ctx context.Context, key string, opts storage.GetOptions, objPtr runtime.Object) error {
if opts.ResourceVersion == "" {
@@ -565,7 +567,9 @@ func (c *Cacher) Get(ctx context.Context, key string, opts storage.GetOptions, o
// Do not create a trace - it's not for free and there are tons
// of Get requests. We can add it if it will be really needed.
c.ready.wait()
if err := c.ready.wait(); err != nil {
return errors.NewServiceUnavailable(err.Error())
}
objVal, err := conversion.EnforcePtr(objPtr)
if err != nil {
@@ -592,95 +596,46 @@ func (c *Cacher) Get(ctx context.Context, key string, opts storage.GetOptions, o
return nil
}
// GetToList implements storage.Interface.
func (c *Cacher) GetToList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
// NOTICE: Keep in sync with shouldListFromStorage function in
//
// staging/src/k8s.io/apiserver/pkg/util/flowcontrol/request/list_work_estimator.go
func shouldDelegateList(opts storage.ListOptions) bool {
resourceVersion := opts.ResourceVersion
pred := opts.Predicate
pagingEnabled := utilfeature.DefaultFeatureGate.Enabled(features.APIListChunking)
hasContinuation := pagingEnabled && len(pred.Continue) > 0
hasLimit := pagingEnabled && pred.Limit > 0 && resourceVersion != "0"
if resourceVersion == "" || hasContinuation || hasLimit || opts.ResourceVersionMatch == metav1.ResourceVersionMatchExact {
// If resourceVersion is not specified, serve it from underlying
// storage (for backward compatibility). If a continuation is
// requested, serve it from the underlying storage as well.
// Limits are only sent to storage when resourceVersion is non-zero
// since the watch cache isn't able to perform continuations, and
// limits are ignored when resource version is zero
return c.storage.GetToList(ctx, key, opts, listObj)
}
// If resourceVersion is specified, serve it from cache.
// It's guaranteed that the returned value is at least that
// fresh as the given resourceVersion.
listRV, err := c.versioner.ParseResourceVersion(resourceVersion)
if err != nil {
return err
}
if listRV == 0 && !c.ready.check() {
// If Cacher is not yet initialized and we don't require any specific
// minimal resource version, simply forward the request to storage.
return c.storage.GetToList(ctx, key, opts, listObj)
}
trace := utiltrace.New("cacher list", utiltrace.Field{"type", c.objectType.String()})
defer trace.LogIfLong(500 * time.Millisecond)
c.ready.wait()
trace.Step("Ready")
// List elements with at least 'listRV' from cache.
listPtr, err := meta.GetItemsPtr(listObj)
if err != nil {
return err
}
listVal, err := conversion.EnforcePtr(listPtr)
if err != nil {
return err
}
if listVal.Kind() != reflect.Slice {
return fmt.Errorf("need a pointer to slice, got %v", listVal.Kind())
}
filter := filterWithAttrsFunction(key, pred)
obj, exists, readResourceVersion, err := c.watchCache.WaitUntilFreshAndGet(listRV, key, trace)
if err != nil {
return err
}
trace.Step("Got from cache")
if exists {
elem, ok := obj.(*storeElement)
if !ok {
return fmt.Errorf("non *storeElement returned from storage: %v", obj)
}
if filter(elem.Key, elem.Labels, elem.Fields) {
listVal.Set(reflect.Append(listVal, reflect.ValueOf(elem.Object).Elem()))
}
}
if c.versioner != nil {
if err := c.versioner.UpdateList(listObj, readResourceVersion, "", nil); err != nil {
return err
}
}
return nil
// If resourceVersion is not specified, serve it from underlying
// storage (for backward compatibility). If a continuation is
// requested, serve it from the underlying storage as well.
// Limits are only sent to storage when resourceVersion is non-zero
// since the watch cache isn't able to perform continuations, and
// limits are ignored when resource version is zero
return resourceVersion == "" || hasContinuation || hasLimit || opts.ResourceVersionMatch == metav1.ResourceVersionMatchExact
}
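A hedged usage sketch of the helper above (option values are illustrative): an empty resource version always sends the LIST to the underlying storage, matching the backward-compatibility comment.

opts := storage.ListOptions{
	ResourceVersion: "", // empty RV -> read from etcd, not from the watch cache
	Predicate:       storage.SelectionPredicate{Limit: 500},
}
delegate := shouldDelegateList(opts) // true for the options above
_ = delegate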
// List implements storage.Interface.
func (c *Cacher) List(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
func (c *Cacher) listItems(listRV uint64, key string, pred storage.SelectionPredicate, trace *utiltrace.Trace, recursive bool) ([]interface{}, uint64, string, error) {
if !recursive {
obj, exists, readResourceVersion, err := c.watchCache.WaitUntilFreshAndGet(listRV, key, trace)
if err != nil {
return nil, 0, "", err
}
if exists {
return []interface{}{obj}, readResourceVersion, "", nil
}
return nil, readResourceVersion, "", nil
}
return c.watchCache.WaitUntilFreshAndList(listRV, pred.MatcherIndex(), trace)
}
// GetList implements storage.Interface
func (c *Cacher) GetList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
recursive := opts.Recursive
resourceVersion := opts.ResourceVersion
pred := opts.Predicate
pagingEnabled := utilfeature.DefaultFeatureGate.Enabled(features.APIListChunking)
hasContinuation := pagingEnabled && len(pred.Continue) > 0
hasLimit := pagingEnabled && pred.Limit > 0 && resourceVersion != "0"
if resourceVersion == "" || hasContinuation || hasLimit || opts.ResourceVersionMatch == metav1.ResourceVersionMatchExact {
// If resourceVersion is not specified, serve it from underlying
// storage (for backward compatibility). If a continuation is
// requested, serve it from the underlying storage as well.
// Limits are only sent to storage when resourceVersion is non-zero
// since the watch cache isn't able to perform continuations, and
// limits are ignored when resource version is zero.
return c.storage.List(ctx, key, opts, listObj)
if shouldDelegateList(opts) {
return c.storage.GetList(ctx, key, opts, listObj)
}
// If resourceVersion is specified, serve it from cache.
@@ -694,13 +649,17 @@ func (c *Cacher) List(ctx context.Context, key string, opts storage.ListOptions,
if listRV == 0 && !c.ready.check() {
// If Cacher is not yet initialized and we don't require any specific
// minimal resource version, simply forward the request to storage.
return c.storage.List(ctx, key, opts, listObj)
return c.storage.GetList(ctx, key, opts, listObj)
}
trace := utiltrace.New("cacher list", utiltrace.Field{"type", c.objectType.String()})
trace := utiltrace.New("cacher list",
utiltrace.Field{"audit-id", endpointsrequest.GetAuditIDTruncated(ctx)},
utiltrace.Field{Key: "type", Value: c.objectType.String()})
defer trace.LogIfLong(500 * time.Millisecond)
c.ready.wait()
if err := c.ready.wait(); err != nil {
return errors.NewServiceUnavailable(err.Error())
}
trace.Step("Ready")
// List elements with at least 'listRV' from cache.
@@ -717,11 +676,11 @@ func (c *Cacher) List(ctx context.Context, key string, opts storage.ListOptions,
}
filter := filterWithAttrsFunction(key, pred)
objs, readResourceVersion, err := c.watchCache.WaitUntilFreshAndList(listRV, pred.MatcherIndex(), trace)
objs, readResourceVersion, indexUsed, err := c.listItems(listRV, key, pred, trace, recursive)
if err != nil {
return err
}
trace.Step("Listed items from cache", utiltrace.Field{"count", len(objs)})
trace.Step("Listed items from cache", utiltrace.Field{Key: "count", Value: len(objs)})
if len(objs) > listVal.Cap() && pred.Label.Empty() && pred.Field.Empty() {
// Resize the slice appropriately, since we already know that none
// of the elements will be filtered out.
@@ -737,18 +696,19 @@ func (c *Cacher) List(ctx context.Context, key string, opts storage.ListOptions,
listVal.Set(reflect.Append(listVal, reflect.ValueOf(elem.Object).Elem()))
}
}
trace.Step("Filtered items", utiltrace.Field{"count", listVal.Len()})
trace.Step("Filtered items", utiltrace.Field{Key: "count", Value: listVal.Len()})
if c.versioner != nil {
if err := c.versioner.UpdateList(listObj, readResourceVersion, "", nil); err != nil {
return err
}
}
metrics.RecordListCacheMetrics(c.resourcePrefix, indexUsed, len(objs), listVal.Len())
return nil
}
// GuaranteedUpdate implements storage.Interface.
func (c *Cacher) GuaranteedUpdate(
ctx context.Context, key string, ptrToType runtime.Object, ignoreNotFound bool,
ctx context.Context, key string, destination runtime.Object, ignoreNotFound bool,
preconditions *storage.Preconditions, tryUpdate storage.UpdateFunc, _ runtime.Object) error {
// Ignore the suggestion and try to pass down the current version of the object
// read from cache.
@@ -758,10 +718,10 @@ func (c *Cacher) GuaranteedUpdate(
// DeepCopy the object since we modify resource version when serializing the
// current object.
currObj := elem.(*storeElement).Object.DeepCopyObject()
return c.storage.GuaranteedUpdate(ctx, key, ptrToType, ignoreNotFound, preconditions, tryUpdate, currObj)
return c.storage.GuaranteedUpdate(ctx, key, destination, ignoreNotFound, preconditions, tryUpdate, currObj)
}
// If we couldn't get the object, fallback to no-suggestion.
return c.storage.GuaranteedUpdate(ctx, key, ptrToType, ignoreNotFound, preconditions, tryUpdate, nil)
return c.storage.GuaranteedUpdate(ctx, key, destination, ignoreNotFound, preconditions, tryUpdate, nil)
}
// Count implements storage.Interface.
@@ -828,6 +788,7 @@ func (c *Cacher) dispatchEvents() {
c.dispatchEvent(&event)
}
lastProcessedResourceVersion = event.ResourceVersion
metrics.EventsCounter.WithLabelValues(c.objectType.String()).Inc()
case <-bookmarkTimer.C():
bookmarkTimer.Reset(wait.Jitter(time.Second, 0.25))
// Never send a bookmark event if we did not see an event here, this is fine
@@ -872,11 +833,11 @@ func setCachingObjects(event *watchCacheEvent, versioner storage.Versioner) {
// Don't wrap Object for delete events - these are not to deliver any
// events. Only wrap PrevObject.
if object, err := newCachingObject(event.PrevObject); err == nil {
// Update resource version of the underlying object.
// Update resource version of the object.
// event.PrevObject is used to deliver DELETE watch events and
// for them, we set resourceVersion to <current> instead of
// the resourceVersion of the last modification of the object.
updateResourceVersionIfNeeded(object.object, versioner, event.ResourceVersion)
updateResourceVersion(object, versioner, event.ResourceVersion)
event.PrevObject = object
} else {
klog.Errorf("couldn't create cachingObject from: %#v", event.Object)
@@ -905,14 +866,14 @@ func (c *Cacher) dispatchEvent(event *watchCacheEvent) {
// from it justifies increased memory usage, so for now we drop the cached
// serializations after dispatching this event.
//
// Given the deep-copies that are done to create cachingObjects,
// we try to cache serializations only if there are at least 3 watchers.
if len(c.watchersBuffer) >= 3 {
// Make a shallow copy to allow overwriting Object and PrevObject.
wcEvent := *event
setCachingObjects(&wcEvent, c.versioner)
event = &wcEvent
}
// Given that CachingObject is just wrapping the object and not performing
// deep-copying (until some field is explicitly being modified), we create
// it unconditionally to ensure safety and reduce deep-copying.
//
// Make a shallow copy to allow overwriting Object and PrevObject.
wcEvent := *event
setCachingObjects(&wcEvent, c.versioner)
event = &wcEvent
c.blockedWatchers = c.blockedWatchers[:0]
for _, watcher := range c.watchersBuffer {
@@ -928,8 +889,11 @@ func (c *Cacher) dispatchEvent(event *watchCacheEvent) {
timeout := c.dispatchTimeoutBudget.takeAvailable()
c.timer.Reset(timeout)
// Make sure every watcher will try to send event without blocking first,
// even if the timer has already expired.
// Send event to all blocked watchers. As long as timer is running,
// `add` will wait for the watcher to unblock. After timeout,
// `add` will not wait, but immediately close a still blocked watcher.
// Hence, every watcher gets the chance to unblock itself while timer
// is running, not only the first ones in the list.
timer := c.timer
for _, watcher := range c.blockedWatchers {
if !watcher.add(event, timer) {
@@ -950,7 +914,7 @@ func (c *Cacher) dispatchEvent(event *watchCacheEvent) {
}
}
func (c *Cacher) startDispatchingBookmarkEvents() {
func (c *Cacher) startDispatchingBookmarkEventsLocked() {
// Pop already expired watchers. However, explicitly ignore stopped ones,
// as we don't delete watcher from bookmarkWatchers when it is stopped.
for _, watchers := range c.bookmarkWatchers.popExpiredWatchers() {
@@ -961,8 +925,7 @@ func (c *Cacher) startDispatchingBookmarkEvents() {
continue
}
c.watchersBuffer = append(c.watchersBuffer, watcher)
// Requeue the watcher for the next bookmark if needed.
c.bookmarkWatchers.addWatcher(watcher)
c.expiredBookmarkWatchers = append(c.expiredBookmarkWatchers, watcher)
}
}
}
@@ -987,7 +950,7 @@ func (c *Cacher) startDispatching(event *watchCacheEvent) {
c.watchersBuffer = c.watchersBuffer[:0]
if event.Type == watch.Bookmark {
c.startDispatchingBookmarkEvents()
c.startDispatchingBookmarkEventsLocked()
// return here to reduce following code indentation and diff
return
}
@@ -1028,22 +991,31 @@ func (c *Cacher) finishDispatching() {
defer c.Unlock()
c.dispatching = false
for _, watcher := range c.watchersToStop {
watcher.stopThreadUnsafe()
watcher.stopLocked()
}
c.watchersToStop = c.watchersToStop[:0]
for _, watcher := range c.expiredBookmarkWatchers {
if watcher.stopped {
continue
}
// requeue the watcher for the next bookmark if needed.
c.bookmarkWatchers.addWatcher(watcher)
}
c.expiredBookmarkWatchers = c.expiredBookmarkWatchers[:0]
}
func (c *Cacher) terminateAllWatchers() {
c.Lock()
defer c.Unlock()
c.watchers.terminateAll(c.objectType, c.stopWatcherThreadUnsafe)
c.watchers.terminateAll(c.objectType, c.stopWatcherLocked)
}
func (c *Cacher) stopWatcherThreadUnsafe(watcher *cacheWatcher) {
func (c *Cacher) stopWatcherLocked(watcher *cacheWatcher) {
if c.dispatching {
c.watchersToStop = append(c.watchersToStop, watcher)
} else {
watcher.stopThreadUnsafe()
watcher.stopLocked()
}
}
@@ -1062,20 +1034,23 @@ func (c *Cacher) Stop() {
return
}
c.stopped = true
c.ready.stop()
c.stopLock.Unlock()
close(c.stopCh)
c.stopWg.Wait()
}
func forgetWatcher(c *Cacher, index int, triggerValue string, triggerSupported bool) func() {
return func() {
func forgetWatcher(c *Cacher, w *cacheWatcher, index int, triggerValue string, triggerSupported bool) func(bool) {
return func(drainWatcher bool) {
c.Lock()
defer c.Unlock()
w.setDrainInputBufferLocked(drainWatcher)
// It's possible that the watcher is already not in the structure (e.g. in case of
// simultaneous Stop() and terminateAllWatchers(), but it is safe to call stopThreadUnsafe()
// simultaneous Stop() and terminateAllWatchers(), but it is safe to call stopLocked()
// on a watcher multiple times.
c.watchers.deleteWatcher(index, triggerValue, triggerSupported, c.stopWatcherThreadUnsafe)
c.watchers.deleteWatcher(index, triggerValue, triggerSupported, c.stopWatcherLocked)
}
}
@@ -1091,7 +1066,9 @@ func filterWithAttrsFunction(key string, p storage.SelectionPredicate) filterWit
// LastSyncResourceVersion returns resource version to which the underlying cache is synced.
func (c *Cacher) LastSyncResourceVersion() (uint64, error) {
c.ready.wait()
if err := c.ready.wait(); err != nil {
return 0, errors.NewServiceUnavailable(err.Error())
}
resourceVersion := c.reflector.LastSyncResourceVersion()
return c.versioner.ParseResourceVersion(resourceVersion)
@@ -1123,7 +1100,12 @@ func (lw *cacherListerWatcher) List(options metav1.ListOptions) (runtime.Object,
Continue: options.Continue,
}
if err := lw.storage.List(context.TODO(), lw.resourcePrefix, storage.ListOptions{ResourceVersionMatch: options.ResourceVersionMatch, Predicate: pred}, list); err != nil {
storageOpts := storage.ListOptions{
ResourceVersionMatch: options.ResourceVersionMatch,
Predicate: pred,
Recursive: true,
}
if err := lw.storage.GetList(context.TODO(), lw.resourcePrefix, storageOpts, list); err != nil {
return nil, err
}
return list, nil
@@ -1134,11 +1116,10 @@ func (lw *cacherListerWatcher) Watch(options metav1.ListOptions) (watch.Interfac
opts := storage.ListOptions{
ResourceVersion: options.ResourceVersion,
Predicate: storage.Everything,
Recursive: true,
ProgressNotify: true,
}
if utilfeature.DefaultFeatureGate.Enabled(features.EfficientWatchResumption) {
opts.ProgressNotify = true
}
return lw.storage.WatchList(context.TODO(), lw.resourcePrefix, opts)
return lw.storage.Watch(context.TODO(), lw.resourcePrefix, opts)
}
// errWatcher implements watch.Interface to return a single error
@@ -1189,7 +1170,7 @@ type cacheWatcher struct {
done chan struct{}
filter filterWithAttrsFunc
stopped bool
forget func()
forget func(bool)
versioner storage.Versioner
// The watcher will be closed by server after the deadline,
// save it here to send bookmark events before that.
@@ -1201,9 +1182,13 @@ type cacheWatcher struct {
// human readable identifier that helps assigning cacheWatcher
// instance with request
identifier string
// drainInputBuffer indicates whether we should delay closing this watcher
// and send all events in the input buffer.
drainInputBuffer bool
}
func newCacheWatcher(chanSize int, filter filterWithAttrsFunc, forget func(), versioner storage.Versioner, deadline time.Time, allowWatchBookmarks bool, objectType reflect.Type, identifier string) *cacheWatcher {
func newCacheWatcher(chanSize int, filter filterWithAttrsFunc, forget func(bool), versioner storage.Versioner, deadline time.Time, allowWatchBookmarks bool, objectType reflect.Type, identifier string) *cacheWatcher {
return &cacheWatcher{
input: make(chan *watchCacheEvent, chanSize),
result: make(chan watch.Event, chanSize),
@@ -1226,16 +1211,29 @@ func (c *cacheWatcher) ResultChan() <-chan watch.Event {
// Implements watch.Interface.
func (c *cacheWatcher) Stop() {
c.forget()
c.forget(false)
}
// we rely on the fact that stopThredUnsafe is actually protected by Cacher.Lock()
func (c *cacheWatcher) stopThreadUnsafe() {
// we rely on the fact that stopLocked is actually protected by Cacher.Lock()
func (c *cacheWatcher) stopLocked() {
if !c.stopped {
c.stopped = true
close(c.done)
// stop without draining the input channel was requested.
if !c.drainInputBuffer {
close(c.done)
}
close(c.input)
}
// Even if the watcher was already stopped, if it previously was
// using draining mode and it's not using it now, we need to
// close the done channel now. Otherwise we could leak the
// processing goroutine: it would keep trying to put more objects
// into the result channel, the channel would fill up, and there would
// no longer be anyone processing events on the receiving end.
if !c.drainInputBuffer && !c.isDoneChannelClosedLocked() {
close(c.done)
}
}
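A short, hedged illustration of the two termination modes this enables; forget is the callback wired up by forgetWatcher, and the calls below only sketch the intent:

w.forget(false) // stop immediately: done is closed right away, buffered events are dropped
w.forget(true)  // drain: input is closed, but events already buffered are still delivered before done closes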
func (c *cacheWatcher) nonblockingAdd(event *watchCacheEvent) bool {
@@ -1259,8 +1257,8 @@ func (c *cacheWatcher) add(event *watchCacheEvent, timer *time.Timer) bool {
// Since we don't want to block on it infinitely,
// we simply terminate it.
klog.V(1).Infof("Forcing %v watcher close due to unresponsiveness: %v. len(c.input) = %v, len(c.result) = %v", c.objectType.String(), c.identifier, len(c.input), len(c.result))
terminatedWatchersCounter.WithLabelValues(c.objectType.String()).Inc()
c.forget()
metrics.TerminatedWatchersCounter.WithLabelValues(c.objectType.String()).Inc()
c.forget(false)
}
if timer == nil {
@@ -1280,12 +1278,16 @@ func (c *cacheWatcher) add(event *watchCacheEvent, timer *time.Timer) bool {
func (c *cacheWatcher) nextBookmarkTime(now time.Time, bookmarkFrequency time.Duration) (time.Time, bool) {
// We try to send bookmarks:
// (a) roughly every minute
// (b) right before the watcher timeout - for now we simply set it 2s before
//
// (a) right before the watcher timeout - for now we simply set it 2s before
// the deadline
// The former gives us periodicity if the watch breaks due to unexpected
// conditions, the later ensures that on timeout the watcher is as close to
//
// (b) roughly every minute
//
// (b) gives us periodicity if the watch breaks due to unexpected
// conditions, (a) ensures that on timeout the watcher is as close to
// now as possible - this covers 99% of cases.
heartbeatTime := now.Add(bookmarkFrequency)
if c.deadline.IsZero() {
// Timeout is set by our client libraries (e.g. reflector) as well as defaulted by
@@ -1302,20 +1304,33 @@ func (c *cacheWatcher) nextBookmarkTime(now time.Time, bookmarkFrequency time.Du
return heartbeatTime, true
}
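A worked example of the scheduling above, with illustrative timestamps:

now = 10:00:00, bookmarkFrequency = 1m, deadline = 10:00:30
  periodic heartbeat: now + 1m        = 10:01:00
  pre-timeout bookmark: deadline - 2s = 10:00:28  (earlier, so it wins)
with no deadline set, the periodic 10:01:00 heartbeat is used instead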
func getEventObject(object runtime.Object) runtime.Object {
if _, ok := object.(runtime.CacheableObject); ok {
// setDrainInputBufferLocked if set to true indicates that we should delay closing this watcher
// until we send all events residing in the input buffer.
func (c *cacheWatcher) setDrainInputBufferLocked(drain bool) {
c.drainInputBuffer = drain
}
// isDoneChannelClosed checks if c.done channel is closed
func (c *cacheWatcher) isDoneChannelClosedLocked() bool {
select {
case <-c.done:
return true
default:
}
return false
}
func getMutableObject(object runtime.Object) runtime.Object {
if _, ok := object.(*cachingObject); ok {
// It is safe to return without deep-copy, because the underlying
// object was already deep-copied during construction.
// object will lazily perform deep-copy on the first try to change
// any of its fields.
return object
}
return object.DeepCopyObject()
}
func updateResourceVersionIfNeeded(object runtime.Object, versioner storage.Versioner, resourceVersion uint64) {
if _, ok := object.(*cachingObject); ok {
// We assume that for cachingObject resourceVersion was already propagated before.
return
}
func updateResourceVersion(object runtime.Object, versioner storage.Versioner, resourceVersion uint64) {
if err := versioner.UpdateObject(object, resourceVersion); err != nil {
utilruntime.HandleError(fmt.Errorf("failure to version api object (%d) %#v: %v", resourceVersion, object, err))
}
@@ -1338,13 +1353,17 @@ func (c *cacheWatcher) convertToWatchEvent(event *watchCacheEvent) *watch.Event
switch {
case curObjPasses && !oldObjPasses:
return &watch.Event{Type: watch.Added, Object: getEventObject(event.Object)}
return &watch.Event{Type: watch.Added, Object: getMutableObject(event.Object)}
case curObjPasses && oldObjPasses:
return &watch.Event{Type: watch.Modified, Object: getEventObject(event.Object)}
return &watch.Event{Type: watch.Modified, Object: getMutableObject(event.Object)}
case !curObjPasses && oldObjPasses:
// return a delete event with the previous object content, but with the event's resource version
oldObj := getEventObject(event.PrevObject)
updateResourceVersionIfNeeded(oldObj, c.versioner, event.ResourceVersion)
oldObj := getMutableObject(event.PrevObject)
// We know that if oldObj is cachingObject (which can only be set via
// setCachingObjects), its resourceVersion is already set correctly and
// we don't need to update it. However, since cachingObject efficiently
// handles noop updates, we avoid this microoptimization here.
updateResourceVersion(oldObj, c.versioner, event.ResourceVersion)
return &watch.Event{Type: watch.Deleted, Object: oldObj}
}
@@ -1366,7 +1385,7 @@ func (c *cacheWatcher) sendWatchCacheEvent(event *watchCacheEvent) {
// would give us non-determinism.
// At the same time, we don't want to block infinitely on putting
// to c.result, when c.done is already closed.
//
// This ensures that with c.done already close, we at most once go
// into the next select after this. With that, no matter which
// statement we choose there, we will deliver only consecutive
@@ -1383,8 +1402,10 @@ func (c *cacheWatcher) sendWatchCacheEvent(event *watchCacheEvent) {
}
}
func (c *cacheWatcher) process(ctx context.Context, initEvents []*watchCacheEvent, resourceVersion uint64) {
func (c *cacheWatcher) processInterval(ctx context.Context, cacheInterval *watchCacheInterval, resourceVersion uint64) {
defer utilruntime.HandleCrash()
defer close(c.result)
defer c.Stop()
// Check how long we are processing initEvents.
// As long as these are not processed, we are not processing
@@ -1401,20 +1422,60 @@ func (c *cacheWatcher) process(ctx context.Context, initEvents []*watchCacheEven
// consider increase size of result buffer in those cases.
const initProcessThreshold = 500 * time.Millisecond
startTime := time.Now()
for _, event := range initEvents {
initEventCount := 0
for {
event, err := cacheInterval.Next()
if err != nil {
// An error indicates that the cache interval
// has been invalidated and can no longer serve
// events.
//
// Initially we considered sending an "out-of-history"
// Error event in this case, but because historically
// such events weren't sent out of the watchCache, we
// decided not to. This is still ok, because on watch
// closure, the watcher will try to re-instantiate the
// watch and then will get an explicit "out-of-history"
// window. There is potential for optimization, but for
// now, in order to be on the safe side and not break
// custom clients, the cost of it is something that we
// are fully accepting.
klog.Warningf("couldn't retrieve watch event to serve: %#v", err)
return
}
if event == nil {
break
}
c.sendWatchCacheEvent(event)
// With some events already sent, update resourceVersion so that
// events that were buffered and not yet processed won't be delivered
// to this watcher a second time, causing it to go back in time.
resourceVersion = event.ResourceVersion
initEventCount++
}
objType := c.objectType.String()
if len(initEvents) > 0 {
initCounter.WithLabelValues(objType).Add(float64(len(initEvents)))
if initEventCount > 0 {
metrics.InitCounter.WithLabelValues(objType).Add(float64(initEventCount))
}
processingTime := time.Since(startTime)
if processingTime > initProcessThreshold {
klog.V(2).Infof("processing %d initEvents of %s (%s) took %v", len(initEvents), objType, c.identifier, processingTime)
klog.V(2).Infof("processing %d initEvents of %s (%s) took %v", initEventCount, objType, c.identifier, processingTime)
}
defer close(c.result)
defer c.Stop()
c.process(ctx, resourceVersion)
}
func (c *cacheWatcher) process(ctx context.Context, resourceVersion uint64) {
// At this point we already start processing incoming watch events.
// However, the init events can still be processed because their serialization
// and sending to the client happen asynchronously.
// TODO: As described in the KEP, we would like to estimate that by delaying
// the initialization signal proportionally to the number of events to
// process, but we're leaving this to the tuning phase.
utilflowcontrol.WatchInitialized(ctx)
for {
select {
case event, ok := <-c.input:
@@ -1430,36 +1491,3 @@ func (c *cacheWatcher) process(ctx context.Context, initEvents []*watchCacheEven
}
}
}
type ready struct {
ok bool
c *sync.Cond
}
func newReady() *ready {
return &ready{c: sync.NewCond(&sync.RWMutex{})}
}
func (r *ready) wait() {
r.c.L.Lock()
for !r.ok {
r.c.Wait()
}
r.c.L.Unlock()
}
// TODO: Make check() function more sophisticated, in particular
// allow it to behave as "waitWithTimeout".
func (r *ready) check() bool {
rwMutex := r.c.L.(*sync.RWMutex)
rwMutex.RLock()
defer rwMutex.RUnlock()
return r.ok
}
func (r *ready) set(ok bool) {
r.c.L.Lock()
defer r.c.L.Unlock()
r.ok = ok
r.c.Broadcast()
}


@@ -60,11 +60,20 @@ type serializationsCache map[runtime.Identifier]*serializationResult
// so that each of those is computed exactly once.
//
// cachingObject implements the metav1.Object interface (accessors for
// all metadata fields). However, setters for all fields except from
// SelfLink (which is set lately in the path) are ignored.
// all metadata fields).
type cachingObject struct {
lock sync.RWMutex
// deepCopied defines whether the object below has already been
// deep copied. The operation is performed lazily on the first
// setXxx operation.
//
// The lazy deep-copy is useful because effectively the only
// case in which we set any field is ResourceVersion for
// DELETE events, so in all other cases we can avoid
// performing any deep copies.
deepCopied bool
// Object for which serializations are cached.
object metaRuntimeInterface
@@ -80,7 +89,10 @@ type cachingObject struct {
// metav1.Object type.
func newCachingObject(object runtime.Object) (*cachingObject, error) {
if obj, ok := object.(metaRuntimeInterface); ok {
result := &cachingObject{object: obj.DeepCopyObject().(metaRuntimeInterface)}
result := &cachingObject{
object: obj,
deepCopied: false,
}
result.serializations.Store(make(serializationsCache))
return result, nil
}
@@ -125,6 +137,10 @@ func (o *cachingObject) CacheEncode(id runtime.Identifier, encode func(runtime.O
result := o.getSerializationResult(id)
result.once.Do(func() {
buffer := bytes.NewBuffer(nil)
// TODO(wojtek-t): This is currently making a copy to avoid races
// in cases where encoding is making subtle object modifications,
// e.g. #82497
// Figure out if we can somehow avoid this under some conditions.
result.err = encode(o.GetObject(), buffer)
result.raw = buffer.Bytes()
})
@@ -157,7 +173,9 @@ func (o *cachingObject) DeepCopyObject() runtime.Object {
// DeepCopyObject on cachingObject is not expected to be called anywhere.
// However, to be on the safe-side, we implement it, though given the
// cache is only an optimization we ignore copying it.
result := &cachingObject{}
result := &cachingObject{
deepCopied: true,
}
result.serializations.Store(make(serializationsCache))
o.lock.RLock()
@@ -215,6 +233,10 @@ func (o *cachingObject) conditionalSet(isNoop func() bool, set func()) {
if isNoop() {
return
}
if !o.deepCopied {
o.object = o.object.DeepCopyObject().(metaRuntimeInterface)
o.deepCopied = true
}
o.invalidateCacheLocked()
set()
}
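Condensed outside the vendored code, the copy-on-first-write pattern that conditionalSet implements looks roughly like the sketch below; metaRuntime and lazyCopy are illustrative names, not the real types.

package sketch

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
)

// metaRuntime mirrors the combination of interfaces the cached object satisfies.
type metaRuntime interface {
	metav1.Object
	runtime.Object
}

type lazyCopy struct {
	object     metaRuntime
	deepCopied bool
}

// conditionalSet skips noop updates entirely and deep-copies the wrapped
// object only before the first real mutation.
func (l *lazyCopy) conditionalSet(isNoop func() bool, set func()) {
	if isNoop() {
		return
	}
	if !l.deepCopied {
		l.object = l.object.DeepCopyObject().(metaRuntime)
		l.deepCopied = true
	}
	set()
}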
@@ -373,17 +395,6 @@ func (o *cachingObject) SetOwnerReferences(references []metav1.OwnerReference) {
func() { o.object.SetOwnerReferences(references) },
)
}
func (o *cachingObject) GetClusterName() string {
o.lock.RLock()
defer o.lock.RUnlock()
return o.object.GetClusterName()
}
func (o *cachingObject) SetClusterName(clusterName string) {
o.conditionalSet(
func() bool { return o.object.GetClusterName() == clusterName },
func() { o.object.SetClusterName(clusterName) },
)
}
func (o *cachingObject) GetManagedFields() []metav1.ManagedFieldsEntry {
o.lock.RLock()
defer o.lock.RUnlock()


@@ -1,95 +0,0 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cacher
import (
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)
/*
* By default, all the following metrics are defined as falling under
* ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/20190404-kubernetes-control-plane-metrics-stability.md#stability-classes)
*
* Promoting the stability level of the metric is a responsibility of the component owner, since it
* involves explicitly acknowledging support for the metric across multiple releases, in accordance with
* the metric stability policy.
*/
var (
initCounter = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "apiserver_init_events_total",
Help: "Counter of init events processed in watchcache broken by resource type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource"},
)
terminatedWatchersCounter = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "apiserver_terminated_watchers_total",
Help: "Counter of watchers closed due to unresponsiveness broken by resource type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource"},
)
watchCacheCapacityIncreaseTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "watch_cache_capacity_increase_total",
Help: "Total number of watch cache capacity increase events broken by resource type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource"},
)
watchCacheCapacityDecreaseTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "watch_cache_capacity_decrease_total",
Help: "Total number of watch cache capacity decrease events broken by resource type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource"},
)
watchCacheCapacity = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Name: "watch_cache_capacity",
Help: "Total capacity of watch cache broken by resource type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource"},
)
)
func init() {
legacyregistry.MustRegister(initCounter)
legacyregistry.MustRegister(terminatedWatchersCounter)
legacyregistry.MustRegister(watchCacheCapacityIncreaseTotal)
legacyregistry.MustRegister(watchCacheCapacityDecreaseTotal)
legacyregistry.MustRegister(watchCacheCapacity)
}
// recordsWatchCacheCapacityChange record watchCache capacity resize(increase or decrease) operations.
func recordsWatchCacheCapacityChange(objType string, old, new int) {
if old < new {
watchCacheCapacityIncreaseTotal.WithLabelValues(objType).Inc()
return
}
watchCacheCapacityDecreaseTotal.WithLabelValues(objType).Inc()
watchCacheCapacity.WithLabelValues(objType).Set(float64(new))
}


@@ -0,0 +1,8 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- sig-instrumentation-approvers
reviewers:
- sig-instrumentation-reviewers
labels:
- sig/instrumentation


@@ -0,0 +1,174 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
compbasemetrics "k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)
const (
namespace = "apiserver"
subsystem = "watch_cache"
)
/*
* By default, all the following metrics are defined as falling under
* ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes)
*
* Promoting the stability level of the metric is a responsibility of the component owner, since it
* involves explicitly acknowledging support for the metric across multiple releases, in accordance with
* the metric stability policy.
*/
var (
listCacheCount = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Name: "cache_list_total",
Help: "Number of LIST requests served from watch cache",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource_prefix", "index"},
)
listCacheNumFetched = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Name: "cache_list_fetched_objects_total",
Help: "Number of objects read from watch cache in the course of serving a LIST request",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource_prefix", "index"},
)
listCacheNumReturned = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Name: "cache_list_returned_objects_total",
Help: "Number of objects returned for a LIST request from watch cache",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource_prefix"},
)
InitCounter = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Name: "init_events_total",
Help: "Counter of init events processed in watch cache broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
EventsCounter = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "events_dispatched_total",
Help: "Counter of events dispatched in watch cache broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
TerminatedWatchersCounter = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Name: "terminated_watchers_total",
Help: "Counter of watchers closed due to unresponsiveness broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
watchCacheCapacityIncreaseTotal = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Subsystem: subsystem,
Name: "capacity_increase_total",
Help: "Total number of watch cache capacity increase events broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
watchCacheCapacityDecreaseTotal = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Subsystem: subsystem,
Name: "capacity_decrease_total",
Help: "Total number of watch cache capacity decrease events broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
WatchCacheCapacity = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Subsystem: subsystem,
Name: "capacity",
Help: "Total capacity of watch cache broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
WatchCacheInitializations = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "initializations_total",
Help: "Counter of watch cache initializations broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
)
var registerMetrics sync.Once
// Register all metrics.
func Register() {
// Register the metrics.
registerMetrics.Do(func() {
legacyregistry.MustRegister(listCacheCount)
legacyregistry.MustRegister(listCacheNumFetched)
legacyregistry.MustRegister(listCacheNumReturned)
legacyregistry.MustRegister(InitCounter)
legacyregistry.MustRegister(EventsCounter)
legacyregistry.MustRegister(TerminatedWatchersCounter)
legacyregistry.MustRegister(watchCacheCapacityIncreaseTotal)
legacyregistry.MustRegister(watchCacheCapacityDecreaseTotal)
legacyregistry.MustRegister(WatchCacheCapacity)
legacyregistry.MustRegister(WatchCacheInitializations)
})
}
// RecordListCacheMetrics notes various metrics of the cost to serve a LIST request
func RecordListCacheMetrics(resourcePrefix, indexName string, numFetched, numReturned int) {
listCacheCount.WithLabelValues(resourcePrefix, indexName).Inc()
listCacheNumFetched.WithLabelValues(resourcePrefix, indexName).Add(float64(numFetched))
listCacheNumReturned.WithLabelValues(resourcePrefix).Add(float64(numReturned))
}
// RecordsWatchCacheCapacityChange records watchCache capacity resize (increase or decrease) operations.
func RecordsWatchCacheCapacityChange(objType string, old, new int) {
WatchCacheCapacity.WithLabelValues(objType).Set(float64(new))
if old < new {
watchCacheCapacityIncreaseTotal.WithLabelValues(objType).Inc()
return
}
watchCacheCapacityDecreaseTotal.WithLabelValues(objType).Inc()
}
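A brief usage sketch of the new metrics API, mirroring the call sites added in cacher.go and watch_cache.go above (the argument values are made up):

metrics.Register() // idempotent, guarded by sync.Once
metrics.RecordListCacheMetrics("/pods", "spec.nodeName", 120, 7)  // resourcePrefix, index, fetched, returned
metrics.RecordsWatchCacheCapacityChange("*core.Pod", 1024, 2048)  // objType, old, new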

vendor/k8s.io/apiserver/pkg/storage/cacher/ready.go (generated vendored file, 96 lines)

@@ -0,0 +1,96 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cacher
import (
"fmt"
"sync"
)
type status int
const (
Pending status = iota
Ready
Stopped
)
// ready is a three-state condition variable that blocks until it is Ready, unless it is Stopped.
// Its initial state is Pending.
type ready struct {
state status
c *sync.Cond
}
func newReady() *ready {
return &ready{
c: sync.NewCond(&sync.RWMutex{}),
state: Pending,
}
}
// wait blocks until it is Ready or Stopped; it returns an error if it is Stopped.
func (r *ready) wait() error {
r.c.L.Lock()
defer r.c.L.Unlock()
for r.state == Pending {
r.c.Wait()
}
switch r.state {
case Ready:
return nil
case Stopped:
return fmt.Errorf("apiserver cacher is stopped")
default:
return fmt.Errorf("unexpected apiserver cache state: %v", r.state)
}
}
// check returns true only if it is Ready.
func (r *ready) check() bool {
// TODO: Make check() function more sophisticated, in particular
// allow it to behave as "waitWithTimeout".
rwMutex := r.c.L.(*sync.RWMutex)
rwMutex.RLock()
defer rwMutex.RUnlock()
return r.state == Ready
}
// set sets the state to Pending (false) or Ready (true); it has no effect if the state is Stopped.
func (r *ready) set(ok bool) {
r.c.L.Lock()
defer r.c.L.Unlock()
if r.state == Stopped {
return
}
if ok {
r.state = Ready
} else {
r.state = Pending
}
r.c.Broadcast()
}
// stop the condition variable and set it as Stopped. This state is irreversible.
func (r *ready) stop() {
r.c.L.Lock()
defer r.c.L.Unlock()
if r.state != Stopped {
r.state = Stopped
r.c.Broadcast()
}
}
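A hedged sketch of how the three-state gate is intended to be used; compare the c.ready.wait()/set()/stop() call sites in the cacher.go hunks above:

r := newReady()
go func() {
	// ... after the initial list and watch succeed ...
	r.set(true) // Pending -> Ready, wakes every waiter
}()
if err := r.wait(); err != nil {
	// Stopped: the cacher was shut down; callers surface this as 503.
	return err
}
// serve from the cache; r.check() offers a non-blocking probe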


@@ -19,6 +19,8 @@ package cacher
import (
"sync"
"time"
"k8s.io/utils/clock"
)
const (
@@ -28,13 +30,14 @@ const (
// timeBudget implements a budget of time that you can use and is
// periodically being refreshed. The pattern to use it is:
// budget := newTimeBudget(...)
// ...
// timeout := budget.takeAvailable()
// // Now you can spend at most timeout on doing stuff
// ...
// // If you didn't use all timeout, return what you didn't use
// budget.returnUnused(<unused part of timeout>)
//
// budget := newTimeBudget(...)
// ...
// timeout := budget.takeAvailable()
// // Now you can spend at most timeout on doing stuff
// ...
// // If you didn't use all timeout, return what you didn't use
// budget.returnUnused(<unused part of timeout>)
//
// NOTE: It's not recommended to be used concurrently from multiple threads -
// if first user takes the whole timeout, the second one will get 0 timeout
@@ -46,42 +49,39 @@ type timeBudget interface {
type timeBudgetImpl struct {
sync.Mutex
budget time.Duration
refresh time.Duration
clock clock.Clock
budget time.Duration
maxBudget time.Duration
refresh time.Duration
// last store last access time
last time.Time
}
func newTimeBudget(stopCh <-chan struct{}) timeBudget {
func newTimeBudget() timeBudget {
result := &timeBudgetImpl{
clock: clock.RealClock{},
budget: time.Duration(0),
refresh: refreshPerSecond,
maxBudget: maxBudget,
}
go result.periodicallyRefresh(stopCh)
result.last = result.clock.Now()
return result
}
func (t *timeBudgetImpl) periodicallyRefresh(stopCh <-chan struct{}) {
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
for {
select {
case <-ticker.C:
t.Lock()
if t.budget = t.budget + t.refresh; t.budget > t.maxBudget {
t.budget = t.maxBudget
}
t.Unlock()
case <-stopCh:
return
}
}
}
func (t *timeBudgetImpl) takeAvailable() time.Duration {
t.Lock()
defer t.Unlock()
// budget accumulated since last access
now := t.clock.Now()
acc := now.Sub(t.last).Seconds() * t.refresh.Seconds()
if acc < 0 {
acc = 0
}
// update current budget and store the current time
if t.budget = t.budget + time.Duration(acc*1e9); t.budget > t.maxBudget {
t.budget = t.maxBudget
}
t.last = now
result := t.budget
t.budget = time.Duration(0)
return result
@@ -94,6 +94,8 @@ func (t *timeBudgetImpl) returnUnused(unused time.Duration) {
// We used more than allowed.
return
}
// add the unused time directly to the budget
// takeAvailable() will take into account the elapsed time
if t.budget = t.budget + unused; t.budget > t.maxBudget {
t.budget = t.maxBudget
}
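The lazy accrual above replaces the old background refresh goroutine. A small sketch restating the arithmetic in isolation (accrue is a hypothetical helper; the real refreshPerSecond and maxBudget constants are defined earlier in this file and not shown in the hunk):

// accrue returns the new budget after `elapsed` of idle time: the budget
// grows by `refresh` per elapsed second and is capped at maxBudget.
func accrue(budget, refresh, maxBudget, elapsed time.Duration) time.Duration {
	acc := elapsed.Seconds() * refresh.Seconds()
	if acc < 0 {
		acc = 0
	}
	if budget = budget + time.Duration(acc*float64(time.Second)); budget > maxBudget {
		budget = maxBudget
	}
	return budget
}

// For example, with refresh = 100ms, 2s of idleness accrues 2 * 0.1s = 200ms
// of extra budget (subject to the maxBudget cap).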


@@ -18,6 +18,7 @@ package cacher
import (
"fmt"
"math"
"reflect"
"sort"
"sync"
@@ -27,11 +28,12 @@ import (
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/clock"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/cacher/metrics"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
utiltrace "k8s.io/utils/trace"
)
@@ -189,6 +191,9 @@ type watchCache struct {
// cacher's objectType.
objectType reflect.Type
// For testing cache interval invalidation.
indexValidator indexValidator
}
func newWatchCache(
@@ -217,8 +222,10 @@ func newWatchCache(
objectType: objectType,
}
objType := objectType.String()
watchCacheCapacity.WithLabelValues(objType).Set(float64(wc.capacity))
metrics.WatchCacheCapacity.WithLabelValues(objType).Set(float64(wc.capacity))
wc.cond = sync.NewCond(wc.RLocker())
wc.indexValidator = wc.isIndexValidLocked
return wc
}
@@ -380,7 +387,7 @@ func (w *watchCache) doCacheResizeLocked(capacity int) {
newCache[i%capacity] = w.cache[i%w.capacity]
}
w.cache = newCache
recordsWatchCacheCapacityChange(w.objectType.String(), w.capacity, capacity)
metrics.RecordsWatchCacheCapacityChange(w.objectType.String(), w.capacity, capacity)
w.capacity = capacity
}
@@ -420,17 +427,27 @@ func (w *watchCache) List() []interface{} {
// You HAVE TO explicitly call w.RUnlock() after this function.
func (w *watchCache) waitUntilFreshAndBlock(resourceVersion uint64, trace *utiltrace.Trace) error {
startTime := w.clock.Now()
go func() {
// Wake us up when the time limit has expired. The docs
// promise that time.After (well, NewTimer, which it calls)
// will wait *at least* the duration given. Since this go
// routine starts sometime after we record the start time, and
// it will wake up the loop below sometime after the broadcast,
// we don't need to worry about waking it up before the time
// has expired accidentally.
<-w.clock.After(blockTimeout)
w.cond.Broadcast()
}()
// In case resourceVersion is 0, we accept an arbitrarily stale result.
// As a result, the condition in the below for loop will never be
// satisfied (w.resourceVersion is never negative), so this call will
// never hit w.cond.Wait().
// Consequently, we can optimize the code by not firing the wakeup
// function (and avoid starting a goroutine), especially given that
// resourceVersion=0 is the most common case.
if resourceVersion > 0 {
go func() {
// Wake us up when the time limit has expired. The docs
// promise that time.After (well, NewTimer, which it calls)
// will wait *at least* the duration given. Since this go
// routine starts sometime after we record the start time, and
// it will wake up the loop below sometime after the broadcast,
// we don't need to worry about waking it up before the time
// has expired accidentally.
<-w.clock.After(blockTimeout)
w.cond.Broadcast()
}()
}
w.RLock()
if trace != nil {
@@ -449,12 +466,13 @@ func (w *watchCache) waitUntilFreshAndBlock(resourceVersion uint64, trace *utilt
return nil
}
// WaitUntilFreshAndList returns list of pointers to <storeElement> objects.
func (w *watchCache) WaitUntilFreshAndList(resourceVersion uint64, matchValues []storage.MatchValue, trace *utiltrace.Trace) ([]interface{}, uint64, error) {
// WaitUntilFreshAndList returns list of pointers to `storeElement` objects along
// with their ResourceVersion and the name of the index, if any, that was used.
func (w *watchCache) WaitUntilFreshAndList(resourceVersion uint64, matchValues []storage.MatchValue, trace *utiltrace.Trace) ([]interface{}, uint64, string, error) {
err := w.waitUntilFreshAndBlock(resourceVersion, trace)
defer w.RUnlock()
if err != nil {
return nil, 0, err
return nil, 0, "", err
}
// This isn't the place where we do "final filtering" - only some "prefiltering" is happening here. So the only
@@ -463,10 +481,10 @@ func (w *watchCache) WaitUntilFreshAndList(resourceVersion uint64, matchValues [
// TODO: if multiple indexes match, return the one with the fewest items, so as to do as much filtering as possible.
for _, matchValue := range matchValues {
if result, err := w.store.ByIndex(matchValue.IndexName, matchValue.Value); err == nil {
return result, w.resourceVersion, nil
return result, w.resourceVersion, matchValue.IndexName, nil
}
}
return w.store.List(), w.resourceVersion, nil
return w.store.List(), w.resourceVersion, "", nil
}
// WaitUntilFreshAndGet returns a pointers to <storeElement> object.
@@ -557,7 +575,74 @@ func (w *watchCache) SetOnReplace(onReplace func()) {
w.onReplace = onReplace
}
func (w *watchCache) GetAllEventsSinceThreadUnsafe(resourceVersion uint64) ([]*watchCacheEvent, error) {
func (w *watchCache) Resync() error {
// Nothing to do
return nil
}
func (w *watchCache) currentCapacity() int {
w.Lock()
defer w.Unlock()
return w.capacity
}
const (
// minWatchChanSize is the min size of channels used by the watch.
// We keep that set to 10 for "backward compatibility" until we
// convince ourselves based on some metrics that decreasing is safe.
minWatchChanSize = 10
// maxWatchChanSizeWithIndexAndTrigger is the max size of the channel
// used by the watch using the index and trigger selector.
maxWatchChanSizeWithIndexAndTrigger = 10
// maxWatchChanSizeWithIndexWithoutTrigger is the max size of the channel
// used by the watch using the index but without a triggering selector.
// We keep that set to 1000 for "backward compatibility", until we
// convince ourselves based on some metrics that decreasing is safe.
maxWatchChanSizeWithIndexWithoutTrigger = 1000
// maxWatchChanSizeWithoutIndex is the max size of the channel
// used by the watch not using the index.
// TODO(wojtek-t): Figure out if the value shouldn't be higher.
maxWatchChanSizeWithoutIndex = 100
)
func (w *watchCache) suggestedWatchChannelSize(indexExists, triggerUsed bool) int {
// To estimate the channel size we use a heuristic that a channel
// should roughly be able to keep one second of history.
// We don't have exact data, but given we store updates from
// the last <eventFreshDuration>, we approximate it by dividing the
// capacity by the length of the history window.
chanSize := int(math.Ceil(float64(w.currentCapacity()) / eventFreshDuration.Seconds()))
// Finally we adjust the size to avoid ending up with too low or
// too large values.
if chanSize < minWatchChanSize {
chanSize = minWatchChanSize
}
var maxChanSize int
switch {
case indexExists && triggerUsed:
maxChanSize = maxWatchChanSizeWithIndexAndTrigger
case indexExists && !triggerUsed:
maxChanSize = maxWatchChanSizeWithIndexWithoutTrigger
case !indexExists:
maxChanSize = maxWatchChanSizeWithoutIndex
}
if chanSize > maxChanSize {
chanSize = maxChanSize
}
return chanSize
}
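For a rough sense of the numbers, a standalone version of the same heuristic; the capacity and history-window values below are assumptions for illustration, not values taken from this change:

func exampleSuggestedChanSize() int {
	const (
		capacity           = 1024 // assumed current watch cache capacity
		freshWindowSeconds = 75.0 // assumed history window in seconds
		minSize            = 10   // mirrors minWatchChanSize
		maxSizeNoIndex     = 100  // mirrors maxWatchChanSizeWithoutIndex
	)
	// ceil(1024 / 75) = 14: above the minimum and below the no-index cap,
	// so 14 would be the suggested channel size for these numbers.
	size := int(math.Ceil(capacity / freshWindowSeconds))
	if size < minSize {
		size = minSize
	}
	if size > maxSizeNoIndex {
		size = maxSizeNoIndex
	}
	return size
}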
// isIndexValidLocked checks if a given index is still valid.
// This assumes that the lock is held.
func (w *watchCache) isIndexValidLocked(index int) bool {
return index >= w.startIndex
}
// getAllEventsSinceLocked returns a watchCacheInterval that can be used to
// retrieve events since a certain resourceVersion. This function assumes
// it is called while holding the watchCache lock.
func (w *watchCache) getAllEventsSinceLocked(resourceVersion uint64) (*watchCacheInterval, error) {
size := w.endIndex - w.startIndex
var oldest uint64
switch {
@@ -583,27 +668,11 @@ func (w *watchCache) GetAllEventsSinceThreadUnsafe(resourceVersion uint64) ([]*w
// current state and only then start watching from that point.
//
// TODO: In v2 api, we should stop returning the current state - #13969.
allItems := w.store.List()
result := make([]*watchCacheEvent, len(allItems))
for i, item := range allItems {
elem, ok := item.(*storeElement)
if !ok {
return nil, fmt.Errorf("not a storeElement: %v", elem)
}
objLabels, objFields, err := w.getAttrsFunc(elem.Object)
if err != nil {
return nil, err
}
result[i] = &watchCacheEvent{
Type: watch.Added,
Object: elem.Object,
ObjLabels: objLabels,
ObjFields: objFields,
Key: elem.Key,
ResourceVersion: w.resourceVersion,
}
ci, err := newCacheIntervalFromStore(w.resourceVersion, w.store, w.getAttrsFunc)
if err != nil {
return nil, err
}
return result, nil
return ci, nil
}
if resourceVersion < oldest-1 {
return nil, errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d (%d)", resourceVersion, oldest-1))
@@ -614,20 +683,9 @@ func (w *watchCache) GetAllEventsSinceThreadUnsafe(resourceVersion uint64) ([]*w
return w.cache[(w.startIndex+i)%w.capacity].ResourceVersion > resourceVersion
}
first := sort.Search(size, f)
result := make([]*watchCacheEvent, size-first)
for i := 0; i < size-first; i++ {
result[i] = w.cache[(w.startIndex+first+i)%w.capacity]
indexerFunc := func(i int) *watchCacheEvent {
return w.cache[i%w.capacity]
}
return result, nil
}
func (w *watchCache) GetAllEventsSince(resourceVersion uint64) ([]*watchCacheEvent, error) {
w.RLock()
defer w.RUnlock()
return w.GetAllEventsSinceThreadUnsafe(resourceVersion)
}
func (w *watchCache) Resync() error {
// Nothing to do
return nil
ci := newCacheInterval(w.startIndex+first, w.endIndex, indexerFunc, w.indexValidator, &w.RWMutex)
return ci, nil
}
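Compared with the removed code path, which copied every matching event into a slice while holding the lock, the returned interval only records absolute positions and resolves them lazily through the indexerFunc, which maps an absolute position onto a circular-buffer slot with a modulo. A trivial standalone sketch of that mapping, using an assumed capacity of 8 purely for illustration:

func exampleCircularSlot() []int {
	const capacity = 8 // assumed capacity, for illustration only
	// Absolute positions 7, 8 and 9 wrap around to slots 7, 0 and 1.
	positions := []int{7, 8, 9}
	slots := make([]int, 0, len(positions))
	for _, p := range positions {
		slots = append(slots, p%capacity)
	}
	return slots
}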

View File

@@ -0,0 +1,226 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cacher
import (
"fmt"
"sync"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/tools/cache"
)
// watchCacheInterval serves as an abstraction over a source
// of watchCacheEvents. It maintains a window of events over
// an underlying source and these events can be served using
// the exposed Next() API. The main intent for doing things
// this way is to introduce an upper bound of memory usage
// for starting a watch and reduce the maximum possible time
// interval for which the lock would be held while events are
// copied over.
//
// The source of events for the interval is typically either
// the watchCache circular buffer, if events being retrieved
// need to be for resource versions > 0 or the underlying
// implementation of Store, if resource version = 0.
//
// Furthermore, an interval can be either valid or invalid at
// any given point of time. The notion of validity makes sense
// only in cases where the window of events in the underlying
// source can change over time - i.e. for watchCache circular
// buffer. When the circular buffer is full and an event needs
// to be popped off, watchCache::startIndex is incremented. In
// this case, an interval tracking that popped event is valid
// only if it has already been copied to its internal buffer.
// However, for efficiency we perform that lazily and we mark
// an interval as invalid iff we need to copy events from the
// watchCache and we end up needing events that have already
// been popped off. This translates to the following condition:
//
// watchCacheInterval::startIndex >= watchCache::startIndex.
//
// When this condition becomes false, the interval is no longer
// valid and should not be used to retrieve and serve elements
// from the underlying source.
type watchCacheInterval struct {
// startIndex denotes the starting point of the interval
// being considered. The value is the index in the actual
// source of watchCacheEvents. If the source of events is
// the watchCache, then this must be used modulo capacity.
startIndex int
// endIndex denotes the ending point of the interval being
// considered. The value is the index in the actual source
// of events. If the source of the events is the watchCache,
// then this should be used modulo capacity.
endIndex int
// indexer is meant to inject behaviour for how an event must
// be retrieved from the underlying source given an index.
indexer indexerFunc
// indexValidator is used to check if a given index is still
// valid. If it is deemed that the index is not
// valid, then this interval can no longer be used to serve
// events. Use of indexValidator is warranted only in cases
// where the window of events in the underlying source can
// change over time. Furthermore, an interval is invalid if
// its startIndex no longer coincides with the startIndex of
// underlying source.
indexValidator indexValidator
// buffer holds watchCacheEvents that this interval returns on
// a call to Next(). This exists mainly to reduce acquiring the
// lock on each invocation of Next().
buffer *watchCacheIntervalBuffer
// lock effectively protects access to the underlying source
// of events through the indexer and indexValidator functions.
//
// Given that indexer and indexValidator only read state, a
// Locker obtained through RLocker() is provided if possible.
lock sync.Locker
}
type attrFunc func(runtime.Object) (labels.Set, fields.Set, error)
type indexerFunc func(int) *watchCacheEvent
type indexValidator func(int) bool
func newCacheInterval(startIndex, endIndex int, indexer indexerFunc, indexValidator indexValidator, locker sync.Locker) *watchCacheInterval {
return &watchCacheInterval{
startIndex: startIndex,
endIndex: endIndex,
indexer: indexer,
indexValidator: indexValidator,
buffer: &watchCacheIntervalBuffer{buffer: make([]*watchCacheEvent, bufferSize)},
lock: locker,
}
}
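To make the validity rule described above concrete, here is a toy sequence (an assumed illustration, not a test from this change) in which the underlying source drops its oldest event before the interval has buffered it, so the next call to Next() reports the interval as invalidated:

func exampleInvalidation() error {
	events := []*watchCacheEvent{{ResourceVersion: 1}, {ResourceVersion: 2}}
	sourceStartIndex := 0
	var mu sync.Mutex
	ci := newCacheInterval(
		0, len(events),
		func(i int) *watchCacheEvent { return events[i] },
		func(i int) bool { return i >= sourceStartIndex },
		&mu,
	)
	// Simulate the circular buffer evicting its oldest event before the
	// interval copied anything into its own buffer.
	sourceStartIndex = 1
	_, err := ci.Next() // fails: interval startIndex 0 is no longer valid
	return err
}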
// newCacheIntervalFromStore is meant to handle the case of rv=0, such that the events
// returned by Next() need to be events from a List() done on the underlying store of
// the watch cache.
func newCacheIntervalFromStore(resourceVersion uint64, store cache.Indexer, getAttrsFunc attrFunc) (*watchCacheInterval, error) {
buffer := &watchCacheIntervalBuffer{}
allItems := store.List()
buffer.buffer = make([]*watchCacheEvent, len(allItems))
for i, item := range allItems {
elem, ok := item.(*storeElement)
if !ok {
return nil, fmt.Errorf("not a storeElement: %v", elem)
}
objLabels, objFields, err := getAttrsFunc(elem.Object)
if err != nil {
return nil, err
}
buffer.buffer[i] = &watchCacheEvent{
Type: watch.Added,
Object: elem.Object,
ObjLabels: objLabels,
ObjFields: objFields,
Key: elem.Key,
ResourceVersion: resourceVersion,
}
buffer.endIndex++
}
ci := &watchCacheInterval{
startIndex: 0,
// Simulate that we already have all the events we're looking for.
endIndex: 0,
buffer: buffer,
}
return ci, nil
}
// Next returns the next item in the cache interval provided the cache
// interval is still valid. An error is returned if the interval is
// invalidated.
func (wci *watchCacheInterval) Next() (*watchCacheEvent, error) {
// if there are items in the buffer to return, return from
// the buffer.
if event, exists := wci.buffer.next(); exists {
return event, nil
}
// check if there are still other events in this interval
// that can be processed.
if wci.startIndex >= wci.endIndex {
return nil, nil
}
wci.lock.Lock()
defer wci.lock.Unlock()
if valid := wci.indexValidator(wci.startIndex); !valid {
return nil, fmt.Errorf("cache interval invalidated, interval startIndex: %d", wci.startIndex)
}
wci.fillBuffer()
if event, exists := wci.buffer.next(); exists {
return event, nil
}
return nil, nil
}
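Callers are expected to treat a (nil, nil) result as "interval exhausted" and any error as a signal to stop serving from the interval. A minimal drain loop, assumed to sit in the same package; this is an illustrative sketch, not the cacheWatcher's actual processing code:

func exampleDrain(ci *watchCacheInterval, send func(*watchCacheEvent)) error {
	for {
		event, err := ci.Next()
		if err != nil {
			// The interval was invalidated; the watcher has to be stopped
			// and the client has to relist and re-establish the watch.
			return err
		}
		if event == nil {
			return nil // all events in the interval have been served
		}
		send(event)
	}
}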
func (wci *watchCacheInterval) fillBuffer() {
wci.buffer.startIndex = 0
wci.buffer.endIndex = 0
for wci.startIndex < wci.endIndex && !wci.buffer.isFull() {
event := wci.indexer(wci.startIndex)
if event == nil {
break
}
wci.buffer.buffer[wci.buffer.endIndex] = event
wci.buffer.endIndex++
wci.startIndex++
}
}
const bufferSize = 100
// watchCacheIntervalBuffer is used to reduce acquiring
// the lock on each invocation of watchCacheInterval.Next().
type watchCacheIntervalBuffer struct {
// buffer is used to hold watchCacheEvents that
// the interval returns on a call to Next().
buffer []*watchCacheEvent
// The first element of buffer is at position startIndex;
// endIndex points one past the last element (it is exclusive).
startIndex int
endIndex int
}
// next returns the next event present in the interval buffer provided
// it is not empty.
func (wcib *watchCacheIntervalBuffer) next() (*watchCacheEvent, bool) {
if wcib.isEmpty() {
return nil, false
}
next := wcib.buffer[wcib.startIndex]
wcib.startIndex++
return next, true
}
func (wcib *watchCacheIntervalBuffer) isFull() bool {
return wcib.endIndex >= bufferSize
}
func (wcib *watchCacheIntervalBuffer) isEmpty() bool {
return wcib.startIndex == wcib.endIndex
}