Upgrade k8s package version (#5358)

* upgrade k8s package version

Signed-off-by: hongzhouzi <hongzhouzi@kubesphere.io>

* Script upgrade and code formatting.

Signed-off-by: hongzhouzi <hongzhouzi@kubesphere.io>

Signed-off-by: hongzhouzi <hongzhouzi@kubesphere.io>
hongzhouzi, 2022-11-15 14:56:38 +08:00, committed by GitHub
parent 5f91c1663a
commit 44167aa47a
3106 changed files with 321340 additions and 172080 deletions


@@ -31,20 +31,23 @@ import (
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/clock"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/watch"
endpointsrequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/cacher/metrics"
utilfeature "k8s.io/apiserver/pkg/util/feature"
utilflowcontrol "k8s.io/apiserver/pkg/util/flowcontrol"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
utiltrace "k8s.io/utils/trace"
)
var (
emptyFunc = func() {}
emptyFunc = func(bool) {}
)
const (
@@ -144,6 +147,10 @@ func (i *indexedWatchers) deleteWatcher(number int, value string, supported bool
}
func (i *indexedWatchers) terminateAll(objectType reflect.Type, done func(*cacheWatcher)) {
// note that we don't have to call setDrainInputBufferLocked method on the watchers
// because we take advantage of the default value - stop immediately
// also watchers that have already had their draining strategy set
// are no longer available (they were removed from the allWatchers and the valueWatchers maps)
if len(i.allWatchers) > 0 || len(i.valueWatchers) > 0 {
klog.Warningf("Terminating all watchers from cacher %v", objectType)
}
@@ -180,6 +187,10 @@ func newTimeBucketWatchers(clock clock.Clock, bookmarkFrequency time.Duration) *
// adds a watcher to the bucket, if the deadline is before the start, it will be
// added to the first one.
func (t *watcherBookmarkTimeBuckets) addWatcher(w *cacheWatcher) bool {
// note that the returned time can be before t.createTime,
// especially in cases when the nextBookmarkTime method
// gives us the zero value of type Time
// so bucketID can hold a negative value
nextTime, ok := w.nextBookmarkTime(t.clock.Now(), t.bookmarkFrequency)
if !ok {
return false
@@ -190,7 +201,7 @@ func (t *watcherBookmarkTimeBuckets) addWatcher(w *cacheWatcher) bool {
if bucketID < t.startBucketID {
bucketID = t.startBucketID
}
watchers, _ := t.watchersBuckets[bucketID]
watchers := t.watchersBuckets[bucketID]
t.watchersBuckets[bucketID] = append(watchers, w)
return true
}
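For intuition, a minimal sketch of the clamping described in the comment above. bucketFor, createTime and bucketInterval are hypothetical stand-ins for the corresponding watcherBookmarkTimeBuckets fields, not the vendored implementation:

func bucketFor(nextTime, createTime time.Time, bucketInterval time.Duration, startBucketID int64) int64 {
	// nextBookmarkTime may return a time before createTime (e.g. the zero
	// time.Time), which makes the raw ID negative; clamp it into the first
	// non-expired bucket.
	id := int64(nextTime.Sub(createTime) / bucketInterval)
	if id < startBucketID {
		id = startBucketID
	}
	return id
}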
@@ -230,6 +241,8 @@ type Cacher struct {
// Incoming events that should be dispatched to watchers.
incoming chan watchCacheEvent
resourcePrefix string
sync.RWMutex
// Before accessing the cacher's cache, wait for the ready to be ok.
@@ -292,6 +305,8 @@ type Cacher struct {
watchersToStop []*cacheWatcher
// Maintain a timeout queue to send the bookmark event before the watcher times out.
bookmarkWatchers *watcherBookmarkTimeBuckets
// expiredBookmarkWatchers is a list of watchers that have expired and need to be scheduled for the next bookmark event
expiredBookmarkWatchers []*cacheWatcher
}
// NewCacherFromConfig creates a new Cacher responsible for servicing WATCH and LIST requests from
@@ -328,6 +343,7 @@ func NewCacherFromConfig(config Config) (*Cacher, error) {
}
objType := reflect.TypeOf(obj)
cacher := &Cacher{
resourcePrefix: config.ResourcePrefix,
ready: newReady(),
storage: config.Storage,
objectType: objType,
@@ -341,7 +357,7 @@ func NewCacherFromConfig(config Config) (*Cacher, error) {
},
// TODO: Figure out the correct value for the buffer size.
incoming: make(chan watchCacheEvent, 100),
dispatchTimeoutBudget: newTimeBudget(stopCh),
dispatchTimeoutBudget: newTimeBudget(),
// We need to (potentially) stop both:
// - wait.Until go-routine
// - reflector.ListAndWatch
@@ -369,6 +385,10 @@ func NewCacherFromConfig(config Config) (*Cacher, error) {
// Configure reflector's pager to use an appropriate pagination chunk size for fetching data from
// storage. The pager falls back to full list if paginated list calls fail due to an "Expired" error.
reflector.WatchListPageSize = storageWatchListPageSize
// When etcd loses its leader for 3 cycles, it returns a "no leader" error.
// We don't want to terminate all watchers, as recreating all watchers puts a high load on the api-server.
// In most cases, the leader is reelected within a few cycles.
reflector.MaxInternalErrorRetryDuration = time.Second * 30
cacher.watchCache = watchCache
cacher.reflector = reflector
@@ -403,6 +423,7 @@ func (c *Cacher) startCaching(stopChannel <-chan struct{}) {
successfulList = true
c.ready.set(true)
klog.V(1).Infof("cacher (%v): initialized", c.objectType.String())
metrics.WatchCacheInitializations.WithLabelValues(c.objectType.String()).Inc()
})
defer func() {
if successfulList {
@@ -456,7 +477,9 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
return nil, err
}
c.ready.wait()
if err := c.ready.wait(); err != nil {
return nil, errors.NewServiceUnavailable(err.Error())
}
triggerValue, triggerSupported := "", false
if c.indexedTrigger != nil {
@@ -469,21 +492,12 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
}
}
// If there is indexedTrigger defined, but triggerSupported is false,
// we can't narrow the amount of events significantly at this point.
//
// That said, currently indexedTrigger is defined only for couple resources:
// Pods, Nodes, Secrets and ConfigMaps and there is only a constant
// number of watchers for which triggerSupported is false (excluding those
// issued explicitly by users).
// Thus, to reduce the risk of those watchers blocking all watchers of a
// given resource in the system, we increase the sizes of buffers for them.
chanSize := 10
if c.indexedTrigger != nil && !triggerSupported {
// TODO: We should tune this value and ideally make it dependent on the
// number of objects of a given type and/or their churn.
chanSize = 1000
}
// It boils down to a tradeoff between:
// - having it as small as possible to reduce memory usage
// - having it large enough to ensure that watchers that need to process
// a bunch of changes have enough buffer to avoid blocking other
// watchers while our watcher has a processing hiccup
chanSize := c.watchCache.suggestedWatchChannelSize(c.indexedTrigger != nil, triggerSupported)
// Determine watch timeout('0' means deadline is not set, ignore checking)
deadline, _ := ctx.Deadline()
@@ -503,7 +517,7 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
// underlying watchCache is calling processEvent under its lock.
c.watchCache.RLock()
defer c.watchCache.RUnlock()
initEvents, err := c.watchCache.GetAllEventsSinceThreadUnsafe(watchRV)
cacheInterval, err := c.watchCache.getAllEventsSinceLocked(watchRV)
if err != nil {
// To match the uncached watch implementation, once we have passed authn/authz/admission,
// and successfully parsed a resource version, other errors must fail with a watch event of type ERROR,
@@ -511,18 +525,11 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
return newErrWatcher(err), nil
}
// With some events already sent, update resourceVersion so that
// events that were buffered and not yet processed won't be delivered
// to this watcher second time causing going back in time.
if len(initEvents) > 0 {
watchRV = initEvents[len(initEvents)-1].ResourceVersion
}
func() {
c.Lock()
defer c.Unlock()
// Update watcher.forget function once we can compute it.
watcher.forget = forgetWatcher(c, c.watcherIdx, triggerValue, triggerSupported)
watcher.forget = forgetWatcher(c, watcher, c.watcherIdx, triggerValue, triggerSupported)
c.watchers.addWatcher(watcher, c.watcherIdx, triggerValue, triggerSupported)
// Add it to the queue only when the client supports watch bookmarks.
@@ -532,15 +539,10 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
c.watcherIdx++
}()
go watcher.process(ctx, initEvents, watchRV)
go watcher.processInterval(ctx, cacheInterval, watchRV)
return watcher, nil
}
// WatchList implements storage.Interface.
func (c *Cacher) WatchList(ctx context.Context, key string, opts storage.ListOptions) (watch.Interface, error) {
return c.Watch(ctx, key, opts)
}
// Get implements storage.Interface.
func (c *Cacher) Get(ctx context.Context, key string, opts storage.GetOptions, objPtr runtime.Object) error {
if opts.ResourceVersion == "" {
@@ -565,7 +567,9 @@ func (c *Cacher) Get(ctx context.Context, key string, opts storage.GetOptions, o
// Do not create a trace - it's not for free and there are tons
// of Get requests. We can add it if it will be really needed.
c.ready.wait()
if err := c.ready.wait(); err != nil {
return errors.NewServiceUnavailable(err.Error())
}
objVal, err := conversion.EnforcePtr(objPtr)
if err != nil {
@@ -592,95 +596,46 @@ func (c *Cacher) Get(ctx context.Context, key string, opts storage.GetOptions, o
return nil
}
// GetToList implements storage.Interface.
func (c *Cacher) GetToList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
// NOTICE: Keep in sync with shouldListFromStorage function in
//
// staging/src/k8s.io/apiserver/pkg/util/flowcontrol/request/list_work_estimator.go
func shouldDelegateList(opts storage.ListOptions) bool {
resourceVersion := opts.ResourceVersion
pred := opts.Predicate
pagingEnabled := utilfeature.DefaultFeatureGate.Enabled(features.APIListChunking)
hasContinuation := pagingEnabled && len(pred.Continue) > 0
hasLimit := pagingEnabled && pred.Limit > 0 && resourceVersion != "0"
if resourceVersion == "" || hasContinuation || hasLimit || opts.ResourceVersionMatch == metav1.ResourceVersionMatchExact {
// If resourceVersion is not specified, serve it from underlying
// storage (for backward compatibility). If a continuation is
// requested, serve it from the underlying storage as well.
// Limits are only sent to storage when resourceVersion is non-zero
// since the watch cache isn't able to perform continuations, and
// limits are ignored when resource version is zero
return c.storage.GetToList(ctx, key, opts, listObj)
}
// If resourceVersion is specified, serve it from cache.
// It's guaranteed that the returned value is at least that
// fresh as the given resourceVersion.
listRV, err := c.versioner.ParseResourceVersion(resourceVersion)
if err != nil {
return err
}
if listRV == 0 && !c.ready.check() {
// If Cacher is not yet initialized and we don't require any specific
// minimal resource version, simply forward the request to storage.
return c.storage.GetToList(ctx, key, opts, listObj)
}
trace := utiltrace.New("cacher list", utiltrace.Field{"type", c.objectType.String()})
defer trace.LogIfLong(500 * time.Millisecond)
c.ready.wait()
trace.Step("Ready")
// List elements with at least 'listRV' from cache.
listPtr, err := meta.GetItemsPtr(listObj)
if err != nil {
return err
}
listVal, err := conversion.EnforcePtr(listPtr)
if err != nil {
return err
}
if listVal.Kind() != reflect.Slice {
return fmt.Errorf("need a pointer to slice, got %v", listVal.Kind())
}
filter := filterWithAttrsFunction(key, pred)
obj, exists, readResourceVersion, err := c.watchCache.WaitUntilFreshAndGet(listRV, key, trace)
if err != nil {
return err
}
trace.Step("Got from cache")
if exists {
elem, ok := obj.(*storeElement)
if !ok {
return fmt.Errorf("non *storeElement returned from storage: %v", obj)
}
if filter(elem.Key, elem.Labels, elem.Fields) {
listVal.Set(reflect.Append(listVal, reflect.ValueOf(elem.Object).Elem()))
}
}
if c.versioner != nil {
if err := c.versioner.UpdateList(listObj, readResourceVersion, "", nil); err != nil {
return err
}
}
return nil
// If resourceVersion is not specified, serve it from underlying
// storage (for backward compatibility). If a continuation is
// requested, serve it from the underlying storage as well.
// Limits are only sent to storage when resourceVersion is non-zero
// since the watch cache isn't able to perform continuations, and
// limits are ignored when resource version is zero
return resourceVersion == "" || hasContinuation || hasLimit || opts.ResourceVersionMatch == metav1.ResourceVersionMatchExact
}
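A hedged usage sketch of the helper above (option values are illustrative): an empty resource version always sends the LIST to the underlying storage, matching the backward-compatibility comment.

opts := storage.ListOptions{
	ResourceVersion: "", // empty RV -> read from etcd, not from the watch cache
	Predicate:       storage.SelectionPredicate{Limit: 500},
}
delegate := shouldDelegateList(opts) // true for the options above
_ = delegate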
// List implements storage.Interface.
func (c *Cacher) List(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
func (c *Cacher) listItems(listRV uint64, key string, pred storage.SelectionPredicate, trace *utiltrace.Trace, recursive bool) ([]interface{}, uint64, string, error) {
if !recursive {
obj, exists, readResourceVersion, err := c.watchCache.WaitUntilFreshAndGet(listRV, key, trace)
if err != nil {
return nil, 0, "", err
}
if exists {
return []interface{}{obj}, readResourceVersion, "", nil
}
return nil, readResourceVersion, "", nil
}
return c.watchCache.WaitUntilFreshAndList(listRV, pred.MatcherIndex(), trace)
}
// GetList implements storage.Interface
func (c *Cacher) GetList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
recursive := opts.Recursive
resourceVersion := opts.ResourceVersion
pred := opts.Predicate
pagingEnabled := utilfeature.DefaultFeatureGate.Enabled(features.APIListChunking)
hasContinuation := pagingEnabled && len(pred.Continue) > 0
hasLimit := pagingEnabled && pred.Limit > 0 && resourceVersion != "0"
if resourceVersion == "" || hasContinuation || hasLimit || opts.ResourceVersionMatch == metav1.ResourceVersionMatchExact {
// If resourceVersion is not specified, serve it from underlying
// storage (for backward compatibility). If a continuation is
// requested, serve it from the underlying storage as well.
// Limits are only sent to storage when resourceVersion is non-zero
// since the watch cache isn't able to perform continuations, and
// limits are ignored when resource version is zero.
return c.storage.List(ctx, key, opts, listObj)
if shouldDelegateList(opts) {
return c.storage.GetList(ctx, key, opts, listObj)
}
// If resourceVersion is specified, serve it from cache.
@@ -694,13 +649,17 @@ func (c *Cacher) List(ctx context.Context, key string, opts storage.ListOptions,
if listRV == 0 && !c.ready.check() {
// If Cacher is not yet initialized and we don't require any specific
// minimal resource version, simply forward the request to storage.
return c.storage.List(ctx, key, opts, listObj)
return c.storage.GetList(ctx, key, opts, listObj)
}
trace := utiltrace.New("cacher list", utiltrace.Field{"type", c.objectType.String()})
trace := utiltrace.New("cacher list",
utiltrace.Field{"audit-id", endpointsrequest.GetAuditIDTruncated(ctx)},
utiltrace.Field{Key: "type", Value: c.objectType.String()})
defer trace.LogIfLong(500 * time.Millisecond)
c.ready.wait()
if err := c.ready.wait(); err != nil {
return errors.NewServiceUnavailable(err.Error())
}
trace.Step("Ready")
// List elements with at least 'listRV' from cache.
@@ -717,11 +676,11 @@ func (c *Cacher) List(ctx context.Context, key string, opts storage.ListOptions,
}
filter := filterWithAttrsFunction(key, pred)
objs, readResourceVersion, err := c.watchCache.WaitUntilFreshAndList(listRV, pred.MatcherIndex(), trace)
objs, readResourceVersion, indexUsed, err := c.listItems(listRV, key, pred, trace, recursive)
if err != nil {
return err
}
trace.Step("Listed items from cache", utiltrace.Field{"count", len(objs)})
trace.Step("Listed items from cache", utiltrace.Field{Key: "count", Value: len(objs)})
if len(objs) > listVal.Cap() && pred.Label.Empty() && pred.Field.Empty() {
// Resize the slice appropriately, since we already know that none
// of the elements will be filtered out.
@@ -737,18 +696,19 @@ func (c *Cacher) List(ctx context.Context, key string, opts storage.ListOptions,
listVal.Set(reflect.Append(listVal, reflect.ValueOf(elem.Object).Elem()))
}
}
trace.Step("Filtered items", utiltrace.Field{"count", listVal.Len()})
trace.Step("Filtered items", utiltrace.Field{Key: "count", Value: listVal.Len()})
if c.versioner != nil {
if err := c.versioner.UpdateList(listObj, readResourceVersion, "", nil); err != nil {
return err
}
}
metrics.RecordListCacheMetrics(c.resourcePrefix, indexUsed, len(objs), listVal.Len())
return nil
}
// GuaranteedUpdate implements storage.Interface.
func (c *Cacher) GuaranteedUpdate(
ctx context.Context, key string, ptrToType runtime.Object, ignoreNotFound bool,
ctx context.Context, key string, destination runtime.Object, ignoreNotFound bool,
preconditions *storage.Preconditions, tryUpdate storage.UpdateFunc, _ runtime.Object) error {
// Ignore the suggestion and try to pass down the current version of the object
// read from cache.
@@ -758,10 +718,10 @@ func (c *Cacher) GuaranteedUpdate(
// DeepCopy the object since we modify resource version when serializing the
// current object.
currObj := elem.(*storeElement).Object.DeepCopyObject()
return c.storage.GuaranteedUpdate(ctx, key, ptrToType, ignoreNotFound, preconditions, tryUpdate, currObj)
return c.storage.GuaranteedUpdate(ctx, key, destination, ignoreNotFound, preconditions, tryUpdate, currObj)
}
// If we couldn't get the object, fallback to no-suggestion.
return c.storage.GuaranteedUpdate(ctx, key, ptrToType, ignoreNotFound, preconditions, tryUpdate, nil)
return c.storage.GuaranteedUpdate(ctx, key, destination, ignoreNotFound, preconditions, tryUpdate, nil)
}
// Count implements storage.Interface.
@@ -828,6 +788,7 @@ func (c *Cacher) dispatchEvents() {
c.dispatchEvent(&event)
}
lastProcessedResourceVersion = event.ResourceVersion
metrics.EventsCounter.WithLabelValues(c.objectType.String()).Inc()
case <-bookmarkTimer.C():
bookmarkTimer.Reset(wait.Jitter(time.Second, 0.25))
// Never send a bookmark event if we did not see an event here, this is fine
@@ -872,11 +833,11 @@ func setCachingObjects(event *watchCacheEvent, versioner storage.Versioner) {
// Don't wrap Object for delete events - these are not to deliver any
// events. Only wrap PrevObject.
if object, err := newCachingObject(event.PrevObject); err == nil {
// Update resource version of the underlying object.
// Update resource version of the object.
// event.PrevObject is used to deliver DELETE watch events and
// for them, we set resourceVersion to <current> instead of
// the resourceVersion of the last modification of the object.
updateResourceVersionIfNeeded(object.object, versioner, event.ResourceVersion)
updateResourceVersion(object, versioner, event.ResourceVersion)
event.PrevObject = object
} else {
klog.Errorf("couldn't create cachingObject from: %#v", event.Object)
@@ -905,14 +866,14 @@ func (c *Cacher) dispatchEvent(event *watchCacheEvent) {
// from it justifies increased memory usage, so for now we drop the cached
// serializations after dispatching this event.
//
// Given the deep-copies that are done to create cachingObjects,
// we try to cache serializations only if there are at least 3 watchers.
if len(c.watchersBuffer) >= 3 {
// Make a shallow copy to allow overwriting Object and PrevObject.
wcEvent := *event
setCachingObjects(&wcEvent, c.versioner)
event = &wcEvent
}
// Given that CachingObject is just wrapping the object and not performing
// deep-copying (until some field is explicitly being modified), we create
// it unconditionally to ensure safety and reduce deep-copying.
//
// Make a shallow copy to allow overwriting Object and PrevObject.
wcEvent := *event
setCachingObjects(&wcEvent, c.versioner)
event = &wcEvent
c.blockedWatchers = c.blockedWatchers[:0]
for _, watcher := range c.watchersBuffer {
@@ -928,8 +889,11 @@ func (c *Cacher) dispatchEvent(event *watchCacheEvent) {
timeout := c.dispatchTimeoutBudget.takeAvailable()
c.timer.Reset(timeout)
// Make sure every watcher will try to send event without blocking first,
// even if the timer has already expired.
// Send event to all blocked watchers. As long as timer is running,
// `add` will wait for the watcher to unblock. After timeout,
// `add` will not wait, but immediately close a still blocked watcher.
// Hence, every watcher gets the chance to unblock itself while timer
// is running, not only the first ones in the list.
timer := c.timer
for _, watcher := range c.blockedWatchers {
if !watcher.add(event, timer) {
@@ -950,7 +914,7 @@ func (c *Cacher) dispatchEvent(event *watchCacheEvent) {
}
}
func (c *Cacher) startDispatchingBookmarkEvents() {
func (c *Cacher) startDispatchingBookmarkEventsLocked() {
// Pop already expired watchers. However, explicitly ignore stopped ones,
// as we don't delete watcher from bookmarkWatchers when it is stopped.
for _, watchers := range c.bookmarkWatchers.popExpiredWatchers() {
@@ -961,8 +925,7 @@ func (c *Cacher) startDispatchingBookmarkEvents() {
continue
}
c.watchersBuffer = append(c.watchersBuffer, watcher)
// Requeue the watcher for the next bookmark if needed.
c.bookmarkWatchers.addWatcher(watcher)
c.expiredBookmarkWatchers = append(c.expiredBookmarkWatchers, watcher)
}
}
}
@@ -987,7 +950,7 @@ func (c *Cacher) startDispatching(event *watchCacheEvent) {
c.watchersBuffer = c.watchersBuffer[:0]
if event.Type == watch.Bookmark {
c.startDispatchingBookmarkEvents()
c.startDispatchingBookmarkEventsLocked()
// return here to reduce following code indentation and diff
return
}
@@ -1028,22 +991,31 @@ func (c *Cacher) finishDispatching() {
defer c.Unlock()
c.dispatching = false
for _, watcher := range c.watchersToStop {
watcher.stopThreadUnsafe()
watcher.stopLocked()
}
c.watchersToStop = c.watchersToStop[:0]
for _, watcher := range c.expiredBookmarkWatchers {
if watcher.stopped {
continue
}
// requeue the watcher for the next bookmark if needed.
c.bookmarkWatchers.addWatcher(watcher)
}
c.expiredBookmarkWatchers = c.expiredBookmarkWatchers[:0]
}
func (c *Cacher) terminateAllWatchers() {
c.Lock()
defer c.Unlock()
c.watchers.terminateAll(c.objectType, c.stopWatcherThreadUnsafe)
c.watchers.terminateAll(c.objectType, c.stopWatcherLocked)
}
func (c *Cacher) stopWatcherThreadUnsafe(watcher *cacheWatcher) {
func (c *Cacher) stopWatcherLocked(watcher *cacheWatcher) {
if c.dispatching {
c.watchersToStop = append(c.watchersToStop, watcher)
} else {
watcher.stopThreadUnsafe()
watcher.stopLocked()
}
}
@@ -1062,20 +1034,23 @@ func (c *Cacher) Stop() {
return
}
c.stopped = true
c.ready.stop()
c.stopLock.Unlock()
close(c.stopCh)
c.stopWg.Wait()
}
func forgetWatcher(c *Cacher, index int, triggerValue string, triggerSupported bool) func() {
return func() {
func forgetWatcher(c *Cacher, w *cacheWatcher, index int, triggerValue string, triggerSupported bool) func(bool) {
return func(drainWatcher bool) {
c.Lock()
defer c.Unlock()
w.setDrainInputBufferLocked(drainWatcher)
// It's possible that the watcher is already not in the structure (e.g. in case of
// simultaneous Stop() and terminateAllWatchers(), but it is safe to call stopThreadUnsafe()
// simultaneous Stop() and terminateAllWatchers(), but it is safe to call stopLocked()
// on a watcher multiple times.
c.watchers.deleteWatcher(index, triggerValue, triggerSupported, c.stopWatcherThreadUnsafe)
c.watchers.deleteWatcher(index, triggerValue, triggerSupported, c.stopWatcherLocked)
}
}
@@ -1091,7 +1066,9 @@ func filterWithAttrsFunction(key string, p storage.SelectionPredicate) filterWit
// LastSyncResourceVersion returns resource version to which the underlying cache is synced.
func (c *Cacher) LastSyncResourceVersion() (uint64, error) {
c.ready.wait()
if err := c.ready.wait(); err != nil {
return 0, errors.NewServiceUnavailable(err.Error())
}
resourceVersion := c.reflector.LastSyncResourceVersion()
return c.versioner.ParseResourceVersion(resourceVersion)
@@ -1123,7 +1100,12 @@ func (lw *cacherListerWatcher) List(options metav1.ListOptions) (runtime.Object,
Continue: options.Continue,
}
if err := lw.storage.List(context.TODO(), lw.resourcePrefix, storage.ListOptions{ResourceVersionMatch: options.ResourceVersionMatch, Predicate: pred}, list); err != nil {
storageOpts := storage.ListOptions{
ResourceVersionMatch: options.ResourceVersionMatch,
Predicate: pred,
Recursive: true,
}
if err := lw.storage.GetList(context.TODO(), lw.resourcePrefix, storageOpts, list); err != nil {
return nil, err
}
return list, nil
@@ -1134,11 +1116,10 @@ func (lw *cacherListerWatcher) Watch(options metav1.ListOptions) (watch.Interfac
opts := storage.ListOptions{
ResourceVersion: options.ResourceVersion,
Predicate: storage.Everything,
Recursive: true,
ProgressNotify: true,
}
if utilfeature.DefaultFeatureGate.Enabled(features.EfficientWatchResumption) {
opts.ProgressNotify = true
}
return lw.storage.WatchList(context.TODO(), lw.resourcePrefix, opts)
return lw.storage.Watch(context.TODO(), lw.resourcePrefix, opts)
}
// errWatcher implements watch.Interface to return a single error
@@ -1189,7 +1170,7 @@ type cacheWatcher struct {
done chan struct{}
filter filterWithAttrsFunc
stopped bool
forget func()
forget func(bool)
versioner storage.Versioner
// The watcher will be closed by server after the deadline,
// save it here to send bookmark events before that.
@@ -1201,9 +1182,13 @@ type cacheWatcher struct {
// human readable identifier that helps assigning cacheWatcher
// instance with request
identifier string
// drainInputBuffer indicates whether we should delay closing this watcher
// and send all events in the input buffer.
drainInputBuffer bool
}
func newCacheWatcher(chanSize int, filter filterWithAttrsFunc, forget func(), versioner storage.Versioner, deadline time.Time, allowWatchBookmarks bool, objectType reflect.Type, identifier string) *cacheWatcher {
func newCacheWatcher(chanSize int, filter filterWithAttrsFunc, forget func(bool), versioner storage.Versioner, deadline time.Time, allowWatchBookmarks bool, objectType reflect.Type, identifier string) *cacheWatcher {
return &cacheWatcher{
input: make(chan *watchCacheEvent, chanSize),
result: make(chan watch.Event, chanSize),
@@ -1226,16 +1211,29 @@ func (c *cacheWatcher) ResultChan() <-chan watch.Event {
// Implements watch.Interface.
func (c *cacheWatcher) Stop() {
c.forget()
c.forget(false)
}
// we rely on the fact that stopThredUnsafe is actually protected by Cacher.Lock()
func (c *cacheWatcher) stopThreadUnsafe() {
// we rely on the fact that stopLocked is actually protected by Cacher.Lock()
func (c *cacheWatcher) stopLocked() {
if !c.stopped {
c.stopped = true
close(c.done)
// stop without draining the input channel was requested.
if !c.drainInputBuffer {
close(c.done)
}
close(c.input)
}
// Even if the watcher was already stopped, if it previously was
// using draining mode and it's not using it now, we need to
// close the done channel now. Otherwise we could leak the
// processing goroutine: it would keep trying to put more objects
// into the result channel, the channel would fill up, and there would
// no longer be anyone processing events on the receiving end.
if !c.drainInputBuffer && !c.isDoneChannelClosedLocked() {
close(c.done)
}
}
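A short, hedged illustration of the two termination modes this enables; forget is the callback wired up by forgetWatcher, and the calls below only sketch the intent:

w.forget(false) // stop immediately: done is closed right away, buffered events are dropped
w.forget(true)  // drain: input is closed, but events already buffered are still delivered before done closes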
func (c *cacheWatcher) nonblockingAdd(event *watchCacheEvent) bool {
@@ -1259,8 +1257,8 @@ func (c *cacheWatcher) add(event *watchCacheEvent, timer *time.Timer) bool {
// Since we don't want to block on it infinitely,
// we simply terminate it.
klog.V(1).Infof("Forcing %v watcher close due to unresponsiveness: %v. len(c.input) = %v, len(c.result) = %v", c.objectType.String(), c.identifier, len(c.input), len(c.result))
terminatedWatchersCounter.WithLabelValues(c.objectType.String()).Inc()
c.forget()
metrics.TerminatedWatchersCounter.WithLabelValues(c.objectType.String()).Inc()
c.forget(false)
}
if timer == nil {
@@ -1280,12 +1278,16 @@ func (c *cacheWatcher) add(event *watchCacheEvent, timer *time.Timer) bool {
func (c *cacheWatcher) nextBookmarkTime(now time.Time, bookmarkFrequency time.Duration) (time.Time, bool) {
// We try to send bookmarks:
// (a) roughly every minute
// (b) right before the watcher timeout - for now we simply set it 2s before
//
// (a) right before the watcher timeout - for now we simply set it 2s before
// the deadline
// The former gives us periodicity if the watch breaks due to unexpected
// conditions, the later ensures that on timeout the watcher is as close to
//
// (b) roughly every minute
//
// (b) gives us periodicity if the watch breaks due to unexpected
// conditions, (a) ensures that on timeout the watcher is as close to
// now as possible - this covers 99% of cases.
heartbeatTime := now.Add(bookmarkFrequency)
if c.deadline.IsZero() {
// Timeout is set by our client libraries (e.g. reflector) as well as defaulted by
@@ -1302,20 +1304,33 @@ func (c *cacheWatcher) nextBookmarkTime(now time.Time, bookmarkFrequency time.Du
return heartbeatTime, true
}
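A worked example of the scheduling above, with illustrative timestamps:

now = 10:00:00, bookmarkFrequency = 1m, deadline = 10:00:30
  periodic heartbeat: now + 1m        = 10:01:00
  pre-timeout bookmark: deadline - 2s = 10:00:28  (earlier, so it wins)
with no deadline set, the periodic 10:01:00 heartbeat is used instead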
func getEventObject(object runtime.Object) runtime.Object {
if _, ok := object.(runtime.CacheableObject); ok {
// setDrainInputBufferLocked if set to true indicates that we should delay closing this watcher
// until we send all events residing in the input buffer.
func (c *cacheWatcher) setDrainInputBufferLocked(drain bool) {
c.drainInputBuffer = drain
}
// isDoneChannelClosed checks if c.done channel is closed
func (c *cacheWatcher) isDoneChannelClosedLocked() bool {
select {
case <-c.done:
return true
default:
}
return false
}
func getMutableObject(object runtime.Object) runtime.Object {
if _, ok := object.(*cachingObject); ok {
// It is safe to return without deep-copy, because the underlying
// object was already deep-copied during construction.
// object will lazily perform deep-copy on the first try to change
// any of its fields.
return object
}
return object.DeepCopyObject()
}
func updateResourceVersionIfNeeded(object runtime.Object, versioner storage.Versioner, resourceVersion uint64) {
if _, ok := object.(*cachingObject); ok {
// We assume that for cachingObject resourceVersion was already propagated before.
return
}
func updateResourceVersion(object runtime.Object, versioner storage.Versioner, resourceVersion uint64) {
if err := versioner.UpdateObject(object, resourceVersion); err != nil {
utilruntime.HandleError(fmt.Errorf("failure to version api object (%d) %#v: %v", resourceVersion, object, err))
}
@@ -1338,13 +1353,17 @@ func (c *cacheWatcher) convertToWatchEvent(event *watchCacheEvent) *watch.Event
switch {
case curObjPasses && !oldObjPasses:
return &watch.Event{Type: watch.Added, Object: getEventObject(event.Object)}
return &watch.Event{Type: watch.Added, Object: getMutableObject(event.Object)}
case curObjPasses && oldObjPasses:
return &watch.Event{Type: watch.Modified, Object: getEventObject(event.Object)}
return &watch.Event{Type: watch.Modified, Object: getMutableObject(event.Object)}
case !curObjPasses && oldObjPasses:
// return a delete event with the previous object content, but with the event's resource version
oldObj := getEventObject(event.PrevObject)
updateResourceVersionIfNeeded(oldObj, c.versioner, event.ResourceVersion)
oldObj := getMutableObject(event.PrevObject)
// We know that if oldObj is cachingObject (which can only be set via
// setCachingObjects), its resourceVersion is already set correctly and
// we don't need to update it. However, since cachingObject efficiently
// handles noop updates, we avoid this microoptimization here.
updateResourceVersion(oldObj, c.versioner, event.ResourceVersion)
return &watch.Event{Type: watch.Deleted, Object: oldObj}
}
@@ -1366,7 +1385,7 @@ func (c *cacheWatcher) sendWatchCacheEvent(event *watchCacheEvent) {
// would give us non-determinism.
// At the same time, we don't want to block infinitely on putting
// to c.result, when c.done is already closed.
//
// This ensures that with c.done already close, we at most once go
// into the next select after this. With that, no matter which
// statement we choose there, we will deliver only consecutive
@@ -1383,8 +1402,10 @@ func (c *cacheWatcher) sendWatchCacheEvent(event *watchCacheEvent) {
}
}
func (c *cacheWatcher) process(ctx context.Context, initEvents []*watchCacheEvent, resourceVersion uint64) {
func (c *cacheWatcher) processInterval(ctx context.Context, cacheInterval *watchCacheInterval, resourceVersion uint64) {
defer utilruntime.HandleCrash()
defer close(c.result)
defer c.Stop()
// Check how long we are processing initEvents.
// As long as these are not processed, we are not processing
@@ -1401,20 +1422,60 @@ func (c *cacheWatcher) process(ctx context.Context, initEvents []*watchCacheEven
// consider increase size of result buffer in those cases.
const initProcessThreshold = 500 * time.Millisecond
startTime := time.Now()
for _, event := range initEvents {
initEventCount := 0
for {
event, err := cacheInterval.Next()
if err != nil {
// An error indicates that the cache interval
// has been invalidated and can no longer serve
// events.
//
// Initially we considered sending an "out-of-history"
// Error event in this case, but because historically
// such events weren't sent out of the watchCache, we
// decided not to. This is still ok, because on watch
// closure, the watcher will try to re-instantiate the
// watch and then will get an explicit "out-of-history"
// window. There is potential for optimization, but for
// now, in order to be on the safe side and not break
// custom clients, the cost of it is something that we
// are fully accepting.
klog.Warningf("couldn't retrieve watch event to serve: %#v", err)
return
}
if event == nil {
break
}
c.sendWatchCacheEvent(event)
// With some events already sent, update resourceVersion so that
// events that were buffered and not yet processed won't be delivered
// to this watcher a second time, causing it to go back in time.
resourceVersion = event.ResourceVersion
initEventCount++
}
objType := c.objectType.String()
if len(initEvents) > 0 {
initCounter.WithLabelValues(objType).Add(float64(len(initEvents)))
if initEventCount > 0 {
metrics.InitCounter.WithLabelValues(objType).Add(float64(initEventCount))
}
processingTime := time.Since(startTime)
if processingTime > initProcessThreshold {
klog.V(2).Infof("processing %d initEvents of %s (%s) took %v", len(initEvents), objType, c.identifier, processingTime)
klog.V(2).Infof("processing %d initEvents of %s (%s) took %v", initEventCount, objType, c.identifier, processingTime)
}
defer close(c.result)
defer c.Stop()
c.process(ctx, resourceVersion)
}
func (c *cacheWatcher) process(ctx context.Context, resourceVersion uint64) {
// At this point we already start processing incoming watch events.
// However, the init events can still be processed because their serialization
// and sending to the client happen asynchronously.
// TODO: As described in the KEP, we would like to estimate that by delaying
// the initialization signal proportionally to the number of events to
// process, but we're leaving this to the tuning phase.
utilflowcontrol.WatchInitialized(ctx)
for {
select {
case event, ok := <-c.input:
@@ -1430,36 +1491,3 @@ func (c *cacheWatcher) process(ctx context.Context, initEvents []*watchCacheEven
}
}
}
type ready struct {
ok bool
c *sync.Cond
}
func newReady() *ready {
return &ready{c: sync.NewCond(&sync.RWMutex{})}
}
func (r *ready) wait() {
r.c.L.Lock()
for !r.ok {
r.c.Wait()
}
r.c.L.Unlock()
}
// TODO: Make check() function more sophisticated, in particular
// allow it to behave as "waitWithTimeout".
func (r *ready) check() bool {
rwMutex := r.c.L.(*sync.RWMutex)
rwMutex.RLock()
defer rwMutex.RUnlock()
return r.ok
}
func (r *ready) set(ok bool) {
r.c.L.Lock()
defer r.c.L.Unlock()
r.ok = ok
r.c.Broadcast()
}


@@ -60,11 +60,20 @@ type serializationsCache map[runtime.Identifier]*serializationResult
// so that each of those is computed exactly once.
//
// cachingObject implements the metav1.Object interface (accessors for
// all metadata fields). However, setters for all fields except from
// SelfLink (which is set lately in the path) are ignored.
// all metadata fields).
type cachingObject struct {
lock sync.RWMutex
// deepCopied defines whether the object below has already been
// deep copied. The operation is performed lazily on the first
// setXxx operation.
//
// The lazy deep-copy is useful because effectively the only
// case in which we set any field is ResourceVersion for
// DELETE events, so in all other cases we can avoid
// performing any deep copies.
deepCopied bool
// Object for which serializations are cached.
object metaRuntimeInterface
@@ -80,7 +89,10 @@ type cachingObject struct {
// metav1.Object type.
func newCachingObject(object runtime.Object) (*cachingObject, error) {
if obj, ok := object.(metaRuntimeInterface); ok {
result := &cachingObject{object: obj.DeepCopyObject().(metaRuntimeInterface)}
result := &cachingObject{
object: obj,
deepCopied: false,
}
result.serializations.Store(make(serializationsCache))
return result, nil
}
@@ -125,6 +137,10 @@ func (o *cachingObject) CacheEncode(id runtime.Identifier, encode func(runtime.O
result := o.getSerializationResult(id)
result.once.Do(func() {
buffer := bytes.NewBuffer(nil)
// TODO(wojtek-t): This is currently making a copy to avoid races
// in cases where encoding is making subtle object modifications,
// e.g. #82497
// Figure out if we can somehow avoid this under some conditions.
result.err = encode(o.GetObject(), buffer)
result.raw = buffer.Bytes()
})
@@ -157,7 +173,9 @@ func (o *cachingObject) DeepCopyObject() runtime.Object {
// DeepCopyObject on cachingObject is not expected to be called anywhere.
// However, to be on the safe-side, we implement it, though given the
// cache is only an optimization we ignore copying it.
result := &cachingObject{}
result := &cachingObject{
deepCopied: true,
}
result.serializations.Store(make(serializationsCache))
o.lock.RLock()
@@ -215,6 +233,10 @@ func (o *cachingObject) conditionalSet(isNoop func() bool, set func()) {
if isNoop() {
return
}
if !o.deepCopied {
o.object = o.object.DeepCopyObject().(metaRuntimeInterface)
o.deepCopied = true
}
o.invalidateCacheLocked()
set()
}
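Condensed outside the vendored code, the copy-on-first-write pattern that conditionalSet implements looks roughly like the sketch below; metaRuntime and lazyCopy are illustrative names, not the real types.

package sketch

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
)

// metaRuntime mirrors the combination of interfaces the cached object satisfies.
type metaRuntime interface {
	metav1.Object
	runtime.Object
}

type lazyCopy struct {
	object     metaRuntime
	deepCopied bool
}

// conditionalSet skips noop updates entirely and deep-copies the wrapped
// object only before the first real mutation.
func (l *lazyCopy) conditionalSet(isNoop func() bool, set func()) {
	if isNoop() {
		return
	}
	if !l.deepCopied {
		l.object = l.object.DeepCopyObject().(metaRuntime)
		l.deepCopied = true
	}
	set()
}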
@@ -373,17 +395,6 @@ func (o *cachingObject) SetOwnerReferences(references []metav1.OwnerReference) {
func() { o.object.SetOwnerReferences(references) },
)
}
func (o *cachingObject) GetClusterName() string {
o.lock.RLock()
defer o.lock.RUnlock()
return o.object.GetClusterName()
}
func (o *cachingObject) SetClusterName(clusterName string) {
o.conditionalSet(
func() bool { return o.object.GetClusterName() == clusterName },
func() { o.object.SetClusterName(clusterName) },
)
}
func (o *cachingObject) GetManagedFields() []metav1.ManagedFieldsEntry {
o.lock.RLock()
defer o.lock.RUnlock()


@@ -1,95 +0,0 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cacher
import (
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)
/*
* By default, all the following metrics are defined as falling under
* ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/20190404-kubernetes-control-plane-metrics-stability.md#stability-classes)
*
* Promoting the stability level of the metric is a responsibility of the component owner, since it
* involves explicitly acknowledging support for the metric across multiple releases, in accordance with
* the metric stability policy.
*/
var (
initCounter = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "apiserver_init_events_total",
Help: "Counter of init events processed in watchcache broken by resource type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource"},
)
terminatedWatchersCounter = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "apiserver_terminated_watchers_total",
Help: "Counter of watchers closed due to unresponsiveness broken by resource type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource"},
)
watchCacheCapacityIncreaseTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "watch_cache_capacity_increase_total",
Help: "Total number of watch cache capacity increase events broken by resource type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource"},
)
watchCacheCapacityDecreaseTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "watch_cache_capacity_decrease_total",
Help: "Total number of watch cache capacity decrease events broken by resource type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource"},
)
watchCacheCapacity = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Name: "watch_cache_capacity",
Help: "Total capacity of watch cache broken by resource type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource"},
)
)
func init() {
legacyregistry.MustRegister(initCounter)
legacyregistry.MustRegister(terminatedWatchersCounter)
legacyregistry.MustRegister(watchCacheCapacityIncreaseTotal)
legacyregistry.MustRegister(watchCacheCapacityDecreaseTotal)
legacyregistry.MustRegister(watchCacheCapacity)
}
// recordsWatchCacheCapacityChange record watchCache capacity resize(increase or decrease) operations.
func recordsWatchCacheCapacityChange(objType string, old, new int) {
if old < new {
watchCacheCapacityIncreaseTotal.WithLabelValues(objType).Inc()
return
}
watchCacheCapacityDecreaseTotal.WithLabelValues(objType).Inc()
watchCacheCapacity.WithLabelValues(objType).Set(float64(new))
}


@@ -0,0 +1,8 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- sig-instrumentation-approvers
reviewers:
- sig-instrumentation-reviewers
labels:
- sig/instrumentation


@@ -0,0 +1,174 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
compbasemetrics "k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)
const (
namespace = "apiserver"
subsystem = "watch_cache"
)
/*
* By default, all the following metrics are defined as falling under
* ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes)
*
* Promoting the stability level of the metric is a responsibility of the component owner, since it
* involves explicitly acknowledging support for the metric across multiple releases, in accordance with
* the metric stability policy.
*/
var (
listCacheCount = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Name: "cache_list_total",
Help: "Number of LIST requests served from watch cache",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource_prefix", "index"},
)
listCacheNumFetched = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Name: "cache_list_fetched_objects_total",
Help: "Number of objects read from watch cache in the course of serving a LIST request",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource_prefix", "index"},
)
listCacheNumReturned = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Name: "cache_list_returned_objects_total",
Help: "Number of objects returned for a LIST request from watch cache",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource_prefix"},
)
InitCounter = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Name: "init_events_total",
Help: "Counter of init events processed in watch cache broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
EventsCounter = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "events_dispatched_total",
Help: "Counter of events dispatched in watch cache broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
TerminatedWatchersCounter = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Name: "terminated_watchers_total",
Help: "Counter of watchers closed due to unresponsiveness broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
watchCacheCapacityIncreaseTotal = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Subsystem: subsystem,
Name: "capacity_increase_total",
Help: "Total number of watch cache capacity increase events broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
watchCacheCapacityDecreaseTotal = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Subsystem: subsystem,
Name: "capacity_decrease_total",
Help: "Total number of watch cache capacity decrease events broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
WatchCacheCapacity = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Subsystem: subsystem,
Name: "capacity",
Help: "Total capacity of watch cache broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
WatchCacheInitializations = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "initializations_total",
Help: "Counter of watch cache initializations broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
)
var registerMetrics sync.Once
// Register all metrics.
func Register() {
// Register the metrics.
registerMetrics.Do(func() {
legacyregistry.MustRegister(listCacheCount)
legacyregistry.MustRegister(listCacheNumFetched)
legacyregistry.MustRegister(listCacheNumReturned)
legacyregistry.MustRegister(InitCounter)
legacyregistry.MustRegister(EventsCounter)
legacyregistry.MustRegister(TerminatedWatchersCounter)
legacyregistry.MustRegister(watchCacheCapacityIncreaseTotal)
legacyregistry.MustRegister(watchCacheCapacityDecreaseTotal)
legacyregistry.MustRegister(WatchCacheCapacity)
legacyregistry.MustRegister(WatchCacheInitializations)
})
}
// RecordListCacheMetrics notes various metrics of the cost to serve a LIST request
func RecordListCacheMetrics(resourcePrefix, indexName string, numFetched, numReturned int) {
listCacheCount.WithLabelValues(resourcePrefix, indexName).Inc()
listCacheNumFetched.WithLabelValues(resourcePrefix, indexName).Add(float64(numFetched))
listCacheNumReturned.WithLabelValues(resourcePrefix).Add(float64(numReturned))
}
// RecordsWatchCacheCapacityChange records watchCache capacity resize (increase or decrease) operations.
func RecordsWatchCacheCapacityChange(objType string, old, new int) {
WatchCacheCapacity.WithLabelValues(objType).Set(float64(new))
if old < new {
watchCacheCapacityIncreaseTotal.WithLabelValues(objType).Inc()
return
}
watchCacheCapacityDecreaseTotal.WithLabelValues(objType).Inc()
}
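A brief usage sketch of the new metrics API, mirroring the call sites added in cacher.go and watch_cache.go above (the argument values are made up):

metrics.Register() // idempotent, guarded by sync.Once
metrics.RecordListCacheMetrics("/pods", "spec.nodeName", 120, 7)  // resourcePrefix, index, fetched, returned
metrics.RecordsWatchCacheCapacityChange("*core.Pod", 1024, 2048)  // objType, old, new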

vendor/k8s.io/apiserver/pkg/storage/cacher/ready.go (generated vendored file, 96 lines)

@@ -0,0 +1,96 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cacher
import (
"fmt"
"sync"
)
type status int
const (
Pending status = iota
Ready
Stopped
)
// ready is a three-state condition variable that blocks until it is Ready, unless it is Stopped.
// Its initial state is Pending.
type ready struct {
state status
c *sync.Cond
}
func newReady() *ready {
return &ready{
c: sync.NewCond(&sync.RWMutex{}),
state: Pending,
}
}
// wait blocks until it is Ready or Stopped; it returns an error if it is Stopped.
func (r *ready) wait() error {
r.c.L.Lock()
defer r.c.L.Unlock()
for r.state == Pending {
r.c.Wait()
}
switch r.state {
case Ready:
return nil
case Stopped:
return fmt.Errorf("apiserver cacher is stopped")
default:
return fmt.Errorf("unexpected apiserver cache state: %v", r.state)
}
}
// check returns true only if it is Ready.
func (r *ready) check() bool {
// TODO: Make check() function more sophisticated, in particular
// allow it to behave as "waitWithTimeout".
rwMutex := r.c.L.(*sync.RWMutex)
rwMutex.RLock()
defer rwMutex.RUnlock()
return r.state == Ready
}
// set sets the state to Pending (false) or Ready (true); it has no effect if the state is Stopped.
func (r *ready) set(ok bool) {
r.c.L.Lock()
defer r.c.L.Unlock()
if r.state == Stopped {
return
}
if ok {
r.state = Ready
} else {
r.state = Pending
}
r.c.Broadcast()
}
// stop the condition variable and set it as Stopped. This state is irreversible.
func (r *ready) stop() {
r.c.L.Lock()
defer r.c.L.Unlock()
if r.state != Stopped {
r.state = Stopped
r.c.Broadcast()
}
}
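A hedged sketch of how the three-state gate is intended to be used; compare the c.ready.wait()/set()/stop() call sites in the cacher.go hunks above:

r := newReady()
go func() {
	// ... after the initial list and watch succeed ...
	r.set(true) // Pending -> Ready, wakes every waiter
}()
if err := r.wait(); err != nil {
	// Stopped: the cacher was shut down; callers surface this as 503.
	return err
}
// serve from the cache; r.check() offers a non-blocking probe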


@@ -19,6 +19,8 @@ package cacher
import (
"sync"
"time"
"k8s.io/utils/clock"
)
const (
@@ -28,13 +30,14 @@ const (
// timeBudget implements a budget of time that you can use and is
// periodically being refreshed. The pattern to use it is:
// budget := newTimeBudget(...)
// ...
// timeout := budget.takeAvailable()
// // Now you can spend at most timeout on doing stuff
// ...
// // If you didn't use all timeout, return what you didn't use
// budget.returnUnused(<unused part of timeout>)
//
// budget := newTimeBudget(...)
// ...
// timeout := budget.takeAvailable()
// // Now you can spend at most timeout on doing stuff
// ...
// // If you didn't use all timeout, return what you didn't use
// budget.returnUnused(<unused part of timeout>)
//
// NOTE: It's not recommended to be used concurrently from multiple threads -
// if first user takes the whole timeout, the second one will get 0 timeout
@@ -46,42 +49,39 @@ type timeBudget interface {
type timeBudgetImpl struct {
sync.Mutex
budget time.Duration
refresh time.Duration
clock clock.Clock
budget time.Duration
maxBudget time.Duration
refresh time.Duration
// last store last access time
last time.Time
}
func newTimeBudget(stopCh <-chan struct{}) timeBudget {
func newTimeBudget() timeBudget {
result := &timeBudgetImpl{
clock: clock.RealClock{},
budget: time.Duration(0),
refresh: refreshPerSecond,
maxBudget: maxBudget,
}
go result.periodicallyRefresh(stopCh)
result.last = result.clock.Now()
return result
}
func (t *timeBudgetImpl) periodicallyRefresh(stopCh <-chan struct{}) {
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
for {
select {
case <-ticker.C:
t.Lock()
if t.budget = t.budget + t.refresh; t.budget > t.maxBudget {
t.budget = t.maxBudget
}
t.Unlock()
case <-stopCh:
return
}
}
}
func (t *timeBudgetImpl) takeAvailable() time.Duration {
t.Lock()
defer t.Unlock()
// budget accumulated since last access
now := t.clock.Now()
acc := now.Sub(t.last).Seconds() * t.refresh.Seconds()
if acc < 0 {
acc = 0
}
// update current budget and store the current time
if t.budget = t.budget + time.Duration(acc*1e9); t.budget > t.maxBudget {
t.budget = t.maxBudget
}
t.last = now
result := t.budget
t.budget = time.Duration(0)
return result
@@ -94,6 +94,8 @@ func (t *timeBudgetImpl) returnUnused(unused time.Duration) {
// We used more than allowed.
return
}
// add the unused time directly to the budget
// takeAvailable() will take into account the elapsed time
if t.budget = t.budget + unused; t.budget > t.maxBudget {
t.budget = t.maxBudget
}
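The lazy accrual above replaces the old background refresh goroutine. A small sketch restating the arithmetic in isolation (accrue is a hypothetical helper; the real refreshPerSecond and maxBudget constants are defined earlier in this file and not shown in the hunk):

// accrue returns the new budget after `elapsed` of idle time: the budget
// grows by `refresh` per elapsed second and is capped at maxBudget.
func accrue(budget, refresh, maxBudget, elapsed time.Duration) time.Duration {
	acc := elapsed.Seconds() * refresh.Seconds()
	if acc < 0 {
		acc = 0
	}
	if budget = budget + time.Duration(acc*float64(time.Second)); budget > maxBudget {
		budget = maxBudget
	}
	return budget
}

// For example, with refresh = 100ms, 2s of idleness accrues 2 * 0.1s = 200ms
// of extra budget (subject to the maxBudget cap).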


@@ -18,6 +18,7 @@ package cacher
import (
"fmt"
"math"
"reflect"
"sort"
"sync"
@@ -27,11 +28,12 @@ import (
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/clock"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/cacher/metrics"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
utiltrace "k8s.io/utils/trace"
)
@@ -189,6 +191,9 @@ type watchCache struct {
// cacher's objectType.
objectType reflect.Type
// For testing cache interval invalidation.
indexValidator indexValidator
}
func newWatchCache(
@@ -217,8 +222,10 @@ func newWatchCache(
objectType: objectType,
}
objType := objectType.String()
watchCacheCapacity.WithLabelValues(objType).Set(float64(wc.capacity))
metrics.WatchCacheCapacity.WithLabelValues(objType).Set(float64(wc.capacity))
wc.cond = sync.NewCond(wc.RLocker())
wc.indexValidator = wc.isIndexValidLocked
return wc
}
@@ -380,7 +387,7 @@ func (w *watchCache) doCacheResizeLocked(capacity int) {
newCache[i%capacity] = w.cache[i%w.capacity]
}
w.cache = newCache
recordsWatchCacheCapacityChange(w.objectType.String(), w.capacity, capacity)
metrics.RecordsWatchCacheCapacityChange(w.objectType.String(), w.capacity, capacity)
w.capacity = capacity
}
@@ -420,17 +427,27 @@ func (w *watchCache) List() []interface{} {
// You HAVE TO explicitly call w.RUnlock() after this function.
func (w *watchCache) waitUntilFreshAndBlock(resourceVersion uint64, trace *utiltrace.Trace) error {
startTime := w.clock.Now()
go func() {
// Wake us up when the time limit has expired. The docs
// promise that time.After (well, NewTimer, which it calls)
// will wait *at least* the duration given. Since this go
// routine starts sometime after we record the start time, and
// it will wake up the loop below sometime after the broadcast,
// we don't need to worry about waking it up before the time
// has expired accidentally.
<-w.clock.After(blockTimeout)
w.cond.Broadcast()
}()
// In case resourceVersion is 0, we accept an arbitrarily stale result.
// As a result, the condition in the below for loop will never be
// satisfied (w.resourceVersion is never negative), so this call will
// never hit w.cond.Wait().
// Consequently, we can optimize the code by not firing the wakeup
// function (and avoid starting a goroutine), especially given that
// resourceVersion=0 is the most common case.
if resourceVersion > 0 {
go func() {
// Wake us up when the time limit has expired. The docs
// promise that time.After (well, NewTimer, which it calls)
// will wait *at least* the duration given. Since this go
// routine starts sometime after we record the start time, and
// it will wake up the loop below sometime after the broadcast,
// we don't need to worry about waking it up before the time
// has expired accidentally.
<-w.clock.After(blockTimeout)
w.cond.Broadcast()
}()
}
w.RLock()
if trace != nil {
@@ -449,12 +466,13 @@ func (w *watchCache) waitUntilFreshAndBlock(resourceVersion uint64, trace *utilt
return nil
}
// WaitUntilFreshAndList returns list of pointers to <storeElement> objects.
func (w *watchCache) WaitUntilFreshAndList(resourceVersion uint64, matchValues []storage.MatchValue, trace *utiltrace.Trace) ([]interface{}, uint64, error) {
// WaitUntilFreshAndList returns list of pointers to `storeElement` objects along
// with their ResourceVersion and the name of the index, if any, that was used.
func (w *watchCache) WaitUntilFreshAndList(resourceVersion uint64, matchValues []storage.MatchValue, trace *utiltrace.Trace) ([]interface{}, uint64, string, error) {
err := w.waitUntilFreshAndBlock(resourceVersion, trace)
defer w.RUnlock()
if err != nil {
return nil, 0, err
return nil, 0, "", err
}
// This isn't the place where we do "final filtering" - only some "prefiltering" is happening here. So the only
@@ -463,10 +481,10 @@ func (w *watchCache) WaitUntilFreshAndList(resourceVersion uint64, matchValues [
// TODO: if multiple indexes match, return the one with the fewest items, so as to do as much filtering as possible.
for _, matchValue := range matchValues {
if result, err := w.store.ByIndex(matchValue.IndexName, matchValue.Value); err == nil {
return result, w.resourceVersion, nil
return result, w.resourceVersion, matchValue.IndexName, nil
}
}
return w.store.List(), w.resourceVersion, nil
return w.store.List(), w.resourceVersion, "", nil
}
// WaitUntilFreshAndGet returns a pointers to <storeElement> object.
@@ -557,7 +575,74 @@ func (w *watchCache) SetOnReplace(onReplace func()) {
w.onReplace = onReplace
}
func (w *watchCache) GetAllEventsSinceThreadUnsafe(resourceVersion uint64) ([]*watchCacheEvent, error) {
func (w *watchCache) Resync() error {
// Nothing to do
return nil
}
func (w *watchCache) currentCapacity() int {
w.Lock()
defer w.Unlock()
return w.capacity
}
const (
// minWatchChanSize is the min size of channels used by the watch.
// We keep that set to 10 for "backward compatibility" until we
// convince ourselves based on some metrics that decreasing is safe.
minWatchChanSize = 10
// maxWatchChanSizeWithIndexAndTrigger is the max size of the channel
// used by the watch using the index and trigger selector.
maxWatchChanSizeWithIndexAndTrigger = 10
// maxWatchChanSizeWithIndexWithoutTrigger is the max size of the channel
// used by the watch using the index but without a triggering selector.
// We keep that set to 1000 for "backward compatibility", until we
// convince ourselves based on some metrics that decreasing is safe.
maxWatchChanSizeWithIndexWithoutTrigger = 1000
// maxWatchChanSizeWithoutIndex is the max size of the channel
// used by the watch not using the index.
// TODO(wojtek-t): Figure out if the value shouldn't be higher.
maxWatchChanSizeWithoutIndex = 100
)
func (w *watchCache) suggestedWatchChannelSize(indexExists, triggerUsed bool) int {
// To estimate the channel size we use a heuristic that a channel
// should roughly be able to keep one second of history.
// We don't have exact data, but given we store updates from
// the last <eventFreshDuration>, we approximate it by dividing the
// capacity by the length of the history window.
chanSize := int(math.Ceil(float64(w.currentCapacity()) / eventFreshDuration.Seconds()))
// Finally we adjust the size to avoid ending up with too low or
// too large values.
if chanSize < minWatchChanSize {
chanSize = minWatchChanSize
}
var maxChanSize int
switch {
case indexExists && triggerUsed:
maxChanSize = maxWatchChanSizeWithIndexAndTrigger
case indexExists && !triggerUsed:
maxChanSize = maxWatchChanSizeWithIndexWithoutTrigger
case !indexExists:
maxChanSize = maxWatchChanSizeWithoutIndex
}
if chanSize > maxChanSize {
chanSize = maxChanSize
}
return chanSize
}
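For a rough sense of the numbers, a standalone version of the same heuristic; the capacity and history-window values below are assumptions for illustration, not values taken from this change:

func exampleSuggestedChanSize() int {
	const (
		capacity           = 1024 // assumed current watch cache capacity
		freshWindowSeconds = 75.0 // assumed history window in seconds
		minSize            = 10   // mirrors minWatchChanSize
		maxSizeNoIndex     = 100  // mirrors maxWatchChanSizeWithoutIndex
	)
	// ceil(1024 / 75) = 14: above the minimum and below the no-index cap,
	// so 14 would be the suggested channel size for these numbers.
	size := int(math.Ceil(capacity / freshWindowSeconds))
	if size < minSize {
		size = minSize
	}
	if size > maxSizeNoIndex {
		size = maxSizeNoIndex
	}
	return size
}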
// isIndexValidLocked checks if a given index is still valid.
// This assumes that the lock is held.
func (w *watchCache) isIndexValidLocked(index int) bool {
return index >= w.startIndex
}
// getAllEventsSinceLocked returns a watchCacheInterval that can be used to
// retrieve events since a certain resourceVersion. This function assumes
// it is called while holding the watchCache lock.
func (w *watchCache) getAllEventsSinceLocked(resourceVersion uint64) (*watchCacheInterval, error) {
size := w.endIndex - w.startIndex
var oldest uint64
switch {
@@ -583,27 +668,11 @@ func (w *watchCache) GetAllEventsSinceThreadUnsafe(resourceVersion uint64) ([]*w
// current state and only then start watching from that point.
//
// TODO: In v2 api, we should stop returning the current state - #13969.
allItems := w.store.List()
result := make([]*watchCacheEvent, len(allItems))
for i, item := range allItems {
elem, ok := item.(*storeElement)
if !ok {
return nil, fmt.Errorf("not a storeElement: %v", elem)
}
objLabels, objFields, err := w.getAttrsFunc(elem.Object)
if err != nil {
return nil, err
}
result[i] = &watchCacheEvent{
Type: watch.Added,
Object: elem.Object,
ObjLabels: objLabels,
ObjFields: objFields,
Key: elem.Key,
ResourceVersion: w.resourceVersion,
}
ci, err := newCacheIntervalFromStore(w.resourceVersion, w.store, w.getAttrsFunc)
if err != nil {
return nil, err
}
return result, nil
return ci, nil
}
if resourceVersion < oldest-1 {
return nil, errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d (%d)", resourceVersion, oldest-1))
@@ -614,20 +683,9 @@ func (w *watchCache) GetAllEventsSinceThreadUnsafe(resourceVersion uint64) ([]*w
return w.cache[(w.startIndex+i)%w.capacity].ResourceVersion > resourceVersion
}
first := sort.Search(size, f)
result := make([]*watchCacheEvent, size-first)
for i := 0; i < size-first; i++ {
result[i] = w.cache[(w.startIndex+first+i)%w.capacity]
indexerFunc := func(i int) *watchCacheEvent {
return w.cache[i%w.capacity]
}
return result, nil
}
func (w *watchCache) GetAllEventsSince(resourceVersion uint64) ([]*watchCacheEvent, error) {
w.RLock()
defer w.RUnlock()
return w.GetAllEventsSinceThreadUnsafe(resourceVersion)
}
func (w *watchCache) Resync() error {
// Nothing to do
return nil
ci := newCacheInterval(w.startIndex+first, w.endIndex, indexerFunc, w.indexValidator, &w.RWMutex)
return ci, nil
}
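Compared with the removed code path, which copied every matching event into a slice while holding the lock, the returned interval only records absolute positions and resolves them lazily through the indexerFunc, which maps an absolute position onto a circular-buffer slot with a modulo. A trivial standalone sketch of that mapping, using an assumed capacity of 8 purely for illustration:

func exampleCircularSlot() []int {
	const capacity = 8 // assumed capacity, for illustration only
	// Absolute positions 7, 8 and 9 wrap around to slots 7, 0 and 1.
	positions := []int{7, 8, 9}
	slots := make([]int, 0, len(positions))
	for _, p := range positions {
		slots = append(slots, p%capacity)
	}
	return slots
}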

View File

@@ -0,0 +1,226 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cacher
import (
"fmt"
"sync"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/tools/cache"
)
// watchCacheInterval serves as an abstraction over a source
// of watchCacheEvents. It maintains a window of events over
// an underlying source and these events can be served using
// the exposed Next() API. The main intent for doing things
// this way is to introduce an upper bound of memory usage
// for starting a watch and reduce the maximum possible time
// interval for which the lock would be held while events are
// copied over.
//
// The source of events for the interval is typically either
// the watchCache circular buffer, if events being retrieved
// need to be for resource versions > 0 or the underlying
// implementation of Store, if resource version = 0.
//
// Furthermore, an interval can be either valid or invalid at
// any given point of time. The notion of validity makes sense
// only in cases where the window of events in the underlying
// source can change over time - i.e. for watchCache circular
// buffer. When the circular buffer is full and an event needs
// to be popped off, watchCache::startIndex is incremented. In
// this case, an interval tracking that popped event is valid
// only if it has already been copied to its internal buffer.
// However, for efficiency we perform that lazily and we mark
// an interval as invalid iff we need to copy events from the
// watchCache and we end up needing events that have already
// been popped off. This translates to the following condition:
//
// watchCacheInterval::startIndex >= watchCache::startIndex.
//
// When this condition becomes false, the interval is no longer
// valid and should not be used to retrieve and serve elements
// from the underlying source.
type watchCacheInterval struct {
// startIndex denotes the starting point of the interval
// being considered. The value is the index in the actual
// source of watchCacheEvents. If the source of events is
// the watchCache, then this must be used modulo capacity.
startIndex int
// endIndex denotes the ending point of the interval being
// considered. The value is the index in the actual source
// of events. If the source of the events is the watchCache,
// then this should be used modulo capacity.
endIndex int
// indexer is meant to inject behaviour for how an event must
// be retrieved from the underlying source given an index.
indexer indexerFunc
// indexValidator is used to check if a given index is still
// valid. If it is deemed that the index is not
// valid, then this interval can no longer be used to serve
// events. Use of indexValidator is warranted only in cases
// where the window of events in the underlying source can
// change over time. Furthermore, an interval is invalid if
// its startIndex no longer coincides with the startIndex of
// underlying source.
indexValidator indexValidator
// buffer holds watchCacheEvents that this interval returns on
// a call to Next(). This exists mainly to reduce acquiring the
// lock on each invocation of Next().
buffer *watchCacheIntervalBuffer
// lock effectively protects access to the underlying source
// of events through the indexer and indexValidator functions.
//
// Given that indexer and indexValidator only read state, a
// Locker obtained through RLocker() is provided if possible.
lock sync.Locker
}
type attrFunc func(runtime.Object) (labels.Set, fields.Set, error)
type indexerFunc func(int) *watchCacheEvent
type indexValidator func(int) bool
func newCacheInterval(startIndex, endIndex int, indexer indexerFunc, indexValidator indexValidator, locker sync.Locker) *watchCacheInterval {
return &watchCacheInterval{
startIndex: startIndex,
endIndex: endIndex,
indexer: indexer,
indexValidator: indexValidator,
buffer: &watchCacheIntervalBuffer{buffer: make([]*watchCacheEvent, bufferSize)},
lock: locker,
}
}
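To make the validity rule described above concrete, here is a toy sequence (an assumed illustration, not a test from this change) in which the underlying source drops its oldest event before the interval has buffered it, so the next call to Next() reports the interval as invalidated:

func exampleInvalidation() error {
	events := []*watchCacheEvent{{ResourceVersion: 1}, {ResourceVersion: 2}}
	sourceStartIndex := 0
	var mu sync.Mutex
	ci := newCacheInterval(
		0, len(events),
		func(i int) *watchCacheEvent { return events[i] },
		func(i int) bool { return i >= sourceStartIndex },
		&mu,
	)
	// Simulate the circular buffer evicting its oldest event before the
	// interval copied anything into its own buffer.
	sourceStartIndex = 1
	_, err := ci.Next() // fails: interval startIndex 0 is no longer valid
	return err
}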
// newCacheIntervalFromStore is meant to handle the case of rv=0, such that the events
// returned by Next() need to be events from a List() done on the underlying store of
// the watch cache.
func newCacheIntervalFromStore(resourceVersion uint64, store cache.Indexer, getAttrsFunc attrFunc) (*watchCacheInterval, error) {
buffer := &watchCacheIntervalBuffer{}
allItems := store.List()
buffer.buffer = make([]*watchCacheEvent, len(allItems))
for i, item := range allItems {
elem, ok := item.(*storeElement)
if !ok {
return nil, fmt.Errorf("not a storeElement: %v", elem)
}
objLabels, objFields, err := getAttrsFunc(elem.Object)
if err != nil {
return nil, err
}
buffer.buffer[i] = &watchCacheEvent{
Type: watch.Added,
Object: elem.Object,
ObjLabels: objLabels,
ObjFields: objFields,
Key: elem.Key,
ResourceVersion: resourceVersion,
}
buffer.endIndex++
}
ci := &watchCacheInterval{
startIndex: 0,
// Simulate that we already have all the events we're looking for.
endIndex: 0,
buffer: buffer,
}
return ci, nil
}
// Next returns the next item in the cache interval provided the cache
// interval is still valid. An error is returned if the interval is
// invalidated.
func (wci *watchCacheInterval) Next() (*watchCacheEvent, error) {
// if there are items in the buffer to return, return from
// the buffer.
if event, exists := wci.buffer.next(); exists {
return event, nil
}
// check if there are still other events in this interval
// that can be processed.
if wci.startIndex >= wci.endIndex {
return nil, nil
}
wci.lock.Lock()
defer wci.lock.Unlock()
if valid := wci.indexValidator(wci.startIndex); !valid {
return nil, fmt.Errorf("cache interval invalidated, interval startIndex: %d", wci.startIndex)
}
wci.fillBuffer()
if event, exists := wci.buffer.next(); exists {
return event, nil
}
return nil, nil
}
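Callers are expected to treat a (nil, nil) result as "interval exhausted" and any error as a signal to stop serving from the interval. A minimal drain loop, assumed to sit in the same package; this is an illustrative sketch, not the cacheWatcher's actual processing code:

func exampleDrain(ci *watchCacheInterval, send func(*watchCacheEvent)) error {
	for {
		event, err := ci.Next()
		if err != nil {
			// The interval was invalidated; the watcher has to be stopped
			// and the client has to relist and re-establish the watch.
			return err
		}
		if event == nil {
			return nil // all events in the interval have been served
		}
		send(event)
	}
}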
func (wci *watchCacheInterval) fillBuffer() {
wci.buffer.startIndex = 0
wci.buffer.endIndex = 0
for wci.startIndex < wci.endIndex && !wci.buffer.isFull() {
event := wci.indexer(wci.startIndex)
if event == nil {
break
}
wci.buffer.buffer[wci.buffer.endIndex] = event
wci.buffer.endIndex++
wci.startIndex++
}
}
const bufferSize = 100
// watchCacheIntervalBuffer is used to reduce acquiring
// the lock on each invocation of watchCacheInterval.Next().
type watchCacheIntervalBuffer struct {
// buffer is used to hold watchCacheEvents that
// the interval returns on a call to Next().
buffer []*watchCacheEvent
// The first element of buffer is at position startIndex;
// endIndex points one past the last element (it is exclusive).
startIndex int
endIndex int
}
// next returns the next event present in the interval buffer provided
// it is not empty.
func (wcib *watchCacheIntervalBuffer) next() (*watchCacheEvent, bool) {
if wcib.isEmpty() {
return nil, false
}
next := wcib.buffer[wcib.startIndex]
wcib.startIndex++
return next, true
}
func (wcib *watchCacheIntervalBuffer) isFull() bool {
return wcib.endIndex >= bufferSize
}
func (wcib *watchCacheIntervalBuffer) isEmpty() bool {
return wcib.startIndex == wcib.endIndex
}