update dependencies (#6267)

Signed-off-by: hongming <coder.scala@gmail.com>
This commit is contained in:
hongming
2024-11-06 10:27:06 +08:00
committed by GitHub
parent faf255a084
commit cfebd96a1f
4263 changed files with 341374 additions and 132036 deletions

View File

@@ -21,10 +21,12 @@ import (
"fmt"
"net/http"
"reflect"
"strings"
"sync"
"time"
"go.opentelemetry.io/otel/attribute"
"google.golang.org/grpc/metadata"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
@@ -41,16 +43,18 @@ import (
"k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/cacher/metrics"
etcdfeature "k8s.io/apiserver/pkg/storage/feature"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/cache"
"k8s.io/component-base/tracing"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
"k8s.io/utils/ptr"
)
var (
emptyFunc = func(bool) {}
emptyFunc = func(bool) {}
coreNamespaceResource = schema.GroupResource{Group: "", Resource: "namespaces"}
)
const (
@@ -397,10 +401,18 @@ func NewCacherFromConfig(config Config) (*Cacher, error) {
// so that future reuse does not get a spurious timeout.
<-cacher.timer.C
}
progressRequester := newConditionalProgressRequester(config.Storage.RequestWatchProgress, config.Clock)
var contextMetadata metadata.MD
if utilfeature.DefaultFeatureGate.Enabled(features.SeparateCacheWatchRPC) {
// Add grpc context metadata to watch and progress notify requests done by cacher to:
// * Prevent starvation of the watch opened by cacher, by moving it to a separate Watch RPC from the watch requests that bypass cacher.
// * Ensure that progress notification requests are executed on the same Watch RPC as their watch, which is required for them to work.
contextMetadata = metadata.New(map[string]string{"source": "cache"})
}
progressRequester := newConditionalProgressRequester(config.Storage.RequestWatchProgress, config.Clock, contextMetadata)
watchCache := newWatchCache(
config.KeyFunc, cacher.processEvent, config.GetAttrsFunc, config.Versioner, config.Indexers, config.Clock, config.GroupResource, progressRequester)
listerWatcher := NewListerWatcher(config.Storage, config.ResourcePrefix, config.NewListFunc)
listerWatcher := NewListerWatcher(config.Storage, config.ResourcePrefix, config.NewListFunc, contextMetadata)
reflectorName := "storage/cacher.go:" + config.ResourcePrefix
reflector := cache.NewNamedReflector(reflectorName, listerWatcher, obj, watchCache, 0)
@@ -413,7 +425,7 @@ func NewCacherFromConfig(config Config) (*Cacher, error) {
reflector.MaxInternalErrorRetryDuration = time.Second * 30
// since the watch-list is provided by the watch cache instruct
// the reflector to issue a regular LIST against the store
reflector.UseWatchList = false
reflector.UseWatchList = ptr.To(false)
cacher.watchCache = watchCache
cacher.reflector = reflector
@@ -513,7 +525,8 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
if !utilfeature.DefaultFeatureGate.Enabled(features.WatchList) && opts.SendInitialEvents != nil {
opts.SendInitialEvents = nil
}
if opts.SendInitialEvents == nil && opts.ResourceVersion == "" {
// TODO: we should eventually get rid of this legacy case
if utilfeature.DefaultFeatureGate.Enabled(features.WatchFromStorageWithoutResourceVersion) && opts.SendInitialEvents == nil && opts.ResourceVersion == "" {
return c.storage.Watch(ctx, key, opts)
}
requestedWatchRV, err := c.versioner.ParseResourceVersion(opts.ResourceVersion)
@@ -521,9 +534,18 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
return nil, err
}
readyGeneration, err := c.ready.waitAndReadGeneration(ctx)
if err != nil {
return nil, errors.NewServiceUnavailable(err.Error())
var readyGeneration int
if utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) {
var ok bool
readyGeneration, ok = c.ready.checkAndReadGeneration()
if !ok {
return nil, errors.NewTooManyRequests("storage is (re)initializing", 1)
}
} else {
readyGeneration, err = c.ready.waitAndReadGeneration(ctx)
if err != nil {
return nil, errors.NewServiceUnavailable(err.Error())
}
}
// determine the namespace and name scope of the watch, first from the request, secondarily from the field selector
@@ -539,12 +561,19 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
scope.name = selectorName
}
// for request like '/api/v1/watch/namespaces/*', set scope.namespace to empty.
// namespaces don't populate metadata.namespace in ObjFields.
if c.groupResource == coreNamespaceResource && len(scope.namespace) > 0 && scope.namespace == scope.name {
scope.namespace = ""
}
triggerValue, triggerSupported := "", false
if c.indexedTrigger != nil {
for _, field := range pred.IndexFields {
if field == c.indexedTrigger.indexName {
if value, ok := pred.Field.RequiresExactMatch(field); ok {
triggerValue, triggerSupported = value, true
break
}
}
}
@@ -557,14 +586,20 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
// watchers on our watcher having a processing hiccup
chanSize := c.watchCache.suggestedWatchChannelSize(c.indexedTrigger != nil, triggerSupported)
// Determine a function that computes the bookmarkAfterResourceVersion
bookmarkAfterResourceVersionFn, err := c.getBookmarkAfterResourceVersionLockedFunc(ctx, requestedWatchRV, opts)
// client-go is going to fall back to a standard LIST on any error
// returned for watch-list requests
if isListWatchRequest(opts) && !etcdfeature.DefaultFeatureSupportChecker.Supports(storage.RequestWatchProgress) {
return newErrWatcher(fmt.Errorf("a watch stream was requested by the client but the required storage feature %s is disabled", storage.RequestWatchProgress)), nil
}
// Determine the ResourceVersion to which the watch cache must be synchronized
requiredResourceVersion, err := c.getWatchCacheResourceVersion(ctx, requestedWatchRV, opts)
if err != nil {
return newErrWatcher(err), nil
}
// Determine a function that computes the watchRV we should start from
startWatchResourceVersionFn, err := c.getStartResourceVersionForWatchLockedFunc(ctx, requestedWatchRV, opts)
// Determine a function that computes the bookmarkAfterResourceVersion
bookmarkAfterResourceVersionFn, err := c.getBookmarkAfterResourceVersionLockedFunc(requestedWatchRV, requiredResourceVersion, opts)
if err != nil {
return newErrWatcher(err), nil
}
@@ -580,7 +615,7 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
// to compute watcher.forget function (which has to happen under lock).
watcher := newCacheWatcher(
chanSize,
filterWithAttrsFunction(key, pred),
filterWithAttrsAndPrefixFunction(key, pred),
emptyFunc,
c.versioner,
deadline,
@@ -596,7 +631,7 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
// moreover even though the c.waitUntilWatchCacheFreshAndForceAllEvents acquires a lock
// it is safe to release the lock after the method finishes because we don't require
// any atomicity between the call to the method and further calls that actually get the events.
forceAllEvents, err := c.waitUntilWatchCacheFreshAndForceAllEvents(ctx, requestedWatchRV, opts)
err = c.waitUntilWatchCacheFreshAndForceAllEvents(ctx, requiredResourceVersion, opts)
if err != nil {
return newErrWatcher(err), nil
}
@@ -609,13 +644,8 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
c.watchCache.RLock()
defer c.watchCache.RUnlock()
startWatchRV := startWatchResourceVersionFn()
var cacheInterval *watchCacheInterval
if forceAllEvents {
cacheInterval, err = c.watchCache.getIntervalFromStoreLocked()
} else {
cacheInterval, err = c.watchCache.getAllEventsSinceLocked(startWatchRV)
}
cacheInterval, err = c.watchCache.getAllEventsSinceLocked(requiredResourceVersion, key, opts)
if err != nil {
// To match the uncached watch implementation, once we have passed authn/authz/admission,
// and successfully parsed a resource version, other errors must fail with a watch event of type ERROR,
@@ -657,7 +687,7 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
return newImmediateCloseWatcher(), nil
}
go watcher.processInterval(ctx, cacheInterval, startWatchRV)
go watcher.processInterval(ctx, cacheInterval, requiredResourceVersion)
return watcher, nil
}
@@ -669,6 +699,14 @@ func (c *Cacher) Get(ctx context.Context, key string, opts storage.GetOptions, o
return c.storage.Get(ctx, key, opts, objPtr)
}
if utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) {
if !c.ready.check() {
// If Cache is not initialized, delegate Get requests to storage
// as described in https://kep.k8s.io/4568
return c.storage.Get(ctx, key, opts, objPtr)
}
}
// If resourceVersion is specified, serve it from cache.
// It's guaranteed that the returned value is at least as
// fresh as the given resourceVersion.
@@ -677,16 +715,18 @@ func (c *Cacher) Get(ctx context.Context, key string, opts storage.GetOptions, o
return err
}
if getRV == 0 && !c.ready.check() {
// If Cacher is not yet initialized and we don't require any specific
// minimal resource version, simply forward the request to storage.
return c.storage.Get(ctx, key, opts, objPtr)
}
// Do not create a trace - it's not for free and there are tons
// of Get requests. We can add it if it will be really needed.
if err := c.ready.wait(ctx); err != nil {
return errors.NewServiceUnavailable(err.Error())
if !utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) {
if getRV == 0 && !c.ready.check() {
// If Cacher is not yet initialized and we don't require any specific
// minimal resource version, simply forward the request to storage.
return c.storage.Get(ctx, key, opts, objPtr)
}
if err := c.ready.wait(ctx); err != nil {
return errors.NewServiceUnavailable(err.Error())
}
}
objVal, err := conversion.EnforcePtr(objPtr)
@@ -722,17 +762,40 @@ func shouldDelegateList(opts storage.ListOptions) bool {
pred := opts.Predicate
match := opts.ResourceVersionMatch
consistentListFromCacheEnabled := utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache)
requestWatchProgressSupported := etcdfeature.DefaultFeatureSupportChecker.Supports(storage.RequestWatchProgress)
// Serve consistent reads from storage if ConsistentListFromCache is disabled
consistentReadFromStorage := resourceVersion == "" && !consistentListFromCacheEnabled
consistentReadFromStorage := resourceVersion == "" && !(consistentListFromCacheEnabled && requestWatchProgressSupported)
// Watch cache doesn't support continuations, so serve them from etcd.
hasContinuation := len(pred.Continue) > 0
// Serve paginated requests about revision "0" from watch cache to avoid overwhelming etcd.
hasLimit := pred.Limit > 0 && resourceVersion != "0"
// Watch cache only supports ResourceVersionMatchNotOlderThan (default).
unsupportedMatch := match != "" && match != metav1.ResourceVersionMatchNotOlderThan
// see https://kubernetes.io/docs/reference/using-api/api-concepts/#semantics-for-get-and-list
isLegacyExactMatch := opts.Predicate.Limit > 0 && match == "" && len(resourceVersion) > 0 && resourceVersion != "0"
unsupportedMatch := match != "" && match != metav1.ResourceVersionMatchNotOlderThan || isLegacyExactMatch
return consistentReadFromStorage || hasContinuation || hasLimit || unsupportedMatch
return consistentReadFromStorage || hasContinuation || unsupportedMatch
}
// computeListLimit returns the limit the cacher should apply to an incoming
// LIST request, or 0 when no limit should be applied.
//
// A limit is only honored when the predicate carries a positive Limit and the
// request does not set ResourceVersion to "0"; as of today, the limit is
// ignored for requests that set RV == 0.
//
// This function looks at neither the ResourceVersionMatch nor the
// continuation token; those parameters are validated by shouldDelegateList.
func computeListLimit(opts storage.ListOptions) int64 {
	requested := opts.Predicate.Limit
	if requested > 0 && opts.ResourceVersion != "0" {
		return requested
	}
	return 0
}
// shouldDelegateListOnNotReadyCache reports whether a LIST request has a
// positive limit and neither a label selector nor a field selector
// (a nil selector counts as empty).
func shouldDelegateListOnNotReadyCache(opts storage.ListOptions) bool {
	p := opts.Predicate
	if p.Label != nil && !p.Label.Empty() {
		// a label selector is set
		return false
	}
	if p.Field != nil && !p.Field.Empty() {
		// a field selector is set
		return false
	}
	return p.Limit > 0
}
func (c *Cacher) listItems(ctx context.Context, listRV uint64, key string, pred storage.SelectionPredicate, recursive bool) ([]interface{}, uint64, string, error) {
@@ -746,7 +809,7 @@ func (c *Cacher) listItems(ctx context.Context, listRV uint64, key string, pred
}
return nil, readResourceVersion, "", nil
}
return c.watchCache.WaitUntilFreshAndList(ctx, listRV, pred.MatcherIndex())
return c.watchCache.WaitUntilFreshAndList(ctx, listRV, key, pred.MatcherIndex(ctx))
}
// GetList implements storage.Interface
@@ -762,12 +825,31 @@ func (c *Cacher) GetList(ctx context.Context, key string, opts storage.ListOptio
if err != nil {
return err
}
if listRV == 0 && !c.ready.check() {
// If Cacher is not yet initialized and we don't require any specific
// minimal resource version, simply forward the request to storage.
return c.storage.GetList(ctx, key, opts, listObj)
if utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) {
if !c.ready.check() && shouldDelegateListOnNotReadyCache(opts) {
// If Cacher is not initialized, delegate List requests to storage
// as described in https://kep.k8s.io/4568
return c.storage.GetList(ctx, key, opts, listObj)
}
} else {
if listRV == 0 && !c.ready.check() {
// If Cacher is not yet initialized and we don't require any specific
// minimal resource version, simply forward the request to storage.
return c.storage.GetList(ctx, key, opts, listObj)
}
}
if listRV == 0 && utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache) {
// For recursive lists, we need to make sure the key ended with "/" so that we only
// get children "directories". e.g. if we have key "/a", "/a/b", "/ab", getting keys
// with prefix "/a" will return all three, while with prefix "/a/" will return only
// "/a/b" which is the correct answer.
preparedKey := key
if opts.Recursive && !strings.HasSuffix(key, "/") {
preparedKey += "/"
}
requestWatchProgressSupported := etcdfeature.DefaultFeatureSupportChecker.Supports(storage.RequestWatchProgress)
consistentRead := resourceVersion == "" && utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache) && requestWatchProgressSupported
if consistentRead {
listRV, err = storage.GetCurrentResourceVersionFromStorage(ctx, c.storage, c.newListFunc, c.resourcePrefix, c.objectType.String())
if err != nil {
return err
@@ -779,8 +861,16 @@ func (c *Cacher) GetList(ctx context.Context, key string, opts storage.ListOptio
attribute.Stringer("type", c.groupResource))
defer span.End(500 * time.Millisecond)
if err := c.ready.wait(ctx); err != nil {
return errors.NewServiceUnavailable(err.Error())
if utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) {
if !c.ready.check() {
// If Cacher is not initialized, reject List requests
// as described in https://kep.k8s.io/4568
return errors.NewTooManyRequests("storage is (re)initializing", 1)
}
} else {
if err := c.ready.wait(ctx); err != nil {
return errors.NewServiceUnavailable(err.Error())
}
}
span.AddEvent("Ready")
@@ -796,25 +886,47 @@ func (c *Cacher) GetList(ctx context.Context, key string, opts storage.ListOptio
if listVal.Kind() != reflect.Slice {
return fmt.Errorf("need a pointer to slice, got %v", listVal.Kind())
}
filter := filterWithAttrsFunction(key, pred)
objs, readResourceVersion, indexUsed, err := c.listItems(ctx, listRV, key, pred, recursive)
objs, readResourceVersion, indexUsed, err := c.listItems(ctx, listRV, preparedKey, pred, recursive)
success := "true"
fallback := "false"
if err != nil {
if consistentRead {
if storage.IsTooLargeResourceVersion(err) {
fallback = "true"
err = c.storage.GetList(ctx, key, opts, listObj)
}
if err != nil {
success = "false"
}
metrics.ConsistentReadTotal.WithLabelValues(c.resourcePrefix, success, fallback).Add(1)
}
return err
}
if consistentRead {
metrics.ConsistentReadTotal.WithLabelValues(c.resourcePrefix, success, fallback).Add(1)
}
span.AddEvent("Listed items from cache", attribute.Int("count", len(objs)))
// store pointer of eligible objects,
// Why not directly put object in the items of listObj?
// the elements in ListObject are Struct type, making slice will bring excessive memory consumption.
// so we try to delay this action as much as possible
var selectedObjects []runtime.Object
for _, obj := range objs {
var lastSelectedObjectKey string
var hasMoreListItems bool
limit := computeListLimit(opts)
for i, obj := range objs {
elem, ok := obj.(*storeElement)
if !ok {
return fmt.Errorf("non *storeElement returned from storage: %v", obj)
}
if filter(elem.Key, elem.Labels, elem.Fields) {
if pred.MatchesObjectAttributes(elem.Labels, elem.Fields) {
selectedObjects = append(selectedObjects, elem.Object)
lastSelectedObjectKey = elem.Key
}
if limit > 0 && int64(len(selectedObjects)) >= limit {
hasMoreListItems = i < len(objs)-1
break
}
}
if len(selectedObjects) == 0 {
@@ -830,7 +942,12 @@ func (c *Cacher) GetList(ctx context.Context, key string, opts storage.ListOptio
}
span.AddEvent("Filtered items", attribute.Int("count", listVal.Len()))
if c.versioner != nil {
if err := c.versioner.UpdateList(listObj, readResourceVersion, "", nil); err != nil {
continueValue, remainingItemCount, err := storage.PrepareContinueToken(lastSelectedObjectKey, key, int64(readResourceVersion), int64(len(objs)), hasMoreListItems, opts)
if err != nil {
return err
}
if err = c.versioner.UpdateList(listObj, readResourceVersion, continueValue, remainingItemCount); err != nil {
return err
}
}
@@ -861,6 +978,14 @@ func (c *Cacher) Count(pathPrefix string) (int64, error) {
return c.storage.Count(pathPrefix)
}
// ReadinessCheck implements storage.Interface.
// It returns nil when c.ready.check() reports true and
// storage.ErrStorageNotReady otherwise.
func (c *Cacher) ReadinessCheck() error {
	if c.ready.check() {
		return nil
	}
	return storage.ErrStorageNotReady
}
// baseObjectThreadUnsafe omits locking for cachingObject.
func baseObjectThreadUnsafe(object runtime.Object) runtime.Object {
if co, ok := object.(*cachingObject); ok {
@@ -899,7 +1024,23 @@ func (c *Cacher) dispatchEvents() {
bookmarkTimer := c.clock.NewTimer(wait.Jitter(time.Second, 0.25))
defer bookmarkTimer.Stop()
// The internal informer populates the RV as soon as it conducts
// the first successful sync with the underlying store.
// The cache must wait until this first sync is completed to be deemed ready.
// Since we cannot send a bookmark when the lastProcessedResourceVersion is 0,
// we poll aggressively for the first list RV before entering the dispatch loop.
lastProcessedResourceVersion := uint64(0)
if err := wait.PollUntilContextCancel(wait.ContextForChannel(c.stopCh), 10*time.Millisecond, true, func(_ context.Context) (bool, error) {
if rv := c.watchCache.getListResourceVersion(); rv != 0 {
lastProcessedResourceVersion = rv
return true, nil
}
return false, nil
}); err != nil {
// given the function above never returns an error,
// a non-nil error means that the stopCh was closed
return
}
for {
select {
case event, ok := <-c.incoming:
@@ -923,29 +1064,6 @@ func (c *Cacher) dispatchEvents() {
metrics.EventsCounter.WithLabelValues(c.groupResource.String()).Inc()
case <-bookmarkTimer.C():
bookmarkTimer.Reset(wait.Jitter(time.Second, 0.25))
// Never send a bookmark event if we did not see an event here, this is fine
// because we don't provide any guarantees on sending bookmarks.
//
// Just pop closed watchers and requeue others if needed.
//
// TODO(#115478): rework the following logic
// in a way that would allow more
// efficient cleanup of closed watchers
if lastProcessedResourceVersion == 0 {
func() {
c.Lock()
defer c.Unlock()
for _, watchers := range c.bookmarkWatchers.popExpiredWatchersThreadUnsafe() {
for _, watcher := range watchers {
if watcher.stopped {
continue
}
c.bookmarkWatchers.addWatcherThreadUnsafe(watcher)
}
}
}()
continue
}
bookmarkEvent := &watchCacheEvent{
Type: watch.Bookmark,
Object: c.newFunc(),
@@ -1225,7 +1343,7 @@ func forgetWatcher(c *Cacher, w *cacheWatcher, index int, scope namespacedName,
}
}
func filterWithAttrsFunction(key string, p storage.SelectionPredicate) filterWithAttrsFunc {
func filterWithAttrsAndPrefixFunction(key string, p storage.SelectionPredicate) filterWithAttrsFunc {
filterFunc := func(objKey string, label labels.Set, field fields.Set) bool {
if !hasPathPrefix(objKey, key) {
return false
@@ -1249,59 +1367,76 @@ func (c *Cacher) LastSyncResourceVersion() (uint64, error) {
// spits a ResourceVersion after which the bookmark event will be delivered.
//
// The returned function must be called under the watchCache lock.
func (c *Cacher) getBookmarkAfterResourceVersionLockedFunc(ctx context.Context, parsedResourceVersion uint64, opts storage.ListOptions) (func() uint64, error) {
if opts.SendInitialEvents == nil || !*opts.SendInitialEvents || !opts.Predicate.AllowWatchBookmarks {
func (c *Cacher) getBookmarkAfterResourceVersionLockedFunc(parsedResourceVersion, requiredResourceVersion uint64, opts storage.ListOptions) (func() uint64, error) {
if !isListWatchRequest(opts) {
return func() uint64 { return 0 }, nil
}
return c.getCommonResourceVersionLockedFunc(ctx, parsedResourceVersion, opts)
}
// getStartResourceVersionForWatchLockedFunc returns a function that
// spits a ResourceVersion the watch will be started from.
// Depending on the input parameters the semantics of the returned ResourceVersion are:
// - start at Exact (return parsedWatchResourceVersion)
// - start at Most Recent (return an RV from etcd)
// - start at Any (return the current watchCache's RV)
//
// The returned function must be called under the watchCache lock.
func (c *Cacher) getStartResourceVersionForWatchLockedFunc(ctx context.Context, parsedWatchResourceVersion uint64, opts storage.ListOptions) (func() uint64, error) {
if opts.SendInitialEvents == nil || *opts.SendInitialEvents {
return func() uint64 { return parsedWatchResourceVersion }, nil
}
return c.getCommonResourceVersionLockedFunc(ctx, parsedWatchResourceVersion, opts)
}
// getCommonResourceVersionLockedFunc a helper that simply computes a ResourceVersion
// based on the input parameters. Please examine callers of this method to get more context.
//
// The returned function must be called under the watchCache lock.
func (c *Cacher) getCommonResourceVersionLockedFunc(ctx context.Context, parsedWatchResourceVersion uint64, opts storage.ListOptions) (func() uint64, error) {
switch {
case len(opts.ResourceVersion) == 0:
rv, err := storage.GetCurrentResourceVersionFromStorage(ctx, c.storage, c.newListFunc, c.resourcePrefix, c.objectType.String())
if err != nil {
return nil, err
}
return func() uint64 { return rv }, nil
case parsedWatchResourceVersion == 0:
return func() uint64 { return requiredResourceVersion }, nil
case parsedResourceVersion == 0:
// here we assume that watchCache locked is already held
return func() uint64 { return c.watchCache.resourceVersion }, nil
default:
return func() uint64 { return parsedWatchResourceVersion }, nil
return func() uint64 { return parsedResourceVersion }, nil
}
}
// isListWatchRequest reports whether opts ask for initial events
// (SendInitialEvents is set and true) and the predicate allows
// watch bookmarks.
func isListWatchRequest(opts storage.ListOptions) bool {
	if opts.SendInitialEvents == nil || !*opts.SendInitialEvents {
		return false
	}
	return opts.Predicate.AllowWatchBookmarks
}
// getWatchCacheResourceVersion returns the ResourceVersion to which the watch cache must be synchronized.
//
// Depending on the input parameters, the semantics of the returned ResourceVersion are:
//   - must be at Exact RV (when parsedWatchResourceVersion > 0)
//   - can be at Any RV (when parsedWatchResourceVersion = 0)
//   - must be at Most Recent RV (the current RV is fetched from storage)
//
// note that the above semantic is enforced by the API validation (defined elsewhere):
//
//	if SendInitialEvents != nil => ResourceVersionMatch = NotOlderThan
//	if ResourceVersionMatch != nil => ResourceVersionMatch = NotOlderThan & SendInitialEvents != nil
func (c *Cacher) getWatchCacheResourceVersion(ctx context.Context, parsedWatchResourceVersion uint64, opts storage.ListOptions) (uint64, error) {
	watchFromStorage := utilfeature.DefaultFeatureGate.Enabled(features.WatchFromStorageWithoutResourceVersion)
	switch {
	case opts.ResourceVersion != "":
		// an explicit RV was supplied by the caller: use its parsed value as-is
		return parsedWatchResourceVersion, nil
	case !watchFromStorage && opts.SendInitialEvents == nil:
		// legacy case: no RV, no SendInitialEvents and the feature gate is off
		return 0, nil
	default:
		return storage.GetCurrentResourceVersionFromStorage(ctx, c.storage, c.newListFunc, c.resourcePrefix, c.objectType.String())
	}
}
// waitUntilWatchCacheFreshAndForceAllEvents waits until cache is at least
// as fresh as given requestedWatchRV if sendInitialEvents was requested.
// Additionally, it instructs the caller whether it should ask for
// all events from the cache (full state) or not.
func (c *Cacher) waitUntilWatchCacheFreshAndForceAllEvents(ctx context.Context, requestedWatchRV uint64, opts storage.ListOptions) (bool, error) {
// otherwise, we allow for establishing the connection because the clients
// can wait for events without unnecessary blocking.
func (c *Cacher) waitUntilWatchCacheFreshAndForceAllEvents(ctx context.Context, requestedWatchRV uint64, opts storage.ListOptions) error {
if opts.SendInitialEvents != nil && *opts.SendInitialEvents {
// Here be dragons:
// Since the etcd feature checker needs to check all members
// to determine whether a given feature is supported,
// we may receive a positive response even if the feature is not supported.
//
// In this very rare scenario, the worst case will be that this
// request will wait for 3 seconds before it fails.
if etcdfeature.DefaultFeatureSupportChecker.Supports(storage.RequestWatchProgress) && c.watchCache.notFresh(requestedWatchRV) {
c.watchCache.waitingUntilFresh.Add()
defer c.watchCache.waitingUntilFresh.Remove()
}
err := c.watchCache.waitUntilFreshAndBlock(ctx, requestedWatchRV)
defer c.watchCache.RUnlock()
return err == nil, err
return err
}
return false, nil
return nil
}
// Wait blocks until the cacher is Ready or Stopped; it returns an error if Stopped.
// It delegates to c.ready.wait, passing ctx through unchanged.
func (c *Cacher) Wait(ctx context.Context) error {
return c.ready.wait(ctx)
}
// errWatcher implements watch.Interface to return a single error

View File

@@ -19,6 +19,8 @@ package cacher
import (
"context"
"google.golang.org/grpc/metadata"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
@@ -30,17 +32,19 @@ import (
// listerWatcher wraps storage.Interface to expose cache.ListerWatcher.
type listerWatcher struct {
storage storage.Interface
resourcePrefix string
newListFunc func() runtime.Object
storage storage.Interface
resourcePrefix string
newListFunc func() runtime.Object
contextMetadata metadata.MD
}
// NewListerWatcher returns a storage.Interface backed ListerWatcher.
func NewListerWatcher(storage storage.Interface, resourcePrefix string, newListFunc func() runtime.Object) cache.ListerWatcher {
func NewListerWatcher(storage storage.Interface, resourcePrefix string, newListFunc func() runtime.Object, contextMetadata metadata.MD) cache.ListerWatcher {
return &listerWatcher{
storage: storage,
resourcePrefix: resourcePrefix,
newListFunc: newListFunc,
storage: storage,
resourcePrefix: resourcePrefix,
newListFunc: newListFunc,
contextMetadata: contextMetadata,
}
}
@@ -59,7 +63,11 @@ func (lw *listerWatcher) List(options metav1.ListOptions) (runtime.Object, error
Predicate: pred,
Recursive: true,
}
if err := lw.storage.GetList(context.TODO(), lw.resourcePrefix, storageOpts, list); err != nil {
ctx := context.Background()
if lw.contextMetadata != nil {
ctx = metadata.NewOutgoingContext(ctx, lw.contextMetadata)
}
if err := lw.storage.GetList(ctx, lw.resourcePrefix, storageOpts, list); err != nil {
return nil, err
}
return list, nil
@@ -73,5 +81,9 @@ func (lw *listerWatcher) Watch(options metav1.ListOptions) (watch.Interface, err
Recursive: true,
ProgressNotify: true,
}
return lw.storage.Watch(context.TODO(), lw.resourcePrefix, opts)
ctx := context.Background()
if lw.contextMetadata != nil {
ctx = metadata.NewOutgoingContext(ctx, lw.contextMetadata)
}
return lw.storage.Watch(ctx, lw.resourcePrefix, opts)
}

View File

@@ -106,6 +106,17 @@ var (
[]string{"resource"},
)
watchCacheResourceVersion = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "resource_version",
Help: "Current resource version of watch cache broken by resource type.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"resource"},
)
watchCacheCapacityIncreaseTotal = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Subsystem: subsystem,
@@ -146,6 +157,25 @@ var (
},
[]string{"resource"},
)
WatchCacheReadWait = compbasemetrics.NewHistogramVec(
&compbasemetrics.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "read_wait_seconds",
Help: "Histogram of time spent waiting for a watch cache to become fresh.",
StabilityLevel: compbasemetrics.ALPHA,
Buckets: []float64{0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3},
}, []string{"resource"})
ConsistentReadTotal = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "consistent_read_total",
Help: "Counter for consistent reads from cache.",
StabilityLevel: compbasemetrics.ALPHA,
}, []string{"resource", "success", "fallback"})
)
var registerMetrics sync.Once
@@ -161,10 +191,13 @@ func Register() {
legacyregistry.MustRegister(EventsReceivedCounter)
legacyregistry.MustRegister(EventsCounter)
legacyregistry.MustRegister(TerminatedWatchersCounter)
legacyregistry.MustRegister(watchCacheResourceVersion)
legacyregistry.MustRegister(watchCacheCapacityIncreaseTotal)
legacyregistry.MustRegister(watchCacheCapacityDecreaseTotal)
legacyregistry.MustRegister(WatchCacheCapacity)
legacyregistry.MustRegister(WatchCacheInitializations)
legacyregistry.MustRegister(WatchCacheReadWait)
legacyregistry.MustRegister(ConsistentReadTotal)
})
}
@@ -175,6 +208,11 @@ func RecordListCacheMetrics(resourcePrefix, indexName string, numFetched, numRet
listCacheNumReturned.WithLabelValues(resourcePrefix).Add(float64(numReturned))
}
// RecordResourceVersion sets the watchCacheResourceVersion gauge labelled
// with resourcePrefix to resourceVersion (converted to float64).
func RecordResourceVersion(resourcePrefix string, resourceVersion uint64) {
	gauge := watchCacheResourceVersion.WithLabelValues(resourcePrefix)
	gauge.Set(float64(resourceVersion))
}
// RecordsWatchCacheCapacityChange records watchCache capacity resize (increase or decrease) operations.
func RecordsWatchCacheCapacityChange(objType string, old, new int) {
WatchCacheCapacity.WithLabelValues(objType).Set(float64(new))

View File

@@ -44,17 +44,3 @@ func hasPathPrefix(s, pathPrefix string) bool {
}
return false
}
// max returns the larger of the two ints a and b.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}
// min returns the smaller of the two ints a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}

View File

@@ -33,6 +33,7 @@ import (
"k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/cacher/metrics"
etcdfeature "k8s.io/apiserver/pkg/storage/feature"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/cache"
"k8s.io/component-base/tracing"
@@ -311,25 +312,26 @@ func (w *watchCache) processEvent(event watch.Event, resourceVersion uint64, upd
RecordTime: w.clock.Now(),
}
// We can call w.store.Get() outside of a critical section,
// because the w.store itself is thread-safe and the only
// place where w.store is modified is below (via updateFunc)
// and these calls are serialized because reflector is processing
// events one-by-one.
previous, exists, err := w.store.Get(elem)
if err != nil {
return err
}
if exists {
previousElem := previous.(*storeElement)
wcEvent.PrevObject = previousElem.Object
wcEvent.PrevObjLabels = previousElem.Labels
wcEvent.PrevObjFields = previousElem.Fields
}
if err := func() error {
// TODO: We should consider moving this lock below after the watchCacheEvent
// is created. In such situation, the only problematic scenario is Replace()
// happening after getting object from store and before acquiring a lock.
// Maybe introduce another lock for this purpose.
w.Lock()
defer w.Unlock()
previous, exists, err := w.store.Get(elem)
if err != nil {
return err
}
if exists {
previousElem := previous.(*storeElement)
wcEvent.PrevObject = previousElem.Object
wcEvent.PrevObjLabels = previousElem.Labels
wcEvent.PrevObjFields = previousElem.Fields
}
w.updateCache(wcEvent)
w.resourceVersion = resourceVersion
defer w.cond.Broadcast()
@@ -346,6 +348,7 @@ func (w *watchCache) processEvent(event watch.Event, resourceVersion uint64, upd
if w.eventHandler != nil {
w.eventHandler(wcEvent)
}
metrics.RecordResourceVersion(w.groupResource.String(), resourceVersion)
return nil
}
@@ -428,6 +431,7 @@ func (w *watchCache) UpdateResourceVersion(resourceVersion string) {
}
w.eventHandler(wcEvent)
}
metrics.RecordResourceVersion(w.groupResource.String(), rv)
}
// List returns list of pointers to <storeElement> objects.
@@ -440,6 +444,11 @@ func (w *watchCache) List() []interface{} {
// You HAVE TO explicitly call w.RUnlock() after this function.
func (w *watchCache) waitUntilFreshAndBlock(ctx context.Context, resourceVersion uint64) error {
startTime := w.clock.Now()
defer func() {
if resourceVersion > 0 {
metrics.WatchCacheReadWait.WithContext(ctx).WithLabelValues(w.groupResource.String()).Observe(w.clock.Since(startTime).Seconds())
}
}()
// In case resourceVersion is 0, we accept arbitrarily stale result.
// As a result, the condition in the below for loop will never be
@@ -492,21 +501,44 @@ func (s sortableStoreElements) Swap(i, j int) {
// WaitUntilFreshAndList returns list of pointers to `storeElement` objects along
// with their ResourceVersion and the name of the index, if any, that was used.
func (w *watchCache) WaitUntilFreshAndList(ctx context.Context, resourceVersion uint64, matchValues []storage.MatchValue) ([]interface{}, uint64, string, error) {
var err error
if utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache) && w.notFresh(resourceVersion) {
// WaitUntilFreshAndList delegates to waitUntilFreshAndListItems and then keeps
// only the returned elements whose Key has key as a path prefix. The surviving
// elements are sorted via sortableStoreElements and returned together with the
// resource version and index name reported by the helper.
//
// An error from the helper is returned as-is; an item that is not a
// *storeElement aborts the call with an error.
func (w *watchCache) WaitUntilFreshAndList(ctx context.Context, resourceVersion uint64, key string, matchValues []storage.MatchValue) ([]interface{}, uint64, string, error) {
	candidates, rv, index, err := w.waitUntilFreshAndListItems(ctx, resourceVersion, key, matchValues)
	if err != nil {
		return nil, 0, "", err
	}

	var filtered []interface{}
	for i := range candidates {
		elem, isElem := candidates[i].(*storeElement)
		if !isElem {
			return nil, 0, "", fmt.Errorf("non *storeElement returned from storage: %v", candidates[i])
		}
		// Keep only entries located under the requested key prefix.
		if hasPathPrefix(elem.Key, key) {
			filtered = append(filtered, candidates[i])
		}
	}

	sort.Sort(sortableStoreElements(filtered))
	return filtered, rv, index, nil
}
func (w *watchCache) waitUntilFreshAndListItems(ctx context.Context, resourceVersion uint64, key string, matchValues []storage.MatchValue) (result []interface{}, rv uint64, index string, err error) {
requestWatchProgressSupported := etcdfeature.DefaultFeatureSupportChecker.Supports(storage.RequestWatchProgress)
if utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache) && requestWatchProgressSupported && w.notFresh(resourceVersion) {
w.waitingUntilFresh.Add()
err = w.waitUntilFreshAndBlock(ctx, resourceVersion)
w.waitingUntilFresh.Remove()
} else {
err = w.waitUntilFreshAndBlock(ctx, resourceVersion)
}
defer w.RUnlock()
if err != nil {
return nil, 0, "", err
return result, rv, index, err
}
result, rv, index, err := func() ([]interface{}, uint64, string, error) {
result, rv, index, err = func() ([]interface{}, uint64, string, error) {
// This isn't the place where we do "final filtering" - only some "prefiltering" is happening here. So the only
// requirement here is to NOT miss anything that should be returned. We can return as many non-matching items as we
// want - they will be filtered out later. The fact that we return less things is only further performance improvement.
@@ -519,7 +551,6 @@ func (w *watchCache) WaitUntilFreshAndList(ctx context.Context, resourceVersion
return w.store.List(), w.resourceVersion, "", nil
}()
sort.Sort(sortableStoreElements(result))
return result, rv, index, err
}
@@ -531,7 +562,14 @@ func (w *watchCache) notFresh(resourceVersion uint64) bool {
// WaitUntilFreshAndGet returns a pointer to a <storeElement> object.
func (w *watchCache) WaitUntilFreshAndGet(ctx context.Context, resourceVersion uint64, key string) (interface{}, bool, uint64, error) {
err := w.waitUntilFreshAndBlock(ctx, resourceVersion)
var err error
if utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache) && w.notFresh(resourceVersion) {
w.waitingUntilFresh.Add()
err = w.waitUntilFreshAndBlock(ctx, resourceVersion)
w.waitingUntilFresh.Remove()
} else {
err = w.waitUntilFreshAndBlock(ctx, resourceVersion)
}
defer w.RUnlock()
if err != nil {
return nil, false, 0, err
@@ -614,7 +652,9 @@ func (w *watchCache) Replace(objs []interface{}, resourceVersion string) error {
w.onReplace()
}
w.cond.Broadcast()
klog.V(3).Infof("Replace watchCache (rev: %v) ", resourceVersion)
metrics.RecordResourceVersion(w.groupResource.String(), version)
klog.V(3).Infof("Replaced watchCache (rev: %v) ", resourceVersion)
return nil
}
@@ -629,6 +669,12 @@ func (w *watchCache) Resync() error {
return nil
}
// getListResourceVersion returns the value of w.listResourceVersion, read
// while holding the watchCache read lock.
func (w *watchCache) getListResourceVersion() uint64 {
	w.RLock()
	listRV := w.listResourceVersion
	w.RUnlock()
	return listRV
}
func (w *watchCache) currentCapacity() int {
w.RLock()
defer w.RUnlock()
@@ -691,7 +737,12 @@ func (w *watchCache) isIndexValidLocked(index int) bool {
// getAllEventsSinceLocked returns a watchCacheInterval that can be used to
// retrieve events since a certain resourceVersion. This function assumes to
// retrieve events since a certain resourceVersion. This function must
// be called under the watchCache lock.
func (w *watchCache) getAllEventsSinceLocked(resourceVersion uint64) (*watchCacheInterval, error) {
func (w *watchCache) getAllEventsSinceLocked(resourceVersion uint64, key string, opts storage.ListOptions) (*watchCacheInterval, error) {
_, matchesSingle := opts.Predicate.MatchesSingle()
if opts.SendInitialEvents != nil && *opts.SendInitialEvents {
return w.getIntervalFromStoreLocked(key, matchesSingle)
}
size := w.endIndex - w.startIndex
var oldest uint64
switch {
@@ -711,13 +762,19 @@ func (w *watchCache) getAllEventsSinceLocked(resourceVersion uint64) (*watchCach
}
if resourceVersion == 0 {
// resourceVersion = 0 means that we don't require any specific starting point
// and we would like to start watching from ~now.
// However, to keep backward compatibility, we additionally need to return the
// current state and only then start watching from that point.
//
// TODO: In v2 api, we should stop returning the current state - #13969.
return w.getIntervalFromStoreLocked()
if opts.SendInitialEvents == nil {
// resourceVersion = 0 means that we don't require any specific starting point
// and we would like to start watching from ~now.
// However, to keep backward compatibility, we additionally need to return the
// current state and only then start watching from that point.
//
// TODO: In v2 api, we should stop returning the current state - #13969.
return w.getIntervalFromStoreLocked(key, matchesSingle)
}
// SendInitialEvents = false and resourceVersion = 0
// means that the request would like to start watching
// from Any resourceVersion
resourceVersion = w.resourceVersion
}
if resourceVersion < oldest-1 {
return nil, errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d (%d)", resourceVersion, oldest-1))
@@ -731,15 +788,15 @@ func (w *watchCache) getAllEventsSinceLocked(resourceVersion uint64) (*watchCach
indexerFunc := func(i int) *watchCacheEvent {
return w.cache[i%w.capacity]
}
ci := newCacheInterval(w.startIndex+first, w.endIndex, indexerFunc, w.indexValidator, &w.RWMutex)
ci := newCacheInterval(w.startIndex+first, w.endIndex, indexerFunc, w.indexValidator, w.RWMutex.RLocker())
return ci, nil
}
// getIntervalFromStoreLocked returns a watchCacheInterval
// that covers the entire storage state.
// This function must be called under the watchCache lock.
func (w *watchCache) getIntervalFromStoreLocked() (*watchCacheInterval, error) {
ci, err := newCacheIntervalFromStore(w.resourceVersion, w.store, w.getAttrsFunc)
func (w *watchCache) getIntervalFromStoreLocked(key string, matchesSingle bool) (*watchCacheInterval, error) {
ci, err := newCacheIntervalFromStore(w.resourceVersion, w.store, w.getAttrsFunc, key, matchesSingle)
if err != nil {
return nil, err
}

View File

@@ -18,6 +18,7 @@ package cacher
import (
"fmt"
"sort"
"sync"
"k8s.io/apimachinery/pkg/fields"
@@ -114,12 +115,40 @@ func newCacheInterval(startIndex, endIndex int, indexer indexerFunc, indexValida
}
}
// sortableWatchCacheEvents adapts a slice of watch cache events to
// sort.Interface, ordering the events by their Key in ascending order.
type sortableWatchCacheEvents []*watchCacheEvent

// Len reports how many events the slice holds.
func (events sortableWatchCacheEvents) Len() int {
	count := len(events)
	return count
}

// Less reports whether the event at index i has a smaller Key than the one at index j.
func (events sortableWatchCacheEvents) Less(i, j int) bool {
	left, right := events[i], events[j]
	return left.Key < right.Key
}

// Swap exchanges the events at indices i and j.
func (events sortableWatchCacheEvents) Swap(i, j int) {
	held := events[i]
	events[i] = events[j]
	events[j] = held
}
// newCacheIntervalFromStore is meant to handle the case of rv=0, such that the events
// returned by Next() need to be events from a List() done on the underlying store of
// the watch cache.
func newCacheIntervalFromStore(resourceVersion uint64, store cache.Indexer, getAttrsFunc attrFunc) (*watchCacheInterval, error) {
// The items returned in the interval will be sorted by Key.
func newCacheIntervalFromStore(resourceVersion uint64, store cache.Indexer, getAttrsFunc attrFunc, key string, matchesSingle bool) (*watchCacheInterval, error) {
buffer := &watchCacheIntervalBuffer{}
allItems := store.List()
var allItems []interface{}
if matchesSingle {
item, exists, err := store.GetByKey(key)
if err != nil {
return nil, err
}
if exists {
allItems = append(allItems, item)
}
} else {
allItems = store.List()
}
buffer.buffer = make([]*watchCacheEvent, len(allItems))
for i, item := range allItems {
elem, ok := item.(*storeElement)
@@ -140,6 +169,7 @@ func newCacheIntervalFromStore(resourceVersion uint64, store cache.Indexer, getA
}
buffer.endIndex++
}
sort.Sort(sortableWatchCacheEvents(buffer.buffer))
ci := &watchCacheInterval{
startIndex: 0,
// Simulate that we already have all the events we're looking for.

View File

@@ -21,6 +21,8 @@ import (
"sync"
"time"
"google.golang.org/grpc/metadata"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
@@ -34,19 +36,20 @@ const (
progressRequestPeriod = 100 * time.Millisecond
)
func newConditionalProgressRequester(requestWatchProgress WatchProgressRequester, clock TickerFactory) *conditionalProgressRequester {
func newConditionalProgressRequester(requestWatchProgress WatchProgressRequester, clock TickerFactory, contextMetadata metadata.MD) *conditionalProgressRequester {
pr := &conditionalProgressRequester{
clock: clock,
requestWatchProgress: requestWatchProgress,
contextMetadata: contextMetadata,
}
pr.cond = sync.NewCond(pr.mux.RLocker())
pr.cond = sync.NewCond(&pr.mux)
return pr
}
// WatchProgressRequester is the callback that conditionalProgressRequester
// invokes with its run context to request a watch progress notification.
// A non-nil error reports that the request failed.
type WatchProgressRequester func(ctx context.Context) error
// TickerFactory is the clock abstraction handed to
// newConditionalProgressRequester; it creates tickers and timers.
type TickerFactory interface {
// NewTicker returns a clock.Ticker for the given duration.
NewTicker(time.Duration) clock.Ticker
// NewTimer returns a clock.Timer for the given duration.
NewTimer(time.Duration) clock.Timer
}
// conditionalProgressRequester will request progress notification if there
@@ -54,8 +57,9 @@ type TickerFactory interface {
type conditionalProgressRequester struct {
clock TickerFactory
requestWatchProgress WatchProgressRequester
contextMetadata metadata.MD
mux sync.RWMutex
mux sync.Mutex
cond *sync.Cond
waiting int
stopped bool
@@ -63,6 +67,9 @@ type conditionalProgressRequester struct {
func (pr *conditionalProgressRequester) Run(stopCh <-chan struct{}) {
ctx := wait.ContextForChannel(stopCh)
if pr.contextMetadata != nil {
ctx = metadata.NewOutgoingContext(ctx, pr.contextMetadata)
}
go func() {
defer utilruntime.HandleCrash()
<-stopCh
@@ -71,12 +78,12 @@ func (pr *conditionalProgressRequester) Run(stopCh <-chan struct{}) {
pr.stopped = true
pr.cond.Signal()
}()
ticker := pr.clock.NewTicker(progressRequestPeriod)
defer ticker.Stop()
timer := pr.clock.NewTimer(progressRequestPeriod)
defer timer.Stop()
for {
stopped := func() bool {
pr.mux.RLock()
defer pr.mux.RUnlock()
pr.mux.Lock()
defer pr.mux.Unlock()
for pr.waiting == 0 && !pr.stopped {
pr.cond.Wait()
}
@@ -87,15 +94,17 @@ func (pr *conditionalProgressRequester) Run(stopCh <-chan struct{}) {
}
select {
case <-ticker.C():
case <-timer.C():
shouldRequest := func() bool {
pr.mux.RLock()
defer pr.mux.RUnlock()
pr.mux.Lock()
defer pr.mux.Unlock()
return pr.waiting > 0 && !pr.stopped
}()
if !shouldRequest {
timer.Reset(0)
continue
}
timer.Reset(progressRequestPeriod)
err := pr.requestWatchProgress(ctx)
if err != nil {
klog.V(4).InfoS("Error requesting bookmark", "err", err)
@@ -117,5 +126,4 @@ func (pr *conditionalProgressRequester) Remove() {
pr.mux.Lock()
defer pr.mux.Unlock()
pr.waiting -= 1
pr.cond.Signal()
}