feat: kubesphere 4.0 (#6115)

* feat: kubesphere 4.0

Signed-off-by: ci-bot <ci-bot@kubesphere.io>

* feat: kubesphere 4.0

Signed-off-by: ci-bot <ci-bot@kubesphere.io>

---------

Signed-off-by: ci-bot <ci-bot@kubesphere.io>
Co-authored-by: ks-ci-bot <ks-ci-bot@example.com>
Co-authored-by: joyceliu <joyceliu@yunify.com>
Author: KubeSphere CI Bot
Date: 2024-09-06 11:05:52 +08:00
Committed by: GitHub
parent b5015ec7b9
commit 447a51f08b
8557 changed files with 546695 additions and 1146174 deletions


@@ -18,7 +18,6 @@ package fairqueuing
import (
"context"
"time"
"k8s.io/apiserver/pkg/util/flowcontrol/debug"
"k8s.io/apiserver/pkg/util/flowcontrol/metrics"
@@ -34,7 +33,10 @@ type QueueSetFactory interface {
// BeginConstruction does the first phase of creating a QueueSet.
// The RatioedGaugePair observes the number of requests waiting and executing,
// with execution covering just the regular phase.
// The denominator for the waiting phase is
// max(1, QueuingConfig.QueueLengthLimit) X max(1, QueuingConfig.DesiredNumQueues).
// The RatioedGauge observes number of seats occupied through all phases of execution.
// The denominator for all the ratioed concurrency gauges is supplied later in the DispatchingConfig.
// The Gauge observes the seat demand (executing + queued seats).
BeginConstruction(QueuingConfig, metrics.RatioedGaugePair, metrics.RatioedGauge, metrics.Gauge) (QueueSetCompleter, error)
}
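
Taken together, the comments above describe a two-phase construction: the queuing parameters are validated in BeginConstruction, while the concurrency limit only arrives later in the DispatchingConfig. A minimal sketch of a caller, assuming only the interfaces shown in this diff (the helper name and wiring are illustrative, not part of the commit):

func buildQueueSet(f QueueSetFactory, qCfg QueuingConfig, dCfg DispatchingConfig,
	reqsPair metrics.RatioedGaugePair, execSeats metrics.RatioedGauge, demand metrics.Gauge) (QueueSet, error) {
	completer, err := f.BeginConstruction(qCfg, reqsPair, execSeats, demand)
	if err != nil {
		return nil, err // e.g. an invalid DesiredNumQueues/HandSize combination
	}
	return completer.Complete(dCfg), nil
}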
@@ -113,8 +115,11 @@ type QueuingConfig struct {
Name string
// DesiredNumQueues is the number of queues that the API says
// should exist now. This may be zero, in which case
// QueueLengthLimit, HandSize, and RequestWaitLimit are ignored.
// should exist now. This may be non-positive, in which case
// QueueLengthLimit and HandSize are ignored.
// A value of zero means to respect the ConcurrencyLimit of the DispatchingConfig.
// A negative value means to always dispatch immediately upon arrival
// (i.e., the requests are "exempt" from limitation).
DesiredNumQueues int
// QueueLengthLimit is the maximum number of requests that may be waiting in a given queue at a time
@@ -123,14 +128,14 @@ type QueuingConfig struct {
// HandSize is a parameter of shuffle sharding. Upon arrival of a request, a queue is chosen by randomly
// dealing a "hand" of this many queues and then picking one of minimum length.
HandSize int
// RequestWaitLimit is the maximum amount of time that a request may wait in a queue.
// If, by the end of that time, the request has not been dispatched then it is rejected.
RequestWaitLimit time.Duration
}
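
The three DesiredNumQueues regimes documented above reduce to a simple three-way split, mirrored by checkConfig and canAccommodateSeatsLocked later in this diff (a sketch, not code from the commit):

switch {
case qCfg.DesiredNumQueues < 0:
	// exempt: dispatch every request immediately upon arrival
case qCfg.DesiredNumQueues == 0:
	// no queuing: only the ConcurrencyLimit of the DispatchingConfig applies
default:
	// shuffle-shard arrivals among DesiredNumQueues queues of bounded length
}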
// DispatchingConfig defines the configuration of the dispatching aspect of a QueueSet.
type DispatchingConfig struct {
// ConcurrencyLimit is the maximum number of requests of this QueueSet that may be executing at a time
ConcurrencyLimit int
// ConcurrencyDenominator is used in relative metrics of concurrency.
// It equals ConcurrencyLimit except when that is zero.
ConcurrencyDenominator int
}
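
The ConcurrencyDenominator comment leaves the zero case open; one plausible reading, as a hedged sketch (the fallback of 1 is an assumption, not stated in this commit):

func concurrencyDenominator(concurrencyLimit int) int {
	if concurrencyLimit > 0 {
		return concurrencyLimit
	}
	return 1 // assumed fallback so the ratioed gauges never divide by zero
}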


@@ -17,12 +17,13 @@ limitations under the License.
package promise
import (
"context"
"sync"
)
// promise implements the WriteOnce interface.
type promise struct {
doneCh <-chan struct{}
doneCtx context.Context
doneVal interface{}
setCh chan struct{}
onceler sync.Once
@@ -35,12 +36,12 @@ var _ WriteOnce = &promise{}
//
// If `initial` is non-nil then that value is Set at creation time.
//
// If a `Get` is waiting soon after `doneCh` becomes selectable (which
// never happens for the nil channel) then `Set(doneVal)` effectively
// happens at that time.
func NewWriteOnce(initial interface{}, doneCh <-chan struct{}, doneVal interface{}) WriteOnce {
// If a `Get` is waiting soon after the channel associated with the
// `doneCtx` becomes selectable (which never happens for the nil
// channel) then `Set(doneVal)` effectively happens at that time.
func NewWriteOnce(initial interface{}, doneCtx context.Context, doneVal interface{}) WriteOnce {
p := &promise{
doneCh: doneCh,
doneCtx: doneCtx,
doneVal: doneVal,
setCh: make(chan struct{}),
}
@@ -53,7 +54,7 @@ func NewWriteOnce(initial interface{}, doneCh <-chan struct{}, doneVal interface
func (p *promise) Get() interface{} {
select {
case <-p.setCh:
case <-p.doneCh:
case <-p.doneCtx.Done():
p.Set(p.doneVal)
}
return p.value
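
A hypothetical use of the context-based promise (values illustrative): once the context is canceled, a waiting Get behaves as if Set(doneVal) had been called.

ctx, cancel := context.WithCancel(context.Background())
p := NewWriteOnce(nil, ctx, "canceled") // no initial value
cancel()                                // doneCtx becomes selectable
v := p.Get()                            // effectively Set("canceled"); returns "canceled"
_ = v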


@@ -24,6 +24,7 @@ import (
"sync"
"time"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apiserver/pkg/util/flowcontrol/debug"
fq "k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing"
"k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/eventclock"
@@ -52,7 +53,7 @@ type queueSetFactory struct {
// - whose Set method is invoked with the queueSet locked, and
// - whose Get method is invoked with the queueSet not locked.
// The parameters are the same as for `promise.NewWriteOnce`.
type promiseFactory func(initial interface{}, doneCh <-chan struct{}, doneVal interface{}) promise.WriteOnce
type promiseFactory func(initial interface{}, doneCtx context.Context, doneVal interface{}) promise.WriteOnce
// promiseFactoryFactory returns the promiseFactory to use for the given queueSet
type promiseFactoryFactory func(*queueSet) promiseFactory
@@ -138,6 +139,10 @@ type queueSet struct {
// from that queue.
totRequestsExecuting int
// requestsExecutingSet is the set of requests executing in the real world IF
// there are no queues; otherwise the requests are tracked in the queues.
requestsExecutingSet sets.Set[*request]
// totSeatsInUse is the number of total "seats" in use by all the
// request(s) that are currently executing in this queueset.
totSeatsInUse int
@@ -148,6 +153,22 @@ type queueSet struct {
// enqueues is the number of requests that have ever been enqueued
enqueues int
// totRequestsDispatched is the total number of requests of this
// queueSet that have been processed.
totRequestsDispatched int
// totRequestsRejected is the total number of requests of this
// queueSet that have been rejected.
totRequestsRejected int
// totRequestsTimedout is the total number of requests of this
// queueSet that have timed out.
totRequestsTimedout int
// totRequestsCancelled is the total number of requests of this
// queueSet that have been cancelled.
totRequestsCancelled int
}
// NewQueueSetFactory creates a new QueueSetFactory object
@@ -181,7 +202,7 @@ func (qsf *queueSetFactory) BeginConstruction(qCfg fq.QueuingConfig, reqsGaugePa
// calls for one, and returns a non-nil error if the given config is
// invalid.
func checkConfig(qCfg fq.QueuingConfig) (*shufflesharding.Dealer, error) {
if qCfg.DesiredNumQueues == 0 {
if qCfg.DesiredNumQueues <= 0 {
return nil, nil
}
dealer, err := shufflesharding.NewDealer(qCfg.DesiredNumQueues, qCfg.HandSize)
@@ -203,6 +224,7 @@ func (qsc *queueSetCompleter) Complete(dCfg fq.DispatchingConfig) fq.QueueSet {
qCfg: qsc.qCfg,
currentR: 0,
lastRealTime: qsc.factory.clock.Now(),
requestsExecutingSet: sets.New[*request](),
}
qs.promiseFactory = qsc.factory.promiseFactoryFactory(qs)
}
@@ -214,7 +236,7 @@ func (qsc *queueSetCompleter) Complete(dCfg fq.DispatchingConfig) fq.QueueSet {
func createQueues(n, baseIndex int) []*queue {
fqqueues := make([]*queue, n)
for i := 0; i < n; i++ {
fqqueues[i] = &queue{index: baseIndex + i, requests: newRequestFIFO()}
fqqueues[i] = &queue{index: baseIndex + i, requestsWaiting: newRequestFIFO(), requestsExecuting: sets.New[*request]()}
}
return fqqueues
}
@@ -250,7 +272,6 @@ func (qs *queueSet) setConfiguration(ctx context.Context, qCfg fq.QueuingConfig,
} else {
qCfg.QueueLengthLimit = qs.qCfg.QueueLengthLimit
qCfg.HandSize = qs.qCfg.HandSize
qCfg.RequestWaitLimit = qs.qCfg.RequestWaitLimit
}
qs.qCfg = qCfg
@@ -264,8 +285,8 @@ func (qs *queueSet) setConfiguration(ctx context.Context, qCfg fq.QueuingConfig,
qll *= qCfg.DesiredNumQueues
}
qs.reqsGaugePair.RequestsWaiting.SetDenominator(float64(qll))
qs.reqsGaugePair.RequestsExecuting.SetDenominator(float64(dCfg.ConcurrencyLimit))
qs.execSeatsGauge.SetDenominator(float64(dCfg.ConcurrencyLimit))
qs.reqsGaugePair.RequestsExecuting.SetDenominator(float64(dCfg.ConcurrencyDenominator))
qs.execSeatsGauge.SetDenominator(float64(dCfg.ConcurrencyDenominator))
qs.dispatchAsMuchAsPossibleLocked()
}
@@ -278,9 +299,6 @@ const (
// Serve this one
decisionExecute requestDecision = iota
// Reject this one due to APF queuing considerations
decisionReject
// This one's context timed out / was canceled
decisionCancel
)
@@ -304,6 +322,7 @@ func (qs *queueSet) StartRequest(ctx context.Context, workEstimate *fqrequest.Wo
if !qs.canAccommodateSeatsLocked(workEstimate.MaxSeats()) {
klog.V(5).Infof("QS(%s): rejecting request %q %#+v %#+v because %d seats are asked for, %d seats are in use (%d are executing) and the limit is %d",
qs.qCfg.Name, fsName, descr1, descr2, workEstimate, qs.totSeatsInUse, qs.totRequestsExecuting, qs.dCfg.ConcurrencyLimit)
qs.totRequestsRejected++
metrics.AddReject(ctx, qs.qCfg.Name, fsName, "concurrency-limit")
return nil, qs.isIdleLocked()
}
@@ -314,15 +333,15 @@ func (qs *queueSet) StartRequest(ctx context.Context, workEstimate *fqrequest.Wo
// ========================================================================
// Step 1:
// 1) Start with shuffle sharding, to pick a queue.
// 2) Reject old requests that have been waiting too long
// 3) Reject current request if there is not enough concurrency shares and
// 2) Reject current request if there is not enough concurrency shares and
// we are at max queue length
// 4) If not rejected, create a request and enqueue
req = qs.timeoutOldRequestsAndRejectOrEnqueueLocked(ctx, workEstimate, hashValue, flowDistinguisher, fsName, descr1, descr2, queueNoteFn)
// 3) If not rejected, create a request and enqueue
req = qs.shuffleShardAndRejectOrEnqueueLocked(ctx, workEstimate, hashValue, flowDistinguisher, fsName, descr1, descr2, queueNoteFn)
// req == nil means that the request was rejected - no remaining
// concurrency shares and at max queue length already
if req == nil {
klog.V(5).Infof("QS(%s): rejecting request %q %#+v %#+v due to queue full", qs.qCfg.Name, fsName, descr1, descr2)
qs.totRequestsRejected++
metrics.AddReject(ctx, qs.qCfg.Name, fsName, "queue-full")
return nil, qs.isIdleLocked()
}
@@ -398,11 +417,7 @@ func (req *request) wait() (bool, bool) {
}
req.waitStarted = true
switch decisionAny {
case decisionReject:
klog.V(5).Infof("QS(%s): request %#+v %#+v timed out after being enqueued\n", qs.qCfg.Name, req.descr1, req.descr2)
metrics.AddReject(req.ctx, qs.qCfg.Name, req.fsName, "time-out")
return false, qs.isIdleLocked()
case decisionCancel:
case decisionCancel: // handle in code following this switch
case decisionExecute:
klog.V(5).Infof("QS(%s): Dispatching request %#+v %#+v from its queue", qs.qCfg.Name, req.descr1, req.descr2)
return true, false
@@ -412,14 +427,17 @@ func (req *request) wait() (bool, bool) {
}
// TODO(aaron-prindle) add metrics for this case
klog.V(5).Infof("QS(%s): Ejecting request %#+v %#+v from its queue", qs.qCfg.Name, req.descr1, req.descr2)
// remove the request from the queue as it has timed out
// remove the request from the queue as its queue wait time has been exceeded
queue := req.queue
if req.removeFromQueueLocked() != nil {
defer qs.boundNextDispatchLocked(queue)
qs.totRequestsWaiting--
qs.totSeatsWaiting -= req.MaxSeats()
metrics.AddReject(req.ctx, qs.qCfg.Name, req.fsName, "cancelled")
qs.totRequestsRejected++
qs.totRequestsCancelled++
metrics.AddReject(req.ctx, qs.qCfg.Name, req.fsName, "time-out")
metrics.AddRequestsInQueues(req.ctx, qs.qCfg.Name, req.fsName, -1)
metrics.AddSeatsInQueues(req.ctx, qs.qCfg.Name, req.fsName, -req.MaxSeats())
req.NoteQueued(false)
qs.reqsGaugePair.RequestsWaiting.Add(-1)
qs.seatDemandIntegrator.Set(float64(qs.totSeatsInUse + qs.totSeatsWaiting))
@@ -482,7 +500,7 @@ func (qs *queueSet) advanceEpoch(ctx context.Context, now time.Time, incrR fqreq
klog.InfoS("Advancing epoch", "QS", qs.qCfg.Name, "when", now.Format(nsTimeFmt), "oldR", oldR, "newR", qs.currentR, "incrR", incrR)
success := true
for qIdx, queue := range qs.queues {
if queue.requests.Length() == 0 && queue.requestsExecuting == 0 {
if queue.requestsWaiting.Length() == 0 && queue.requestsExecuting.Len() == 0 {
// Do not just decrement, the value could be quite outdated.
// It is safe to reset to zero in this case, because the next request
// will overwrite the zero with `qs.currentR`.
@@ -495,7 +513,7 @@ func (qs *queueSet) advanceEpoch(ctx context.Context, now time.Time, incrR fqreq
klog.ErrorS(errors.New("queue::nextDispatchR underflow"), "Underflow", "QS", qs.qCfg.Name, "queue", qIdx, "oldNextDispatchR", oldNextDispatchR, "newNextDispatchR", queue.nextDispatchR, "incrR", incrR)
success = false
}
queue.requests.Walk(func(req *request) bool {
queue.requestsWaiting.Walk(func(req *request) bool {
oldArrivalR := req.arrivalR
req.arrivalR -= rDecrement
if req.arrivalR > oldArrivalR {
@@ -516,8 +534,8 @@ func (qs *queueSet) getVirtualTimeRatioLocked() float64 {
for _, queue := range qs.queues {
// here we want the sum of the maximum width of the requests in this queue since our
// goal is to find the maximum rate at which the queue could work.
seatsRequested += (queue.seatsInUse + queue.requests.QueueSum().MaxSeatsSum)
if queue.requests.Length() > 0 || queue.requestsExecuting > 0 {
seatsRequested += (queue.seatsInUse + queue.requestsWaiting.QueueSum().MaxSeatsSum)
if queue.requestsWaiting.Length() > 0 || queue.requestsExecuting.Len() > 0 {
activeQueues++
}
}
@@ -527,25 +545,19 @@ func (qs *queueSet) getVirtualTimeRatioLocked() float64 {
return math.Min(float64(seatsRequested), float64(qs.dCfg.ConcurrencyLimit)) / float64(activeQueues)
}
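
For concreteness, with illustrative numbers not taken from the commit: three active queues asking for 30 seats in total against a ConcurrencyLimit of 20 gives a ratio of min(30, 20) / 3 ≈ 6.67, i.e. each active queue's virtual time advances as though it had about 6.67 seats of dedicated capacity.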
// timeoutOldRequestsAndRejectOrEnqueueLocked encapsulates the logic required
// shuffleShardAndRejectOrEnqueueLocked encapsulates the logic required
// to validate and enqueue a request for the queueSet/QueueSet:
// 1) Start with shuffle sharding, to pick a queue.
// 2) Reject old requests that have been waiting too long
// 3) Reject current request if there is not enough concurrency shares and
// 2) Reject current request if there is not enough concurrency shares and
// we are at max queue length
// 4) If not rejected, create a request and enqueue
// 3) If not rejected, create a request and enqueue
// returns the enqueued request on a successful enqueue
// returns nil in the case that there is no available concurrency or
// the QueueLengthLimit has been reached
func (qs *queueSet) timeoutOldRequestsAndRejectOrEnqueueLocked(ctx context.Context, workEstimate *fqrequest.WorkEstimate, hashValue uint64, flowDistinguisher, fsName string, descr1, descr2 interface{}, queueNoteFn fq.QueueNoteFn) *request {
func (qs *queueSet) shuffleShardAndRejectOrEnqueueLocked(ctx context.Context, workEstimate *fqrequest.WorkEstimate, hashValue uint64, flowDistinguisher, fsName string, descr1, descr2 interface{}, queueNoteFn fq.QueueNoteFn) *request {
// Start with the shuffle sharding, to pick a queue.
queueIdx := qs.shuffleShardLocked(hashValue, descr1, descr2)
queue := qs.queues[queueIdx]
// The next step is the logic to reject requests that have been waiting too long
qs.removeTimedOutRequestsFromQueueToBoundLocked(queue, fsName)
// NOTE: currently timeout is only checked for each new request. This means that there can be
// requests that are in the queue longer than the timeout if there are no new requests.
// We prefer the simplicity over the promptness, at least for now.
defer qs.boundNextDispatchLocked(queue)
@@ -555,7 +567,7 @@ func (qs *queueSet) timeoutOldRequestsAndRejectOrEnqueueLocked(ctx context.Conte
fsName: fsName,
flowDistinguisher: flowDistinguisher,
ctx: ctx,
decision: qs.promiseFactory(nil, ctx.Done(), decisionCancel),
decision: qs.promiseFactory(nil, ctx, decisionCancel),
arrivalTime: qs.clock.Now(),
arrivalR: qs.currentR,
queue: queue,
@@ -567,7 +579,7 @@ func (qs *queueSet) timeoutOldRequestsAndRejectOrEnqueueLocked(ctx context.Conte
if ok := qs.rejectOrEnqueueToBoundLocked(req); !ok {
return nil
}
metrics.ObserveQueueLength(ctx, qs.qCfg.Name, fsName, queue.requests.Length())
metrics.ObserveQueueLength(ctx, qs.qCfg.Name, fsName, queue.requestsWaiting.Length())
return req
}
@@ -586,7 +598,7 @@ func (qs *queueSet) shuffleShardLocked(hashValue uint64, descr1, descr2 interfac
for i := 0; i < handSize; i++ {
queueIdx := hand[(offset+i)%handSize]
queue := qs.queues[queueIdx]
queueSum := queue.requests.QueueSum()
queueSum := queue.requestsWaiting.QueueSum()
// this is the total amount of work in seat-seconds for requests
// waiting in this queue; we will select the queue with the minimum.
@@ -599,55 +611,18 @@ func (qs *queueSet) shuffleShardLocked(hashValue uint64, descr1, descr2 interfac
}
if klogV := klog.V(6); klogV.Enabled() {
chosenQueue := qs.queues[bestQueueIdx]
klogV.Infof("QS(%s) at t=%s R=%v: For request %#+v %#+v chose queue %d, with sum: %#v & %d seats in use & nextDispatchR=%v", qs.qCfg.Name, qs.clock.Now().Format(nsTimeFmt), qs.currentR, descr1, descr2, bestQueueIdx, chosenQueue.requests.QueueSum(), chosenQueue.seatsInUse, chosenQueue.nextDispatchR)
klogV.Infof("QS(%s) at t=%s R=%v: For request %#+v %#+v chose queue %d, with sum: %#v & %d seats in use & nextDispatchR=%v", qs.qCfg.Name, qs.clock.Now().Format(nsTimeFmt), qs.currentR, descr1, descr2, bestQueueIdx, chosenQueue.requestsWaiting.QueueSum(), chosenQueue.seatsInUse, chosenQueue.nextDispatchR)
}
return bestQueueIdx
}
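
Condensed, the selection above amounts to the following sketch (it reuses the shufflesharding.Dealer built in checkConfig; load() is a hypothetical stand-in for the queue-sum comparison in the real loop):

hand := dealer.DealIntoHand(hashValue, make([]int, handSize))
bestIdx := hand[0]
for _, idx := range hand[1:] {
	if load(idx) < load(bestIdx) { // the real code compares waiting work in seat-seconds
		bestIdx = idx
	}
}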
// removeTimedOutRequestsFromQueueToBoundLocked rejects old requests that have been enqueued
// past the requestWaitLimit
func (qs *queueSet) removeTimedOutRequestsFromQueueToBoundLocked(queue *queue, fsName string) {
timeoutCount := 0
disqueueSeats := 0
now := qs.clock.Now()
reqs := queue.requests
// reqs are sorted oldest -> newest
// can short circuit loop (break) if oldest requests are not timing out
// as newer requests also will not have timed out
// now - requestWaitLimit = arrivalLimit
arrivalLimit := now.Add(-qs.qCfg.RequestWaitLimit)
reqs.Walk(func(req *request) bool {
if arrivalLimit.After(req.arrivalTime) {
if req.decision.Set(decisionReject) && req.removeFromQueueLocked() != nil {
timeoutCount++
disqueueSeats += req.MaxSeats()
req.NoteQueued(false)
metrics.AddRequestsInQueues(req.ctx, qs.qCfg.Name, req.fsName, -1)
}
// we need to check if the next request has timed out.
return true
}
// since reqs are sorted oldest -> newest, we are done here.
return false
})
// remove timed out requests from queue
if timeoutCount > 0 {
qs.totRequestsWaiting -= timeoutCount
qs.totSeatsWaiting -= disqueueSeats
qs.reqsGaugePair.RequestsWaiting.Add(float64(-timeoutCount))
qs.seatDemandIntegrator.Set(float64(qs.totSeatsInUse + qs.totSeatsWaiting))
}
}
// rejectOrEnqueueToBoundLocked rejects or enqueues the newly arrived
// request, which has been assigned to a queue. If up against the
// queue length limit and the concurrency limit then returns false.
// Otherwise enqueues and returns true.
func (qs *queueSet) rejectOrEnqueueToBoundLocked(request *request) bool {
queue := request.queue
curQueueLength := queue.requests.Length()
curQueueLength := queue.requestsWaiting.Length()
// rejects the newly arrived request if resource criteria not met
if qs.totSeatsInUse >= qs.dCfg.ConcurrencyLimit &&
curQueueLength >= qs.qCfg.QueueLengthLimit {
@@ -662,7 +637,7 @@ func (qs *queueSet) rejectOrEnqueueToBoundLocked(request *request) bool {
func (qs *queueSet) enqueueToBoundLocked(request *request) {
queue := request.queue
now := qs.clock.Now()
if queue.requests.Length() == 0 && queue.requestsExecuting == 0 {
if queue.requestsWaiting.Length() == 0 && queue.requestsExecuting.Len() == 0 {
// the queue's start R is set to the virtual time.
queue.nextDispatchR = qs.currentR
klogV := klog.V(6)
@@ -670,10 +645,11 @@ func (qs *queueSet) enqueueToBoundLocked(request *request) {
klogV.Infof("QS(%s) at t=%s R=%v: initialized queue %d start R due to request %#+v %#+v", qs.qCfg.Name, now.Format(nsTimeFmt), queue.nextDispatchR, queue.index, request.descr1, request.descr2)
}
}
request.removeFromQueueLocked = queue.requests.Enqueue(request)
request.removeFromQueueLocked = queue.requestsWaiting.Enqueue(request)
qs.totRequestsWaiting++
qs.totSeatsWaiting += request.MaxSeats()
metrics.AddRequestsInQueues(request.ctx, qs.qCfg.Name, request.fsName, 1)
metrics.AddSeatsInQueues(request.ctx, qs.qCfg.Name, request.fsName, request.MaxSeats())
request.NoteQueued(true)
qs.reqsGaugePair.RequestsWaiting.Add(1)
qs.seatDemandIntegrator.Set(float64(qs.totSeatsInUse + qs.totSeatsWaiting))
@@ -694,7 +670,7 @@ func (qs *queueSet) dispatchSansQueueLocked(ctx context.Context, workEstimate *f
flowDistinguisher: flowDistinguisher,
ctx: ctx,
startTime: now,
decision: qs.promiseFactory(decisionExecute, ctx.Done(), decisionCancel),
decision: qs.promiseFactory(decisionExecute, ctx, decisionCancel),
arrivalTime: now,
arrivalR: qs.currentR,
descr1: descr1,
@@ -703,8 +679,9 @@ func (qs *queueSet) dispatchSansQueueLocked(ctx context.Context, workEstimate *f
}
qs.totRequestsExecuting++
qs.totSeatsInUse += req.MaxSeats()
qs.requestsExecutingSet = qs.requestsExecutingSet.Insert(req)
metrics.AddRequestsExecuting(ctx, qs.qCfg.Name, fsName, 1)
metrics.AddRequestConcurrencyInUse(qs.qCfg.Name, fsName, req.MaxSeats())
metrics.AddSeatConcurrencyInUse(qs.qCfg.Name, fsName, req.MaxSeats())
qs.reqsGaugePair.RequestsExecuting.Add(1)
qs.execSeatsGauge.Add(float64(req.MaxSeats()))
qs.seatDemandIntegrator.Set(float64(qs.totSeatsInUse + qs.totSeatsWaiting))
@@ -731,6 +708,7 @@ func (qs *queueSet) dispatchLocked() bool {
qs.totRequestsWaiting--
qs.totSeatsWaiting -= request.MaxSeats()
metrics.AddRequestsInQueues(request.ctx, qs.qCfg.Name, request.fsName, -1)
metrics.AddSeatsInQueues(request.ctx, qs.qCfg.Name, request.fsName, -request.MaxSeats())
request.NoteQueued(false)
qs.reqsGaugePair.RequestsWaiting.Add(-1)
defer qs.boundNextDispatchLocked(queue)
@@ -746,10 +724,10 @@ func (qs *queueSet) dispatchLocked() bool {
// problem because other overhead is also included.
qs.totRequestsExecuting++
qs.totSeatsInUse += request.MaxSeats()
queue.requestsExecuting++
queue.requestsExecuting = queue.requestsExecuting.Insert(request)
queue.seatsInUse += request.MaxSeats()
metrics.AddRequestsExecuting(request.ctx, qs.qCfg.Name, request.fsName, 1)
metrics.AddRequestConcurrencyInUse(qs.qCfg.Name, request.fsName, request.MaxSeats())
metrics.AddSeatConcurrencyInUse(qs.qCfg.Name, request.fsName, request.MaxSeats())
qs.reqsGaugePair.RequestsExecuting.Add(1)
qs.execSeatsGauge.Add(float64(request.MaxSeats()))
qs.seatDemandIntegrator.Set(float64(qs.totSeatsInUse + qs.totSeatsWaiting))
@@ -757,7 +735,7 @@ func (qs *queueSet) dispatchLocked() bool {
if klogV.Enabled() {
klogV.Infof("QS(%s) at t=%s R=%v: dispatching request %#+v %#+v work %v from queue %d with start R %v, queue will have %d waiting & %d requests occupying %d seats, set will have %d seats occupied",
qs.qCfg.Name, request.startTime.Format(nsTimeFmt), qs.currentR, request.descr1, request.descr2,
request.workEstimate, queue.index, queue.nextDispatchR, queue.requests.Length(), queue.requestsExecuting, queue.seatsInUse, qs.totSeatsInUse)
request.workEstimate, queue.index, queue.nextDispatchR, queue.requestsWaiting.Length(), queue.requestsExecuting.Len(), queue.seatsInUse, qs.totSeatsInUse)
}
// When a request is dequeued for service -> qs.virtualStart += G * width
if request.totalWork() > rDecrement/100 { // A single increment should never be so big
@@ -774,6 +752,9 @@ func (qs *queueSet) dispatchLocked() bool {
// otherwise it returns false.
func (qs *queueSet) canAccommodateSeatsLocked(seats int) bool {
switch {
case qs.qCfg.DesiredNumQueues < 0:
// This is code for exemption from limitation
return true
case seats > qs.dCfg.ConcurrencyLimit:
// we have picked the queue with the minimum virtual finish time, but
// the number of seats this request asks for exceeds the concurrency limit.
@@ -809,7 +790,7 @@ func (qs *queueSet) findDispatchQueueToBoundLocked() (*queue, *request) {
for range qs.queues {
qs.robinIndex = (qs.robinIndex + 1) % nq
queue := qs.queues[qs.robinIndex]
oldestWaiting, _ := queue.requests.Peek()
oldestWaiting, _ := queue.requestsWaiting.Peek()
if oldestWaiting != nil {
sMin = ssMin(sMin, queue.nextDispatchR)
sMax = ssMax(sMax, queue.nextDispatchR)
@@ -826,7 +807,7 @@ func (qs *queueSet) findDispatchQueueToBoundLocked() (*queue, *request) {
}
}
oldestReqFromMinQueue, _ := minQueue.requests.Peek()
oldestReqFromMinQueue, _ := minQueue.requestsWaiting.Peek()
if oldestReqFromMinQueue == nil {
// This cannot happen
klog.ErrorS(errors.New("selected queue is empty"), "Impossible", "queueSet", qs.qCfg.Name)
@@ -913,7 +894,7 @@ func (qs *queueSet) finishRequestLocked(r *request) {
defer qs.removeQueueIfEmptyLocked(r)
qs.totSeatsInUse -= r.MaxSeats()
metrics.AddRequestConcurrencyInUse(qs.qCfg.Name, r.fsName, -r.MaxSeats())
metrics.AddSeatConcurrencyInUse(qs.qCfg.Name, r.fsName, -r.MaxSeats())
qs.execSeatsGauge.Add(-float64(r.MaxSeats()))
qs.seatDemandIntegrator.Set(float64(qs.totSeatsInUse + qs.totSeatsWaiting))
if r.queue != nil {
@@ -930,7 +911,7 @@ func (qs *queueSet) finishRequestLocked(r *request) {
} else if r.queue != nil {
klogV.Infof("QS(%s) at t=%s R=%v: request %#+v %#+v finished all use of %d seats, adjusted queue %d start R to %v due to service time %.9fs, queue will have %d requests with %#v waiting & %d requests occupying %d seats",
qs.qCfg.Name, now.Format(nsTimeFmt), qs.currentR, r.descr1, r.descr2, r.workEstimate.MaxSeats(), r.queue.index,
r.queue.nextDispatchR, actualServiceDuration.Seconds(), r.queue.requests.Length(), r.queue.requests.QueueSum(), r.queue.requestsExecuting, r.queue.seatsInUse)
r.queue.nextDispatchR, actualServiceDuration.Seconds(), r.queue.requestsWaiting.Length(), r.queue.requestsWaiting.QueueSum(), r.queue.requestsExecuting.Len(), r.queue.seatsInUse)
} else {
klogV.Infof("QS(%s) at t=%s R=%v: request %#+v %#+v finished all use of %d seats, qs will have %d requests occupying %d seats", qs.qCfg.Name, now.Format(nsTimeFmt), qs.currentR, r.descr1, r.descr2, r.workEstimate.InitialSeats, qs.totRequestsExecuting, qs.totSeatsInUse)
}
@@ -942,7 +923,7 @@ func (qs *queueSet) finishRequestLocked(r *request) {
} else if r.queue != nil {
klogV.Infof("QS(%s) at t=%s R=%v: request %#+v %#+v finished main use of %d seats but lingering on %d seats for %v seconds, adjusted queue %d start R to %v due to service time %.9fs, queue will have %d requests with %#v waiting & %d requests occupying %d seats",
qs.qCfg.Name, now.Format(nsTimeFmt), qs.currentR, r.descr1, r.descr2, r.workEstimate.InitialSeats, r.workEstimate.FinalSeats, additionalLatency.Seconds(), r.queue.index,
r.queue.nextDispatchR, actualServiceDuration.Seconds(), r.queue.requests.Length(), r.queue.requests.QueueSum(), r.queue.requestsExecuting, r.queue.seatsInUse)
r.queue.nextDispatchR, actualServiceDuration.Seconds(), r.queue.requestsWaiting.Length(), r.queue.requestsWaiting.QueueSum(), r.queue.requestsExecuting.Len(), r.queue.seatsInUse)
} else {
klogV.Infof("QS(%s) at t=%s R=%v: request %#+v %#+v finished main use of %d seats but lingering on %d seats for %v seconds, qs will have %d requests occupying %d seats", qs.qCfg.Name, now.Format(nsTimeFmt), qs.currentR, r.descr1, r.descr2, r.workEstimate.InitialSeats, r.workEstimate.FinalSeats, additionalLatency.Seconds(), qs.totRequestsExecuting, qs.totSeatsInUse)
}
@@ -959,7 +940,7 @@ func (qs *queueSet) finishRequestLocked(r *request) {
} else if r.queue != nil {
klogV.Infof("QS(%s) at t=%s R=%v: request %#+v %#+v finished lingering on %d seats, queue %d will have %d requests with %#v waiting & %d requests occupying %d seats",
qs.qCfg.Name, now.Format(nsTimeFmt), qs.currentR, r.descr1, r.descr2, r.workEstimate.FinalSeats, r.queue.index,
r.queue.requests.Length(), r.queue.requests.QueueSum(), r.queue.requestsExecuting, r.queue.seatsInUse)
r.queue.requestsWaiting.Length(), r.queue.requestsWaiting.QueueSum(), r.queue.requestsExecuting.Len(), r.queue.seatsInUse)
} else {
klogV.Infof("QS(%s) at t=%s R=%v: request %#+v %#+v finished lingering on %d seats, qs will have %d requests occupying %d seats", qs.qCfg.Name, now.Format(nsTimeFmt), qs.currentR, r.descr1, r.descr2, r.workEstimate.FinalSeats, qs.totRequestsExecuting, qs.totSeatsInUse)
}
@@ -969,12 +950,14 @@ func (qs *queueSet) finishRequestLocked(r *request) {
if r.queue != nil {
// request has finished, remove from requests executing
r.queue.requestsExecuting--
r.queue.requestsExecuting = r.queue.requestsExecuting.Delete(r)
// When a request finishes being served, and the actual service time was S,
// the queue's start R is decremented by (G - S)*width.
r.queue.nextDispatchR -= fqrequest.SeatsTimesDuration(float64(r.InitialSeats()), qs.estimatedServiceDuration-actualServiceDuration)
qs.boundNextDispatchLocked(r.queue)
} else {
qs.requestsExecutingSet = qs.requestsExecutingSet.Delete(r)
}
}
@@ -986,7 +969,7 @@ func (qs *queueSet) finishRequestLocked(r *request) {
// The following hack addresses the first side of that inequity,
// by insisting that dispatch in the virtual world not precede arrival.
func (qs *queueSet) boundNextDispatchLocked(queue *queue) {
oldestReqFromMinQueue, _ := queue.requests.Peek()
oldestReqFromMinQueue, _ := queue.requestsWaiting.Peek()
if oldestReqFromMinQueue == nil {
return
}
@@ -1007,8 +990,8 @@ func (qs *queueSet) removeQueueIfEmptyLocked(r *request) {
// If there are more queues than desired and this one has no
// requests then remove it
if len(qs.queues) > qs.qCfg.DesiredNumQueues &&
r.queue.requests.Length() == 0 &&
r.queue.requestsExecuting == 0 {
r.queue.requestsWaiting.Length() == 0 &&
r.queue.requestsExecuting.Len() == 0 {
qs.queues = removeQueueAndUpdateIndexes(qs.queues, r.queue.index)
// decrement here to maintain the invariant that (qs.robinIndex+1) % numQueues
@@ -1033,14 +1016,33 @@ func (qs *queueSet) Dump(includeRequestDetails bool) debug.QueueSetDump {
qs.lock.Lock()
defer qs.lock.Unlock()
d := debug.QueueSetDump{
Queues: make([]debug.QueueDump, len(qs.queues)),
Waiting: qs.totRequestsWaiting,
Executing: qs.totRequestsExecuting,
SeatsInUse: qs.totSeatsInUse,
SeatsWaiting: qs.totSeatsWaiting,
Queues: make([]debug.QueueDump, len(qs.queues)),
QueuelessExecutingRequests: SetMapReduce(dumpRequest(includeRequestDetails), append1[debug.RequestDump])(qs.requestsExecutingSet),
Waiting: qs.totRequestsWaiting,
Executing: qs.totRequestsExecuting,
SeatsInUse: qs.totSeatsInUse,
SeatsWaiting: qs.totSeatsWaiting,
Dispatched: qs.totRequestsDispatched,
Rejected: qs.totRequestsRejected,
Timedout: qs.totRequestsTimedout,
Cancelled: qs.totRequestsCancelled,
}
for i, q := range qs.queues {
d.Queues[i] = q.dumpLocked(includeRequestDetails)
}
return d
}
func OnRequestDispatched(r fq.Request) {
req, ok := r.(*request)
if !ok {
return
}
qs := req.qs
if qs != nil {
qs.lock.Lock()
defer qs.lock.Unlock()
qs.totRequestsDispatched++
}
}


@@ -20,6 +20,7 @@ import (
"context"
"time"
"k8s.io/apimachinery/pkg/util/sets"
genericrequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/util/flowcontrol/debug"
fq "k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing"
@@ -90,15 +91,15 @@ type completedWorkEstimate struct {
// queue is a sequence of requests that have arrived but not yet finished
// execution in both the real and virtual worlds.
type queue struct {
// The requests not yet executing in the real world are stored in a FIFO list.
requests fifo
// requestsWaiting holds the requests not yet executing in the real world, in FIFO order.
requestsWaiting fifo
// nextDispatchR is the R progress meter reading at
// which the next request will be dispatched in the virtual world.
nextDispatchR fcrequest.SeatSeconds
// requestsExecuting is the count in the real world.
requestsExecuting int
// requestsExecuting is the set of requests executing in the real world.
requestsExecuting sets.Set[*request]
// index is the position of this queue among those in its queueSet.
index int
@@ -145,28 +146,14 @@ func (qs *queueSet) computeFinalWork(we *fcrequest.WorkEstimate) fcrequest.SeatS
}
func (q *queue) dumpLocked(includeDetails bool) debug.QueueDump {
digest := make([]debug.RequestDump, q.requests.Length())
i := 0
q.requests.Walk(func(r *request) bool {
// dump requests.
digest[i].MatchedFlowSchema = r.fsName
digest[i].FlowDistinguisher = r.flowDistinguisher
digest[i].ArriveTime = r.arrivalTime
digest[i].StartTime = r.startTime
digest[i].WorkEstimate = r.workEstimate.WorkEstimate
if includeDetails {
userInfo, _ := genericrequest.UserFrom(r.ctx)
digest[i].UserName = userInfo.GetName()
requestInfo, ok := genericrequest.RequestInfoFrom(r.ctx)
if ok {
digest[i].RequestInfo = *requestInfo
}
}
i++
waitingDigest := make([]debug.RequestDump, 0, q.requestsWaiting.Length())
q.requestsWaiting.Walk(func(r *request) bool {
waitingDigest = append(waitingDigest, dumpRequest(includeDetails)(r))
return true
})
executingDigest := SetMapReduce(dumpRequest(includeDetails), append1[debug.RequestDump])(q.requestsExecuting)
sum := q.requests.QueueSum()
sum := q.requestsWaiting.QueueSum()
queueSum := debug.QueueSum{
InitialSeatsSum: sum.InitialSeatsSum,
MaxSeatsSum: sum.MaxSeatsSum,
@@ -175,9 +162,57 @@ func (q *queue) dumpLocked(includeDetails bool) debug.QueueDump {
return debug.QueueDump{
NextDispatchR: q.nextDispatchR.String(),
Requests: digest,
ExecutingRequests: q.requestsExecuting,
Requests: waitingDigest,
RequestsExecuting: executingDigest,
ExecutingRequests: q.requestsExecuting.Len(),
SeatsInUse: q.seatsInUse,
QueueSum: queueSum,
}
}
func dumpRequest(includeDetails bool) func(*request) debug.RequestDump {
return func(r *request) debug.RequestDump {
ans := debug.RequestDump{
MatchedFlowSchema: r.fsName,
FlowDistinguisher: r.flowDistinguisher,
ArriveTime: r.arrivalTime,
StartTime: r.startTime,
WorkEstimate: r.workEstimate.WorkEstimate,
}
if includeDetails {
userInfo, _ := genericrequest.UserFrom(r.ctx)
ans.UserName = userInfo.GetName()
requestInfo, ok := genericrequest.RequestInfoFrom(r.ctx)
if ok {
ans.RequestInfo = *requestInfo
}
}
return ans
}
}
// SetMapReduce is map-reduce starting from a set type in the sets package.
func SetMapReduce[Elt comparable, Result, Accumulator any](mapFn func(Elt) Result, reduceFn func(Accumulator, Result) Accumulator) func(map[Elt]sets.Empty) Accumulator {
return func(set map[Elt]sets.Empty) Accumulator {
var ans Accumulator
for elt := range set {
ans = reduceFn(ans, mapFn(elt))
}
return ans
}
}
// SliceMapReduce is map-reduce starting from a slice.
func SliceMapReduce[Elt, Result, Accumulator any](mapFn func(Elt) Result, reduceFn func(Accumulator, Result) Accumulator) func([]Elt) Accumulator {
return func(slice []Elt) Accumulator {
var ans Accumulator
for _, elt := range slice {
ans = reduceFn(ans, mapFn(elt))
}
return ans
}
}
func or(x, y bool) bool { return x || y }
func append1[Elt any](slice []Elt, next Elt) []Elt { return append(slice, next) }
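
Usage matching the Dump code above (a sketch; reqs stands in for a real executing-request set):

var reqs sets.Set[*request]
dumps := SetMapReduce(dumpRequest(false), append1[debug.RequestDump])(reqs)
_ = dumps // one debug.RequestDump per executing request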