From 3a902dd1a04cb9ba2f15f68d6201162eca40d9cb Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Tue, 3 Feb 2026 16:40:54 +0000 Subject: [PATCH] pacer: fix deadlock between pacer token and --max-connections It was possible in the presence of --max-connections and recursive calls to the pacer to deadlock it leaving all connections waiting on either a max connection token or a pacer token. This fixes the problem by making sure we return the pacer token on schedule if we take it. This also short circuits the pacer token if sleepTime is 0. --- lib/pacer/pacer.go | 24 +++++++++++++++--------- lib/pacer/pacer_test.go | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/lib/pacer/pacer.go b/lib/pacer/pacer.go index 357ff7d49..fce47bc1f 100644 --- a/lib/pacer/pacer.go +++ b/lib/pacer/pacer.go @@ -159,18 +159,24 @@ func (p *Pacer) beginCall(limitConnections bool) { // XXX ms later we put another in. We could do this with a // Ticker more accurately, but then we'd have to work out how // not to run it when it wasn't needed - <-p.pacer + + p.mu.Lock() + sleepTime := p.state.SleepTime + p.mu.Unlock() + + if sleepTime > 0 { + <-p.pacer + + // Restart the timer + go func(t time.Duration) { + time.Sleep(t) + p.pacer <- struct{}{} + }(sleepTime) + } + if limitConnections { <-p.connTokens } - - p.mu.Lock() - // Restart the timer - go func(t time.Duration) { - time.Sleep(t) - p.pacer <- struct{}{} - }(p.state.SleepTime) - p.mu.Unlock() } // endCall implements the pacing algorithm diff --git a/lib/pacer/pacer_test.go b/lib/pacer/pacer_test.go index 5c5bbd298..e34a406aa 100644 --- a/lib/pacer/pacer_test.go +++ b/lib/pacer/pacer_test.go @@ -367,6 +367,43 @@ func TestCallMaxConnectionsRecursiveDeadlock(t *testing.T) { assert.Equal(t, errFoo, err) } +func TestCallMaxConnectionsRecursiveDeadlock2(t *testing.T) { + p := New(CalculatorOption(NewDefault(MinSleep(1*time.Millisecond), MaxSleep(2*time.Millisecond)))) + p.SetMaxConnections(1) + dp := &dummyPaced{retry: false} + wg := new(sync.WaitGroup) + + // Normal + for range 100 { + wg.Add(1) + go func() { + defer wg.Done() + err := p.Call(func() (bool, error) { + // check we have taken the connection token + assert.Equal(t, 0, len(p.connTokens)) + return false, nil + }) + assert.NoError(t, err) + }() + + // Now attempt a recursive call + wg.Add(1) + go func() { + defer wg.Done() + err := p.Call(func() (bool, error) { + // check we have taken the connection token + assert.Equal(t, 0, len(p.connTokens)) + // Do recursive call + return false, p.Call(dp.fn) + }) + assert.Equal(t, errFoo, err) + }() + } + + // Tidy up + wg.Wait() +} + func TestRetryAfterError_NonNilErr(t *testing.T) { orig := errors.New("test failure") dur := 2 * time.Second