Skip to content

Commit ef83685

Browse files
gloursclaude
authored andcommitted
fix: don't cache transient errors in version negotiation
Replace sync.Once with sync.Mutex so that only successful version lookups are cached. Errors (context cancellation, network blips) are returned without caching, allowing subsequent calls to retry. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Signed-off-by: Guillaume Lours <glours@users.noreply.github.com>
1 parent c7d1a60 commit ef83685

2 files changed

Lines changed: 107 additions & 29 deletions

File tree

pkg/compose/compose.go

Lines changed: 42 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -494,49 +494,62 @@ func (s *composeService) isSwarmEnabled(ctx context.Context) (bool, error) {
494494
return swarmEnabled.val, swarmEnabled.err
495495
}
496496

497+
// runtimeVersionCache caches a version string after a successful lookup.
498+
// Errors (including context cancellation) are not cached so that
499+
// subsequent calls can retry with a fresh context.
497500
type runtimeVersionCache struct {
498-
once sync.Once
499-
val string
500-
err error
501+
mu sync.Mutex
502+
val string
501503
}
502504

503505
// RuntimeVersion returns the raw API version reported by the daemon.
504506
// Callers that need the negotiated/effective client API version should use
505507
// CurrentAPIVersion instead.
506508
func (s *composeService) RuntimeVersion(ctx context.Context) (string, error) {
507-
s.runtimeVersion.once.Do(func() {
508-
version, err := s.apiClient().ServerVersion(ctx, client.ServerVersionOptions{})
509-
if err != nil {
510-
s.runtimeVersion.err = err
511-
return
512-
}
513-
s.runtimeVersion.val = version.APIVersion
514-
})
515-
return s.runtimeVersion.val, s.runtimeVersion.err
509+
s.runtimeVersion.mu.Lock()
510+
defer s.runtimeVersion.mu.Unlock()
511+
if s.runtimeVersion.val != "" {
512+
return s.runtimeVersion.val, nil
513+
}
514+
version, err := s.apiClient().ServerVersion(ctx, client.ServerVersionOptions{})
515+
if err != nil {
516+
return "", err
517+
}
518+
s.runtimeVersion.val = version.APIVersion
519+
return s.runtimeVersion.val, nil
516520
}
517521

518522
// CurrentAPIVersion returns the API version currently used by the Docker client.
519523
// Trigger negotiation first so version-gated request shaping matches the version
520524
// that subsequent API calls will actually use.
525+
//
526+
// Lock ordering: currentAPIVersion.mu must be acquired before runtimeVersion.mu
527+
// (via the RuntimeVersion fallback). No code path should reverse this order.
521528
func (s *composeService) CurrentAPIVersion(ctx context.Context) (string, error) {
522-
s.currentAPIVersion.once.Do(func() {
523-
cli := s.apiClient()
524-
_, err := cli.Ping(ctx, client.PingOptions{NegotiateAPIVersion: true})
525-
if err != nil {
526-
s.currentAPIVersion.err = err
527-
return
528-
}
529+
s.currentAPIVersion.mu.Lock()
530+
defer s.currentAPIVersion.mu.Unlock()
531+
if s.currentAPIVersion.val != "" {
532+
return s.currentAPIVersion.val, nil
533+
}
529534

530-
version := cli.ClientVersion()
531-
if version != "" {
532-
s.currentAPIVersion.val = version
533-
return
534-
}
535+
cli := s.apiClient()
536+
_, err := cli.Ping(ctx, client.PingOptions{NegotiateAPIVersion: true})
537+
if err != nil {
538+
return "", err
539+
}
535540

536-
// Defensive fallback for unexpected client implementations or mocks that
537-
// do not populate ClientVersion after a successful negotiated ping.
538-
s.currentAPIVersion.val, s.currentAPIVersion.err = s.RuntimeVersion(ctx)
539-
})
541+
version := cli.ClientVersion()
542+
if version != "" {
543+
s.currentAPIVersion.val = version
544+
return s.currentAPIVersion.val, nil
545+
}
540546

541-
return s.currentAPIVersion.val, s.currentAPIVersion.err
547+
// Defensive fallback for unexpected client implementations or mocks that
548+
// do not populate ClientVersion after a successful negotiated ping.
549+
val, err := s.RuntimeVersion(ctx)
550+
if err != nil {
551+
return "", err
552+
}
553+
s.currentAPIVersion.val = val
554+
return s.currentAPIVersion.val, nil
542555
}

pkg/compose/convergence_test.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,3 +521,68 @@ func TestCurrentAPIVersionCachesNegotiation(t *testing.T) {
521521
assert.NilError(t, err)
522522
assert.Equal(t, version, "1.43")
523523
}
524+
525+
func TestRuntimeVersionRetriesOnTransientError(t *testing.T) {
526+
mockCtrl := gomock.NewController(t)
527+
defer mockCtrl.Finish()
528+
529+
apiClient := mocks.NewMockAPIClient(mockCtrl)
530+
cli := mocks.NewMockCli(mockCtrl)
531+
tested := &composeService{dockerCli: cli}
532+
533+
cli.EXPECT().Client().Return(apiClient).AnyTimes()
534+
535+
// First call: ServerVersion fails with a transient error
536+
firstCall := apiClient.EXPECT().ServerVersion(gomock.Any(), gomock.Any()).
537+
Return(client.ServerVersionResult{}, context.DeadlineExceeded).Times(1)
538+
539+
// Second call: succeeds
540+
apiClient.EXPECT().ServerVersion(gomock.Any(), gomock.Any()).
541+
Return(client.ServerVersionResult{APIVersion: "1.48"}, nil).Times(1).After(firstCall)
542+
543+
_, err := tested.RuntimeVersion(t.Context())
544+
assert.ErrorIs(t, err, context.DeadlineExceeded)
545+
546+
version, err := tested.RuntimeVersion(t.Context())
547+
assert.NilError(t, err)
548+
assert.Equal(t, version, "1.48")
549+
550+
// Third call returns cached value
551+
version, err = tested.RuntimeVersion(t.Context())
552+
assert.NilError(t, err)
553+
assert.Equal(t, version, "1.48")
554+
}
555+
556+
func TestCurrentAPIVersionRetriesOnTransientError(t *testing.T) {
557+
mockCtrl := gomock.NewController(t)
558+
defer mockCtrl.Finish()
559+
560+
apiClient := mocks.NewMockAPIClient(mockCtrl)
561+
cli := mocks.NewMockCli(mockCtrl)
562+
tested := &composeService{dockerCli: cli}
563+
564+
cli.EXPECT().Client().Return(apiClient).AnyTimes()
565+
566+
// First call: Ping fails with a transient error
567+
firstCall := apiClient.EXPECT().Ping(gomock.Any(), client.PingOptions{NegotiateAPIVersion: true}).
568+
Return(client.PingResult{}, context.DeadlineExceeded).Times(1)
569+
570+
// Second call: Ping succeeds after the transient failure
571+
apiClient.EXPECT().Ping(gomock.Any(), client.PingOptions{NegotiateAPIVersion: true}).
572+
Return(client.PingResult{APIVersion: "1.44"}, nil).Times(1).After(firstCall)
573+
apiClient.EXPECT().ClientVersion().Return("1.44").Times(1)
574+
575+
// First call should return the transient error
576+
_, err := tested.CurrentAPIVersion(t.Context())
577+
assert.ErrorIs(t, err, context.DeadlineExceeded)
578+
579+
// Second call should succeed — error was not cached
580+
version, err := tested.CurrentAPIVersion(t.Context())
581+
assert.NilError(t, err)
582+
assert.Equal(t, version, "1.44")
583+
584+
// Third call should return the cached value without calling Ping again
585+
version, err = tested.CurrentAPIVersion(t.Context())
586+
assert.NilError(t, err)
587+
assert.Equal(t, version, "1.44")
588+
}

0 commit comments

Comments
 (0)