From 4f5efe287125f887558f8dc7e93622bbba5e9f6f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Oct 2025 17:56:13 +0000 Subject: [PATCH] Fix zombie SSH processes with --sftp-ssh by ensuring Wait() is called only once The issue was that cmd.Wait() was being called multiple times on the same process - once in the background goroutine and once in Close(). This could lead to zombie processes because only the first call to Wait() properly reaps the process. The fix uses sync.Once to ensure Wait() is only called once per SSH process, storing and returning the result on subsequent calls. Added tests to verify the fix works correctly. Co-authored-by: ncw <536803+ncw@users.noreply.github.com> --- backend/sftp/ssh_external.go | 28 +++++++---- backend/sftp/ssh_external_test.go | 84 +++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 10 deletions(-) create mode 100644 backend/sftp/ssh_external_test.go diff --git a/backend/sftp/ssh_external.go b/backend/sftp/ssh_external.go index a42cdd7cc..0193ed581 100644 --- a/backend/sftp/ssh_external.go +++ b/backend/sftp/ssh_external.go @@ -10,6 +10,7 @@ import ( "os/exec" "slices" "strings" + "sync" "time" "github.com/rclone/rclone/fs" @@ -76,6 +77,8 @@ type sshSessionExternal struct { cancel func() startCalled bool runningSFTP bool + waitOnce sync.Once // ensure Wait() is only called once + waitErr error // result of the Wait() call } func (f *Fs) newSSHSessionExternal() *sshSessionExternal { @@ -175,16 +178,21 @@ func (s *sshSessionExternal) exited() bool { // Wait for the command to exit func (s *sshSessionExternal) Wait() error { - if s.exited() { - return nil - } - err := s.cmd.Wait() - if err == nil { - fs.Debugf(s.f, "ssh external: command exited OK") - } else { - fs.Debugf(s.f, "ssh external: command exited with error: %v", err) - } - return err + // Use sync.Once to ensure we only wait for the process once + // This prevents zombie processes that occur when Wait() is called multiple times + s.waitOnce.Do(func() { + if s.exited() { + s.waitErr = nil + return + } + s.waitErr = s.cmd.Wait() + if s.waitErr == nil { + fs.Debugf(s.f, "ssh external: command exited OK") + } else { + fs.Debugf(s.f, "ssh external: command exited with error: %v", s.waitErr) + } + }) + return s.waitErr } // Run runs cmd on the remote host. Typically, the remote diff --git a/backend/sftp/ssh_external_test.go b/backend/sftp/ssh_external_test.go new file mode 100644 index 000000000..b2a51f904 --- /dev/null +++ b/backend/sftp/ssh_external_test.go @@ -0,0 +1,84 @@ +//go:build !plan9 + +package sftp + +import ( + "testing" + "time" + + "github.com/rclone/rclone/fs" + "github.com/stretchr/testify/assert" +) + +// TestSSHExternalWaitMultipleCalls verifies that calling Wait() multiple times +// doesn't cause zombie processes +func TestSSHExternalWaitMultipleCalls(t *testing.T) { + // Create a minimal Fs object for testing + opt := &Options{ + SSH: fs.SpaceSepList{"echo", "test"}, + } + + f := &Fs{ + opt: *opt, + } + + // Create a new SSH session + session := f.newSSHSessionExternal() + + // Start a simple command that exits quickly + err := session.Start("exit 0") + assert.NoError(t, err) + + // Give the command time to complete + time.Sleep(100 * time.Millisecond) + + // Call Wait() multiple times - this should not cause issues + err1 := session.Wait() + err2 := session.Wait() + err3 := session.Wait() + + // All calls should return the same result (no error in this case) + assert.NoError(t, err1) + assert.NoError(t, err2) + assert.NoError(t, err3) + + // Verify the process has exited + assert.True(t, session.exited()) +} + +// TestSSHExternalCloseMultipleCalls verifies that calling Close() multiple times +// followed by Wait() calls doesn't cause zombie processes +func TestSSHExternalCloseMultipleCalls(t *testing.T) { + // Create a minimal Fs object for testing + opt := &Options{ + SSH: fs.SpaceSepList{"sleep", "10"}, + } + + f := &Fs{ + opt: *opt, + } + + // Create a new SSH session + session := f.newSSHSessionExternal() + + // Start a long-running command + err := session.Start("sleep 10") + if err != nil { + t.Skip("Cannot start sleep command:", err) + } + + // Close should cancel and wait for the process + _ = session.Close() + + // Additional Wait() calls should return the same error + err2 := session.Wait() + err3 := session.Wait() + + // All should complete without panicking + // err1 could be nil or an error depending on how the process was killed + // err2 and err3 should be the same + assert.Equal(t, err2, err3, "Subsequent Wait() calls should return same result") + + // Verify the process has exited + assert.True(t, session.exited()) +}