1
0
mirror of https://github.com/rclone/rclone.git synced 2025-12-06 00:03:32 +00:00

Compare commits

...

4 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
46bc876085 Fix zombie SSH processes by storing the first session in sshClientExternal
The real issue was that sshClientExternal.session was never assigned,
so Wait() always returned nil without waiting for the SSH process to exit.
This caused zombie processes because the process was never reaped.

The fix:
- Store the first session created in NewSession() to s.session
- This allows Wait() to actually wait for the SSH process
- The sync.Once pattern is still useful for thread-safety
- Updated comments to reflect the correct behavior

Fixes the zombie process issue reported in rclone/rclone#8929

Co-authored-by: ncw <536803+ncw@users.noreply.github.com>
2025-10-31 11:54:04 +00:00
copilot-swe-agent[bot]
a58efc1544 Simplify Wait() implementation based on code review feedback
Remove unnecessary exited() check - we should always call cmd.Wait()
in the sync.Once block to properly reap the process and capture its
exit status. The sync.Once ensures it's only called once.

Co-authored-by: ncw <536803+ncw@users.noreply.github.com>
2025-10-30 18:03:48 +00:00
copilot-swe-agent[bot]
4f5efe2871 Fix zombie SSH processes with --sftp-ssh by ensuring Wait() is called only once
The issue was that cmd.Wait() was being called multiple times on the same
process - once in the background goroutine and once in Close(). This could
lead to zombie processes because only the first call to Wait() properly
reaps the process.

The fix uses sync.Once to ensure Wait() is only called once per SSH process,
storing and returning the result on subsequent calls.

Added tests to verify the fix works correctly.

Co-authored-by: ncw <536803+ncw@users.noreply.github.com>
2025-10-30 17:56:13 +00:00
copilot-swe-agent[bot]
6d9f4a3c20 Initial plan 2025-10-30 17:48:34 +00:00
2 changed files with 101 additions and 10 deletions

View File

@@ -10,6 +10,7 @@ import (
"os/exec" "os/exec"
"slices" "slices"
"strings" "strings"
"sync"
"time" "time"
"github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs"
@@ -50,6 +51,9 @@ func (s *sshClientExternal) Close() error {
func (s *sshClientExternal) NewSession() (sshSession, error) { func (s *sshClientExternal) NewSession() (sshSession, error) {
session := s.f.newSSHSessionExternal() session := s.f.newSSHSessionExternal()
if s.session == nil { if s.session == nil {
// Store the first session so Wait() and Close() can use it
s.session = session
} else {
fs.Debugf(s.f, "ssh external: creating additional session") fs.Debugf(s.f, "ssh external: creating additional session")
} }
return session, nil return session, nil
@@ -76,6 +80,8 @@ type sshSessionExternal struct {
cancel func() cancel func()
startCalled bool startCalled bool
runningSFTP bool runningSFTP bool
waitOnce sync.Once // ensure Wait() is only called once
waitErr error // result of the Wait() call
} }
func (f *Fs) newSSHSessionExternal() *sshSessionExternal { func (f *Fs) newSSHSessionExternal() *sshSessionExternal {
@@ -175,16 +181,17 @@ func (s *sshSessionExternal) exited() bool {
// Wait for the command to exit // Wait for the command to exit
func (s *sshSessionExternal) Wait() error { func (s *sshSessionExternal) Wait() error {
if s.exited() { // Use sync.Once to ensure we only wait for the process once.
return nil // This is safe even if Wait() is called from multiple goroutines.
} s.waitOnce.Do(func() {
err := s.cmd.Wait() s.waitErr = s.cmd.Wait()
if err == nil { if s.waitErr == nil {
fs.Debugf(s.f, "ssh external: command exited OK") fs.Debugf(s.f, "ssh external: command exited OK")
} else { } else {
fs.Debugf(s.f, "ssh external: command exited with error: %v", err) fs.Debugf(s.f, "ssh external: command exited with error: %v", s.waitErr)
} }
return err })
return s.waitErr
} }
// Run runs cmd on the remote host. Typically, the remote // Run runs cmd on the remote host. Typically, the remote

View File

@@ -0,0 +1,84 @@
//go:build !plan9
package sftp
import (
"testing"
"time"
"github.com/rclone/rclone/fs"
"github.com/stretchr/testify/assert"
)
// TestSSHExternalWaitMultipleCalls verifies that calling Wait() multiple times
// doesn't cause zombie processes
func TestSSHExternalWaitMultipleCalls(t *testing.T) {
// Create a minimal Fs object for testing
opt := &Options{
SSH: fs.SpaceSepList{"echo", "test"},
}
f := &Fs{
opt: *opt,
}
// Create a new SSH session
session := f.newSSHSessionExternal()
// Start a simple command that exits quickly
err := session.Start("exit 0")
assert.NoError(t, err)
// Give the command time to complete
time.Sleep(100 * time.Millisecond)
// Call Wait() multiple times - this should not cause issues
err1 := session.Wait()
err2 := session.Wait()
err3 := session.Wait()
// All calls should return the same result (no error in this case)
assert.NoError(t, err1)
assert.NoError(t, err2)
assert.NoError(t, err3)
// Verify the process has exited
assert.True(t, session.exited())
}
// TestSSHExternalCloseMultipleCalls verifies that calling Close() multiple times
// followed by Wait() calls doesn't cause zombie processes
func TestSSHExternalCloseMultipleCalls(t *testing.T) {
// Create a minimal Fs object for testing
opt := &Options{
SSH: fs.SpaceSepList{"sleep", "10"},
}
f := &Fs{
opt: *opt,
}
// Create a new SSH session
session := f.newSSHSessionExternal()
// Start a long-running command
err := session.Start("sleep 10")
if err != nil {
t.Skip("Cannot start sleep command:", err)
}
// Close should cancel and wait for the process
_ = session.Close()
// Additional Wait() calls should return the same error
err2 := session.Wait()
err3 := session.Wait()
// All should complete without panicking
// err1 could be nil or an error depending on how the process was killed
// err2 and err3 should be the same
assert.Equal(t, err2, err3, "Subsequent Wait() calls should return same result")
// Verify the process has exited
assert.True(t, session.exited())
}