mirror of
https://github.com/rclone/rclone.git
synced 2025-12-06 00:03:32 +00:00
march: Implement --assume-listings-sorted to speed up sync starting
This can be used when the source and destination backends are guaranteed to return the items in the same sorted order. Fixes #5859
This commit is contained in:
@@ -852,6 +852,32 @@ the binary units, e.g. 1, 2\*\*10, 2\*\*20, 2\*\*30 respectively.
|
||||
|
||||
See also [--human-readable](#human-readable).
|
||||
|
||||
### --assume-listings-sorted
|
||||
|
||||
This flag can be used when the source and destination backends are
|
||||
guaranteed to return the items in the same sorted order. In that
|
||||
case it will speed up the sync.
|
||||
|
||||
Not all backends are guaranteed to return sorted entries (e.g. local)
|
||||
but s3 should, so an s3 to s3 sync could benefit from this flag.
|
||||
|
||||
If rclone finds an out of order directory entry then it will cancel
|
||||
the sync with an error like this (or the equivalent for the destination):
|
||||
|
||||
```console
|
||||
out of order listing in source (remote:dir)
|
||||
```
|
||||
|
||||
In this case you should remove the `--assume-listings-sorted` flag.
|
||||
|
||||
If you are using `--assume-listings-sorted` then rclone will assume
|
||||
`--no-unicode-normalization` and it will compare file names in a case
|
||||
sensitive way.
|
||||
|
||||
Normally sorting directory entries is not a bottleneck, but it can
|
||||
become so with syncs of millions of items in a single directory as the
|
||||
sync will not start until the directory listing is complete.
|
||||
|
||||
## Main options
|
||||
|
||||
### --backup-dir string
|
||||
|
||||
@@ -566,6 +566,12 @@ var ConfigOptionsInfo = Options{{
|
||||
Default: "",
|
||||
Help: "HTTP proxy URL.",
|
||||
Groups: "Networking",
|
||||
}, {
|
||||
Name: "assume_listings_sorted",
|
||||
Default: false,
|
||||
Advanced: true,
|
||||
Help: "If set will not sort listings. If listings aren't sorted the sync may go wrong.",
|
||||
Groups: "Copy",
|
||||
}}
|
||||
|
||||
// ConfigInfo is filesystem config options
|
||||
@@ -680,6 +686,7 @@ type ConfigInfo struct {
|
||||
MaxConnections int `config:"max_connections"`
|
||||
NameTransform []string `config:"name_transform"`
|
||||
HTTPProxy string `config:"http_proxy"`
|
||||
AssumeListingsSorted bool `config:"assume_listings_sorted"`
|
||||
}
|
||||
|
||||
func init() {
|
||||
|
||||
@@ -45,6 +45,7 @@ type Sorter struct {
|
||||
keyFn KeyFn // transform an entry into a sort key
|
||||
cutoff int // number of entries above which we start extsort
|
||||
extSort bool // true if we are ext sorting
|
||||
noSort bool // true if we aren't sorting
|
||||
inputChan chan string // for sending data to the ext sort
|
||||
outputChan <-chan string // for receiving data from the ext sort
|
||||
errChan <-chan error // for getting errors from the ext sort
|
||||
@@ -78,6 +79,7 @@ func NewSorter(ctx context.Context, f NewObjecter, callback fs.ListRCallback, ke
|
||||
keyFn: keyFn,
|
||||
cutoff: ci.ListCutoff,
|
||||
errs: errcount.New(),
|
||||
noSort: ci.AssumeListingsSorted,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -172,6 +174,9 @@ func (ls *Sorter) startExtSort() (err error) {
|
||||
//
|
||||
// Safe to call from concurrent go routines
|
||||
func (ls *Sorter) Add(entries fs.DirEntries) error {
|
||||
if ls.noSort {
|
||||
return ls.callback(entries)
|
||||
}
|
||||
ls.mu.Lock()
|
||||
defer ls.mu.Unlock()
|
||||
if ls.extSort {
|
||||
@@ -267,6 +272,9 @@ func (lh *listHelper) Flush() error {
|
||||
|
||||
// Send the sorted entries to the callback.
|
||||
func (ls *Sorter) Send() (err error) {
|
||||
if ls.noSort {
|
||||
return nil
|
||||
}
|
||||
ls.mu.Lock()
|
||||
defer ls.mu.Unlock()
|
||||
|
||||
|
||||
@@ -46,6 +46,46 @@ func TestSorter(t *testing.T) {
|
||||
assert.Equal(t, fs.DirEntries(nil), ls.entries)
|
||||
}
|
||||
|
||||
func TestSorterAssumeSorted(t *testing.T) {
|
||||
ctx, ci := fs.AddConfig(context.Background())
|
||||
ci.AssumeListingsSorted = true
|
||||
|
||||
gotEntry := 0
|
||||
wantEntries := fs.DirEntries{
|
||||
mockdir.New("c"),
|
||||
mockobject.Object("C"),
|
||||
mockdir.New("b"),
|
||||
mockobject.Object("B"),
|
||||
mockdir.New("a"),
|
||||
mockobject.Object("A"),
|
||||
}
|
||||
callback := func(entries fs.DirEntries) error {
|
||||
for _, entry := range entries {
|
||||
require.Equal(t, wantEntries[gotEntry], entry)
|
||||
gotEntry++
|
||||
}
|
||||
return nil
|
||||
}
|
||||
ls, err := NewSorter(ctx, nil, callback, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Test Add
|
||||
require.NoError(t, ls.Add(wantEntries[0:2]))
|
||||
require.NoError(t, ls.Add(wantEntries[2:6]))
|
||||
assert.Equal(t, 6, gotEntry)
|
||||
assert.Equal(t, fs.DirEntries(nil), ls.entries)
|
||||
|
||||
// Test Send
|
||||
err = ls.Send()
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 6, gotEntry)
|
||||
|
||||
// Test Cleanup
|
||||
ls.CleanUp()
|
||||
assert.Equal(t, 6, gotEntry)
|
||||
assert.Equal(t, fs.DirEntries(nil), ls.entries)
|
||||
}
|
||||
|
||||
func TestSorterIdentity(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cmpFn := func(a, b fs.DirEntry) int {
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/fs/dirtree"
|
||||
"github.com/rclone/rclone/fs/filter"
|
||||
"github.com/rclone/rclone/fs/fserrors"
|
||||
"github.com/rclone/rclone/fs/list"
|
||||
"github.com/rclone/rclone/fs/walk"
|
||||
"github.com/rclone/rclone/lib/transform"
|
||||
@@ -330,7 +331,8 @@ func (m *March) matchListings(srcChan, dstChan <-chan fs.DirEntry, srcOnly, dstO
|
||||
continue
|
||||
} else if srcName < srcPrevName {
|
||||
// this should never happen since we sort the listings
|
||||
panic("Out of order listing in source")
|
||||
// however the user may be using the --assume-listings-sorted flag
|
||||
return fserrors.FatalError(fmt.Errorf("out of order listing in source (%v)", src.Fs()))
|
||||
}
|
||||
}
|
||||
if dst != nil && dstPrev != nil {
|
||||
@@ -340,7 +342,8 @@ func (m *March) matchListings(srcChan, dstChan <-chan fs.DirEntry, srcOnly, dstO
|
||||
continue
|
||||
} else if dstName < dstPrevName {
|
||||
// this should never happen since we sort the listings
|
||||
panic("Out of order listing in destination")
|
||||
// however the user may be using the --assume-listings-sorted flag
|
||||
return fserrors.FatalError(fmt.Errorf("out of order listing in destination (%v)", dst.Fs()))
|
||||
}
|
||||
}
|
||||
switch {
|
||||
|
||||
Reference in New Issue
Block a user