1
0
mirror of https://github.com/rclone/rclone.git synced 2026-01-06 18:43:50 +00:00

Refactor the List and ListDir interface

Gives more accurate error propagation, control of depth of recursion
and short circuit recursion where possible.

Most of the the heavy lifting is done in the "fs" package, making file
system implementations a bit simpler.

This commit contains some code originally by Klaus Post.

Fixes #316
This commit is contained in:
Nick Craig-Wood
2016-04-21 20:06:21 +01:00
parent 3bdad260b0
commit 753b0717be
21 changed files with 1512 additions and 996 deletions

View File

@@ -6,6 +6,7 @@ import (
"bufio"
"fmt"
"os"
"path"
"regexp"
"strconv"
"strings"
@@ -69,7 +70,8 @@ type Filter struct {
ModTimeFrom time.Time
ModTimeTo time.Time
rules []rule
files filesMap
files filesMap // files if filesFrom
dirs filesMap // dirs from filesFrom
}
// We use time conventions
@@ -244,9 +246,21 @@ func (f *Filter) AddRule(rule string) error {
func (f *Filter) AddFile(file string) error {
if f.files == nil {
f.files = make(filesMap)
f.dirs = make(filesMap)
}
file = strings.Trim(file, "/")
f.files[file] = struct{}{}
// Put all the parent directories into f.dirs
for {
file = path.Dir(file)
if file == "." {
break
}
if _, found := f.dirs[file]; found {
break
}
f.dirs[file] = struct{}{}
}
return nil
}
@@ -265,6 +279,28 @@ func (f *Filter) InActive() bool {
len(f.rules) == 0)
}
// includeRemote returns whether this remote passes the filter rules.
func (f *Filter) includeRemote(remote string) bool {
for _, rule := range f.rules {
if rule.Match(remote) {
return rule.Include
}
}
return true
}
// IncludeDirectory returns whether this directory should be included
// in the sync or not.
func (f *Filter) IncludeDirectory(remote string) bool {
remote = strings.Trim(remote, "/")
// filesFrom takes precedence
if f.files != nil {
_, include := f.dirs[remote]
return include
}
return f.includeRemote(remote + "/")
}
// Include returns whether this object should be included into the
// sync or not
func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
@@ -285,12 +321,7 @@ func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
if f.MaxSize != 0 && size > f.MaxSize {
return false
}
for _, rule := range f.rules {
if rule.Match(remote) {
return rule.Include
}
}
return true
return f.includeRemote(remote)
}
// IncludeObject returns whether this object should be included into

View File

@@ -6,6 +6,8 @@ import (
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
func TestAgeSuffix(t *testing.T) {
@@ -192,6 +194,20 @@ func testInclude(t *testing.T, f *Filter, tests []includeTest) {
}
}
type includeDirTest struct {
in string
want bool
}
func testDirInclude(t *testing.T, f *Filter, tests []includeDirTest) {
for _, test := range tests {
got := f.IncludeDirectory(test.in)
if test.want != got {
t.Errorf("%q: want %v got %v", test.in, test.want, got)
}
}
}
func TestNewFilterIncludeFiles(t *testing.T) {
f, err := NewFilter()
if err != nil {
@@ -205,6 +221,11 @@ func TestNewFilterIncludeFiles(t *testing.T) {
if err != nil {
t.Error(err)
}
assert.Equal(t, filesMap{
"file1.jpg": {},
"file2.jpg": {},
}, f.files)
assert.Equal(t, filesMap{}, f.dirs)
testInclude(t, f, []includeTest{
{"file1.jpg", 0, 0, true},
{"file2.jpg", 1, 0, true},
@@ -216,6 +237,42 @@ func TestNewFilterIncludeFiles(t *testing.T) {
}
}
func TestNewFilterIncludeFilesDirs(t *testing.T) {
f, err := NewFilter()
if err != nil {
t.Fatal(err)
}
for _, path := range []string{
"path/to/dir/file1.png",
"/path/to/dir/file2.png",
"/path/to/file3.png",
"/path/to/dir2/file4.png",
} {
err = f.AddFile(path)
if err != nil {
t.Error(err)
}
}
assert.Equal(t, filesMap{
"path": {},
"path/to": {},
"path/to/dir": {},
"path/to/dir2": {},
}, f.dirs)
testDirInclude(t, f, []includeDirTest{
{"path", true},
{"path/to", true},
{"path/to/", true},
{"/path/to", true},
{"/path/to/", true},
{"path/to/dir", true},
{"path/to/dir2", true},
{"path/too", false},
{"path/three", false},
{"four", false},
})
}
func TestNewFilterMinSize(t *testing.T) {
f, err := NewFilter()
if err != nil {
@@ -340,6 +397,16 @@ func TestNewFilterMatches(t *testing.T) {
{"sausage3/potato", 101, 0, true},
{"unicorn", 99, 0, false},
})
testDirInclude(t, f, []includeDirTest{
{"sausage1", false},
{"sausage2", false},
{"sausage2/sub", false},
{"sausage2/sub/dir", false},
{"sausage3", true},
{"sausage3/sub", true},
{"sausage3/sub/dir", true},
{"sausage4", false},
})
if f.InActive() {
t.Errorf("want !InActive")
}

303
fs/fs.go
View File

@@ -5,9 +5,12 @@ import (
"fmt"
"io"
"log"
"math"
"path/filepath"
"regexp"
"sort"
"strings"
"sync"
"time"
)
@@ -18,6 +21,8 @@ const (
// ModTimeNotSupported is a very large precision value to show
// mod time isn't supported on this Fs
ModTimeNotSupported = 100 * 365 * 24 * time.Hour
// MaxLevel is a sentinel representing an infinite depth for listings
MaxLevel = math.MaxInt32
)
// Globals
@@ -32,6 +37,9 @@ var (
ErrorCantDirMove = fmt.Errorf("Can't move directory - incompatible remotes")
ErrorDirExists = fmt.Errorf("Can't copy directory - destination already exists")
ErrorCantSetModTime = fmt.Errorf("Can't set modified time")
ErrorDirNotFound = fmt.Errorf("Directory not found")
ErrorLevelNotSupported = fmt.Errorf("Level value not supported")
ErrorListAborted = fmt.Errorf("List aborted")
)
// RegInfo provides information about a filesystem
@@ -90,11 +98,10 @@ func Register(info *RegInfo) {
type Fs interface {
Info
// List the Fs into a channel
List() ObjectsChan
// ListDir lists the Fs directories/buckets/containers into a channel
ListDir() DirChan
// List the objects and directories of the Fs
//
// This should return ErrDirNotFound if the directory isn't found.
List(ListOpts)
// NewFsObject finds the Object at remote. Returns nil if can't be found
NewFsObject(remote string) Object
@@ -239,6 +246,292 @@ type UnWrapper interface {
// ObjectsChan is a channel of Objects
type ObjectsChan chan Object
// ListOpts describes the interface used for Fs.List operations
type ListOpts interface {
// Add an object to the output.
// If the function returns true, the operation has been aborted.
// Multiple goroutines can safely add objects concurrently.
Add(obj Object) (abort bool)
// Add a directory to the output.
// If the function returns true, the operation has been aborted.
// Multiple goroutines can safely add objects concurrently.
AddDir(dir *Dir) (abort bool)
// IncludeDirectory returns whether this directory should be
// included in the listing (and recursed into or not).
IncludeDirectory(remote string) bool
// SetError will set an error state, and will cause the listing to
// be aborted.
// Multiple goroutines can set the error state concurrently,
// but only the first will be returned to the caller.
SetError(err error)
// Level returns the level it should recurse to. Fses may
// ignore this in which case the listing will be less
// efficient.
Level() int
// Buffer returns the channel depth in use
Buffer() int
// Finished should be called when listing is finished
Finished()
// IsFinished returns whether Finished or SetError have been called
IsFinished() bool
}
// listerResult is returned by the lister methods
type listerResult struct {
Obj Object
Dir *Dir
Err error
}
// Lister objects are used for controlling listing of Fs objects
type Lister struct {
mu sync.RWMutex
buffer int
abort bool
results chan listerResult
finished sync.Once
level int
filter *Filter
}
// NewLister creates a Lister object.
//
// The default channel buffer size will be Config.Checkers unless
// overridden with SetBuffer. The default level will be infinite.
func NewLister() *Lister {
o := &Lister{}
return o.SetLevel(-1).SetBuffer(Config.Checkers)
}
// Start starts a go routine listing the Fs passed in. It returns the
// same Lister that was passed in for convenience.
func (o *Lister) Start(f Fs) *Lister {
o.results = make(chan listerResult, o.buffer)
go func() {
f.List(o)
}()
return o
}
// SetLevel sets the level to recurse to. It returns same Lister that
// was passed in for convenience. If Level is < 0 then it sets it to
// infinite. Must be called before Start().
func (o *Lister) SetLevel(level int) *Lister {
if level < 0 {
o.level = MaxLevel
} else {
o.level = level
}
return o
}
// SetFilter sets the Filter that is in use. It defaults to no
// filtering. Must be called before Start().
func (o *Lister) SetFilter(filter *Filter) *Lister {
o.filter = filter
return o
}
// Level gets the recursion level for this listing.
//
// Fses may ignore this, but should implement it for improved efficiency if possible.
//
// Level 1 means list just the contents of the directory
//
// Each returned item must have less than level `/`s in.
func (o *Lister) Level() int {
return o.level
}
// SetBuffer sets the channel buffer size in use. Must be called
// before Start().
func (o *Lister) SetBuffer(buffer int) *Lister {
if buffer < 1 {
buffer = 1
}
o.buffer = buffer
return o
}
// Buffer gets the channel buffer size in use
func (o *Lister) Buffer() int {
return o.buffer
}
// Add an object to the output.
// If the function returns true, the operation has been aborted.
// Multiple goroutines can safely add objects concurrently.
func (o *Lister) Add(obj Object) (abort bool) {
o.mu.RLock()
defer o.mu.RUnlock()
if o.abort {
return true
}
o.results <- listerResult{Obj: obj}
return false
}
// AddDir will a directory to the output.
// If the function returns true, the operation has been aborted.
// Multiple goroutines can safely add objects concurrently.
func (o *Lister) AddDir(dir *Dir) (abort bool) {
o.mu.RLock()
defer o.mu.RUnlock()
if o.abort {
return true
}
remote := dir.Name
remote = strings.Trim(remote, "/")
dir.Name = remote
// Check the level and ignore if too high
slashes := strings.Count(remote, "/")
if slashes >= o.level {
return false
}
// Check if directory is included
if !o.IncludeDirectory(remote) {
return false
}
o.results <- listerResult{Dir: dir}
return false
}
// IncludeDirectory returns whether this directory should be
// included in the listing (and recursed into or not).
func (o *Lister) IncludeDirectory(remote string) bool {
if o.filter == nil {
return true
}
return o.filter.IncludeDirectory(remote)
}
// SetError will set an error state, and will cause the listing to
// be aborted.
// Multiple goroutines can set the error state concurrently,
// but only the first will be returned to the caller.
func (o *Lister) SetError(err error) {
o.mu.RLock()
if err != nil && !o.abort {
o.results <- listerResult{Err: err}
}
o.mu.RUnlock()
o.Finished()
}
// Finished should be called when listing is finished
func (o *Lister) Finished() {
o.finished.Do(func() {
o.mu.Lock()
o.abort = true
close(o.results)
o.mu.Unlock()
})
}
// IsFinished returns whether the directory listing is finished or not
func (o *Lister) IsFinished() bool {
o.mu.RLock()
defer o.mu.RUnlock()
return o.abort
}
// Get an object from the listing.
// Will return either an object or a directory, never both.
// Will return (nil, nil, nil) when all objects have been returned.
func (o *Lister) Get() (Object, *Dir, error) {
select {
case r := <-o.results:
return r.Obj, r.Dir, r.Err
}
}
// GetObject will return an object from the listing.
// It will skip over any directories.
// Will return (nil, nil) when all objects have been returned.
func (o *Lister) GetObject() (Object, error) {
for {
obj, dir, err := o.Get()
if err != nil {
return nil, err
}
// Check if we are finished
if dir == nil && obj == nil {
return nil, nil
}
// Ignore directories
if dir != nil {
continue
}
return obj, nil
}
}
// GetObjects will return a slice of object from the listing.
// It will skip over any directories.
func (o *Lister) GetObjects() (objs []Object, err error) {
for {
obj, dir, err := o.Get()
if err != nil {
return nil, err
}
// Check if we are finished
if dir == nil && obj == nil {
break
}
if obj != nil {
objs = append(objs, obj)
}
}
return objs, nil
}
// GetDir will return a directory from the listing.
// It will skip over any objects.
// Will return (nil, nil) when all objects have been returned.
func (o *Lister) GetDir() (*Dir, error) {
for {
obj, dir, err := o.Get()
if err != nil {
return nil, err
}
// Check if we are finished
if dir == nil && obj == nil {
return nil, nil
}
// Ignore objects
if obj != nil {
continue
}
return dir, nil
}
}
// GetDirs will return a slice of directories from the listing.
// It will skip over any objects.
func (o *Lister) GetDirs() (dirs []*Dir, err error) {
for {
obj, dir, err := o.Get()
if err != nil {
return nil, err
}
// Check if we are finished
if dir == nil && obj == nil {
break
}
if dir != nil {
dirs = append(dirs, dir)
}
}
return dirs, nil
}
// Objects is a slice of Object~s
type Objects []Object

View File

@@ -38,22 +38,13 @@ func (f *Limited) String() string {
}
// List the Fs into a channel
func (f *Limited) List() ObjectsChan {
out := make(ObjectsChan, Config.Checkers)
go func() {
for _, obj := range f.objects {
out <- obj
func (f *Limited) List(opts ListOpts) {
defer opts.Finished()
for _, obj := range f.objects {
if opts.Add(obj) {
return
}
close(out)
}()
return out
}
// ListDir lists the Fs directories/buckets/containers into a channel
func (f *Limited) ListDir() DirChan {
out := make(DirChan, Config.Checkers)
close(out)
return out
}
}
// NewFsObject finds the Object at remote. Returns nil if can't be found

View File

@@ -456,10 +456,23 @@ func DeleteFiles(toBeDeleted ObjectsChan) {
// Read a map of Object.Remote to Object for the given Fs.
// If includeAll is specified all files will be added,
// otherwise only files passing the filter will be added.
func readFilesMap(fs Fs, includeAll bool) map[string]Object {
files := make(map[string]Object)
func readFilesMap(fs Fs, includeAll bool) (files map[string]Object, err error) {
files = make(map[string]Object)
normalised := make(map[string]struct{})
for o := range fs.List() {
list := NewLister()
if !includeAll {
list.SetFilter(Config.Filter)
}
list.Start(fs)
for {
o, err := list.GetObject()
if err != nil {
return files, err
}
// Check if we are finished
if o == nil {
break
}
remote := o.Remote()
normalisedRemote := strings.ToLower(norm.NFC.String(remote))
if _, ok := files[remote]; !ok {
@@ -477,7 +490,39 @@ func readFilesMap(fs Fs, includeAll bool) map[string]Object {
}
normalised[normalisedRemote] = struct{}{}
}
return files
return files, nil
}
// readFilesMaps runs readFilesMap on fdst and fsrc at the same time
func readFilesMaps(fdst Fs, fdstIncludeAll bool, fsrc Fs, fsrcIncludeAll bool) (dstFiles, srcFiles map[string]Object, err error) {
var wg sync.WaitGroup
var srcErr, dstErr error
list := func(fs Fs, includeAll bool, pMap *map[string]Object, pErr *error) {
defer wg.Done()
Log(fs, "Building file list")
dstFiles, listErr := readFilesMap(fs, includeAll)
if listErr != nil {
ErrorLog(fs, "Error building file list: %v", listErr)
*pErr = listErr
} else {
Debug(fs, "Done building file list")
*pMap = dstFiles
}
}
wg.Add(2)
go list(fdst, fdstIncludeAll, &dstFiles, &srcErr)
go list(fsrc, fsrcIncludeAll, &srcFiles, &dstErr)
wg.Wait()
if srcErr != nil {
err = srcErr
}
if dstErr != nil {
err = dstErr
}
return dstFiles, srcFiles, err
}
// Same returns true if fdst and fsrc point to the same underlying Fs
@@ -501,31 +546,11 @@ func syncCopyMove(fdst, fsrc Fs, Delete bool, DoMove bool) error {
return err
}
Log(fdst, "Building file list")
// Read the files of both source and destination
var listWg sync.WaitGroup
listWg.Add(2)
var dstFiles map[string]Object
var srcFiles map[string]Object
var srcObjects = make(ObjectsChan, Config.Transfers)
// Read dst files including excluded files if DeleteExcluded is set
go func() {
dstFiles = readFilesMap(fdst, Config.Filter.DeleteExcluded)
listWg.Done()
}()
// Read src file not including excluded files
go func() {
srcFiles = readFilesMap(fsrc, false)
listWg.Done()
for _, v := range srcFiles {
srcObjects <- v
}
close(srcObjects)
}()
// Read the files of both source and destination in parallel
dstFiles, srcFiles, err := readFilesMaps(fdst, Config.Filter.DeleteExcluded, fsrc, false)
if err != nil {
return err
}
startDeletion := make(chan struct{}, 0)
@@ -564,9 +589,6 @@ func syncCopyMove(fdst, fsrc Fs, Delete bool, DoMove bool) error {
DeleteFiles(toDelete)
}()
// Wait for all files to be read
listWg.Wait()
// Start deleting, unless we must delete after transfer
if Delete && !Config.DeleteAfter {
close(startDeletion)
@@ -598,18 +620,15 @@ func syncCopyMove(fdst, fsrc Fs, Delete bool, DoMove bool) error {
}
}
go func() {
for src := range srcObjects {
remote := src.Remote()
if dst, dstFound := dstFiles[remote]; dstFound {
toBeChecked <- ObjectPair{src, dst}
} else {
// No need to check since doesn't exist
toBeUploaded <- ObjectPair{src, nil}
}
for remote, src := range srcFiles {
if dst, dstFound := dstFiles[remote]; dstFound {
toBeChecked <- ObjectPair{src, dst}
} else {
// No need to check since doesn't exist
toBeUploaded <- ObjectPair{src, nil}
}
close(toBeChecked)
}()
}
close(toBeChecked)
Log(fdst, "Waiting for checks to finish")
checkerWg.Wait()
@@ -713,30 +732,11 @@ func checkIdentical(dst, src Object) bool {
// Check the files in fsrc and fdst according to Size and hash
func Check(fdst, fsrc Fs) error {
dstFiles, srcFiles, err := readFilesMaps(fdst, false, fsrc, false)
if err != nil {
return err
}
differences := int32(0)
var (
wg sync.WaitGroup
dstFiles, srcFiles map[string]Object
)
wg.Add(2)
go func() {
defer wg.Done()
// Read the destination files
Log(fdst, "Building file list")
dstFiles = readFilesMap(fdst, false)
Debug(fdst, "Done building file list")
}()
go func() {
defer wg.Done()
// Read the source files
Log(fsrc, "Building file list")
srcFiles = readFilesMap(fsrc, false)
Debug(fdst, "Done building file list")
}()
wg.Wait()
// FIXME could do this as it goes along and make it use less
// memory.
@@ -800,13 +800,21 @@ func Check(fdst, fsrc Fs) error {
//
// Lists in parallel which may get them out of order
func ListFn(f Fs, fn func(Object)) error {
in := f.List()
list := NewLister().SetFilter(Config.Filter).Start(f)
var wg sync.WaitGroup
wg.Add(Config.Checkers)
for i := 0; i < Config.Checkers; i++ {
go func() {
defer wg.Done()
for o := range in {
for {
o, err := list.GetObject()
if err != nil {
log.Fatal(err)
}
// check if we are finished
if o == nil {
return
}
if Config.Filter.IncludeObject(o) {
fn(o)
}
@@ -901,7 +909,15 @@ func Count(f Fs) (objects int64, size int64, err error) {
// ListDir lists the directories/buckets/containers in the Fs to the supplied writer
func ListDir(f Fs, w io.Writer) error {
for dir := range f.ListDir() {
list := NewLister().SetLevel(1).Start(f)
for {
dir, err := list.GetDir()
if err != nil {
log.Fatal(err)
}
if dir == nil {
break
}
syncFprintf(w, "%12d %13s %9d %s\n", dir.Bytes, dir.When.Format("2006-01-02 15:04:05"), dir.Count, dir.Name)
}
return nil
@@ -960,7 +976,8 @@ func Purge(f Fs) error {
}
if doFallbackPurge {
// DeleteFiles and Rmdir observe --dry-run
DeleteFiles(f.List())
list := NewLister().Start(f)
DeleteFiles(listToChan(list))
err = Rmdir(f)
}
if err != nil {
@@ -1115,7 +1132,16 @@ func (mode DeduplicateMode) String() string {
func Deduplicate(f Fs, mode DeduplicateMode) error {
Log(f, "Looking for duplicates using %v mode.", mode)
files := map[string][]Object{}
for o := range f.List() {
list := NewLister().Start(f)
for {
o, err := list.GetObject()
if err != nil {
return err
}
// Check if we are finished
if o == nil {
break
}
remote := o.Remote()
files[remote] = append(files[remote], o)
}
@@ -1149,3 +1175,34 @@ func Deduplicate(f Fs, mode DeduplicateMode) error {
}
return nil
}
// listToChan will transfer all incoming objects to a new channel.
//
// If an error occurs, the error will be logged, and it will close the
// channel.
//
// If the error was ErrorDirNotFound then it will be ignored
func listToChan(list *Lister) ObjectsChan {
o := make(ObjectsChan, Config.Checkers)
go func() {
defer close(o)
for {
obj, dir, err := list.Get()
if err != nil {
if err != ErrorDirNotFound {
Stats.Error()
ErrorLog(nil, "Failed to list: %v", err)
}
return
}
if dir == nil && obj == nil {
return
}
if o == nil {
continue
}
o <- obj
}
}()
return o
}

View File

@@ -140,11 +140,20 @@ func NewRun(t *testing.T) *Run {
r = new(Run)
*r = *oneRun
r.cleanRemote = func() {
oldErrors := fs.Stats.GetErrors()
fs.DeleteFiles(r.fremote.List())
errors := fs.Stats.GetErrors() - oldErrors
if errors != 0 {
t.Fatalf("%d errors while cleaning remote %v", errors, r.fremote)
list := fs.NewLister().Start(r.fremote)
for {
o, err := list.GetObject()
if err != nil {
t.Fatalf("Error listing: %v", err)
}
// Check if we are finished
if o == nil {
break
}
err = o.Remove()
if err != nil {
t.Errorf("Error removing file: %v", err)
}
}
// Check remote is empty
fstest.CheckItems(t, r.fremote)
@@ -320,7 +329,12 @@ func TestCopyAfterDelete(t *testing.T) {
fstest.CheckItems(t, r.flocal)
fstest.CheckItems(t, r.fremote, file1)
err := fs.CopyDir(r.fremote, r.flocal)
err := fs.Mkdir(r.flocal)
if err != nil {
t.Fatalf("Mkdir failed: %v", err)
}
err = fs.CopyDir(r.fremote, r.flocal)
if err != nil {
t.Fatalf("Copy failed: %v", err)
}
@@ -1167,7 +1181,16 @@ func TestDeduplicateRename(t *testing.T) {
t.Fatalf("fs.Deduplicate returned error: %v", err)
}
for o := range r.fremote.List() {
list := fs.NewLister().Start(r.fremote)
for {
o, err := list.GetObject()
if err != nil {
t.Fatalf("Listing failed: %v", err)
}
// Check if we are finished
if o == nil {
break
}
remote := o.Remote()
if remote != "one-1.txt" &&
remote != "one-2.txt" &&