From a0bcdc26389806f42536855615230dbfec1ea277 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Tue, 28 Nov 2023 18:37:33 +0000 Subject: [PATCH] Archive backend to read archives on cloud storage. Initial support with Zip and Squashfs archives. Fixes #8633 See #2815 --- README.md | 1 + backend/all/all.go | 1 + backend/archive/archive.go | 679 +++++++++++++++++++++++ backend/archive/archive_internal_test.go | 221 ++++++++ backend/archive/archive_test.go | 67 +++ backend/archive/archive_unsupported.go | 7 + backend/archive/archiver/archiver.go | 24 + backend/archive/base/base.go | 233 ++++++++ backend/archive/squashfs/cache.go | 165 ++++++ backend/archive/squashfs/squashfs.go | 446 +++++++++++++++ backend/archive/zip/zip.go | 385 +++++++++++++ bin/make_manual.py | 1 + docs/content/_index.md | 1 + docs/content/archive.md | 270 +++++++++ docs/content/docs.md | 1 + docs/layouts/chrome/navbar.html | 1 + fstest/test_all/config.yaml | 11 + go.mod | 6 +- go.sum | 16 +- 19 files changed, 2533 insertions(+), 3 deletions(-) create mode 100644 backend/archive/archive.go create mode 100644 backend/archive/archive_internal_test.go create mode 100644 backend/archive/archive_test.go create mode 100644 backend/archive/archive_unsupported.go create mode 100644 backend/archive/archiver/archiver.go create mode 100644 backend/archive/base/base.go create mode 100644 backend/archive/squashfs/cache.go create mode 100644 backend/archive/squashfs/squashfs.go create mode 100644 backend/archive/zip/zip.go create mode 100644 docs/content/archive.md diff --git a/README.md b/README.md index 6e5d1dc2d..a368d62df 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,7 @@ Please see [the full list of all storage providers and their features](https://r These backends adapt or modify other storage providers - Alias: rename existing remotes [:page_facing_up:](https://rclone.org/alias/) +- Archive: read archive files [:page_facing_up:](https://rclone.org/archive/) - Cache: cache remotes (DEPRECATED) [:page_facing_up:](https://rclone.org/cache/) - Chunker: split large files [:page_facing_up:](https://rclone.org/chunker/) - Combine: combine multiple remotes into a directory tree [:page_facing_up:](https://rclone.org/combine/) diff --git a/backend/all/all.go b/backend/all/all.go index 234b2bf50..8a3c08802 100644 --- a/backend/all/all.go +++ b/backend/all/all.go @@ -4,6 +4,7 @@ package all import ( // Active file systems _ "github.com/rclone/rclone/backend/alias" + _ "github.com/rclone/rclone/backend/archive" _ "github.com/rclone/rclone/backend/azureblob" _ "github.com/rclone/rclone/backend/azurefiles" _ "github.com/rclone/rclone/backend/b2" diff --git a/backend/archive/archive.go b/backend/archive/archive.go new file mode 100644 index 000000000..20cca571a --- /dev/null +++ b/backend/archive/archive.go @@ -0,0 +1,679 @@ +//go:build !plan9 + +// Package archive implements a backend to access archive files in a remote +package archive + +// FIXME factor common code between backends out - eg VFS initialization + +// FIXME can we generalize the VFS handle caching and use it in zip backend + +// Factor more stuff out if possible + +// Odd stats which are probably coming from the VFS +// * tensorflow.sqfs: 0% /3.074Gi, 204.426Ki/s, 4h22m46s + +// FIXME this will perform poorly for unpacking as the VFS Reader is bad +// at multiple streams - need cache mode setting? 
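+
+// How it works: this backend wraps another remote. When listing, any file
+// whose name ends in a registered archiver extension (see
+// backend/archive/archiver) is shown as a directory, and reads inside that
+// directory are delegated to an archive Fs (zip or squashfs) which is
+// created lazily on first use.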
+ +import ( + "context" + "errors" + "fmt" + "io" + "path" + "strings" + "sync" + "time" + + // Import all the required archivers here + _ "github.com/rclone/rclone/backend/archive/squashfs" + _ "github.com/rclone/rclone/backend/archive/zip" + + "github.com/rclone/rclone/backend/archive/archiver" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/cache" + "github.com/rclone/rclone/fs/config/configmap" + "github.com/rclone/rclone/fs/config/configstruct" + "github.com/rclone/rclone/fs/fspath" + "github.com/rclone/rclone/fs/hash" +) + +// Register with Fs +func init() { + fsi := &fs.RegInfo{ + Name: "archive", + Description: "Read archives", + NewFs: NewFs, + MetadataInfo: &fs.MetadataInfo{ + Help: `Any metadata supported by the underlying remote is read and written.`, + }, + Options: []fs.Option{{ + Name: "remote", + Help: `Remote to wrap to read archives from. + +Normally should contain a ':' and a path, e.g. "myremote:path/to/dir", +"myremote:bucket" or "myremote:". + +If this is left empty, then the archive backend will use the root as +the remote. + +This means that you can use :archive:remote:path and it will be +equivalent to setting remote="remote:path". +`, + Required: false, + }}, + } + fs.Register(fsi) +} + +// Options defines the configuration for this backend +type Options struct { + Remote string `config:"remote"` +} + +// Fs represents a archive of upstreams +type Fs struct { + name string // name of this remote + features *fs.Features // optional features + opt Options // options for this Fs + root string // the path we are working on + f fs.Fs // remote we are wrapping + wrapper fs.Fs // fs that wraps us + + mu sync.Mutex // protects the below + archives map[string]*archive // the archives we have, by path +} + +// A single open archive +type archive struct { + archiver archiver.Archiver // archiver responsible + remote string // path to the archive + prefix string // prefix to add on to listings + root string // root of the archive to remove from listings + mu sync.Mutex // protects the following variables + f fs.Fs // the archive Fs, may be nil +} + +// If remote is an archive then return it otherwise return nil +func findArchive(remote string) *archive { + // FIXME use something faster than linear search? + for _, archiver := range archiver.Archivers { + if strings.HasSuffix(remote, archiver.Extension) { + return &archive{ + archiver: archiver, + remote: remote, + prefix: remote, + root: "", + } + } + } + return nil +} + +// Find an archive buried in remote +func subArchive(remote string) *archive { + archive := findArchive(remote) + if archive != nil { + return archive + } + parent := path.Dir(remote) + if parent == "/" || parent == "." { + return nil + } + return subArchive(parent) +} + +// If remote is an archive then return it otherwise return nil +func (f *Fs) findArchive(remote string) (archive *archive) { + archive = findArchive(remote) + if archive != nil { + f.mu.Lock() + f.archives[remote] = archive + f.mu.Unlock() + } + return archive +} + +// Instantiate archive if it hasn't been instantiated yet +// +// This is done lazily so that we can list a directory full of +// archives without opening them all. 
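+//
+// init is safe to call concurrently and more than once - the first
+// successful call stores the archive Fs in a.f and later calls return
+// the cached value.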
+func (a *archive) init(ctx context.Context, f fs.Fs) (fs.Fs, error) { + a.mu.Lock() + defer a.mu.Unlock() + if a.f != nil { + return a.f, nil + } + newFs, err := a.archiver.New(ctx, f, a.remote, a.prefix, a.root) + if err != nil && err != fs.ErrorIsFile { + return nil, fmt.Errorf("failed to create archive %q: %w", a.remote, err) + } + a.f = newFs + return a.f, nil +} + +// NewFs constructs an Fs from the path. +// +// The returned Fs is the actual Fs, referenced by remote in the config +func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (outFs fs.Fs, err error) { + // defer log.Trace(nil, "name=%q, root=%q, m=%v", name, root, m)("f=%+v, err=%v", &outFs, &err) + // Parse config into Options struct + opt := new(Options) + err = configstruct.Set(m, opt) + if err != nil { + return nil, err + } + remote := opt.Remote + origRoot := root + + // If remote is empty, use the root instead + if remote == "" { + remote = root + root = "" + } + isDirectory := strings.HasSuffix(remote, "/") + remote = strings.TrimRight(remote, "/") + if remote == "" { + remote = "/" + } + if strings.HasPrefix(remote, name+":") { + return nil, errors.New("can't point archive remote at itself - check the value of the upstreams setting") + } + + _ = isDirectory + + foundArchive := subArchive(remote) + if foundArchive != nil { + fs.Debugf(nil, "Found archiver for %q remote %q", foundArchive.archiver.Extension, foundArchive.remote) + // Archive path + foundArchive.root = strings.Trim(remote[len(foundArchive.remote):], "/") + // Path to the archive + archiveRemote := remote[:len(foundArchive.remote)] + // Remote is archive leaf name + foundArchive.remote = path.Base(archiveRemote) + foundArchive.prefix = "" + // Point remote to archive file + remote = archiveRemote + } + + // Make sure to remove trailing . referring to the current dir + if path.Base(root) == "." { + root = strings.TrimSuffix(root, ".") + } + remotePath := fspath.JoinRootPath(remote, root) + wrappedFs, err := cache.Get(ctx, remotePath) + if err != fs.ErrorIsFile && err != nil { + return nil, fmt.Errorf("failed to make remote %q to wrap: %w", remote, err) + } + + f := &Fs{ + name: name, + //root: path.Join(remotePath, root), + root: origRoot, + opt: *opt, + f: wrappedFs, + archives: make(map[string]*archive), + } + cache.PinUntilFinalized(f.f, f) + // the features here are ones we could support, and they are + // ANDed with the ones from wrappedFs + f.features = (&fs.Features{ + CaseInsensitive: true, + DuplicateFiles: false, + ReadMimeType: true, + WriteMimeType: true, + CanHaveEmptyDirectories: true, + BucketBased: true, + SetTier: true, + GetTier: true, + ReadMetadata: true, + WriteMetadata: true, + UserMetadata: true, + PartialUploads: true, + }).Fill(ctx, f).Mask(ctx, wrappedFs).WrapsFs(f, wrappedFs) + + if foundArchive != nil { + fs.Debugf(f, "Root is an archive") + if err != fs.ErrorIsFile { + return nil, fmt.Errorf("expecting to find a file at %q", remote) + } + return foundArchive.init(ctx, f.f) + } + // Correct root if definitely pointing to a file + if err == fs.ErrorIsFile { + f.root = path.Dir(f.root) + if f.root == "." 
|| f.root == "/" { + f.root = "" + } + } + return f, err +} + +// Name of the remote (as passed into NewFs) +func (f *Fs) Name() string { + return f.name +} + +// Root of the remote (as passed into NewFs) +func (f *Fs) Root() string { + return f.root +} + +// String converts this Fs to a string +func (f *Fs) String() string { + return fmt.Sprintf("archive root '%s'", f.root) +} + +// Features returns the optional features of this Fs +func (f *Fs) Features() *fs.Features { + return f.features +} + +// Rmdir removes the root directory of the Fs object +func (f *Fs) Rmdir(ctx context.Context, dir string) error { + return f.f.Rmdir(ctx, dir) +} + +// Hashes returns hash.HashNone to indicate remote hashing is unavailable +func (f *Fs) Hashes() hash.Set { + return f.f.Hashes() +} + +// Mkdir makes the root directory of the Fs object +func (f *Fs) Mkdir(ctx context.Context, dir string) error { + return f.f.Mkdir(ctx, dir) +} + +// Purge all files in the directory +// +// Implement this if you have a way of deleting all the files +// quicker than just running Remove() on the result of List() +// +// Return an error if it doesn't exist +func (f *Fs) Purge(ctx context.Context, dir string) error { + do := f.f.Features().Purge + if do == nil { + return fs.ErrorCantPurge + } + return do(ctx, dir) +} + +// Copy src to this remote using server-side copy operations. +// +// This is stored with the remote path given. +// +// It returns the destination Object and a possible error. +// +// Will only be called if src.Fs().Name() == f.Name() +// +// If it isn't possible then return fs.ErrorCantCopy +func (f *Fs) Copy(ctx context.Context, src fs.Object, remote string) (fs.Object, error) { + do := f.f.Features().Copy + if do == nil { + return nil, fs.ErrorCantCopy + } + // FIXME + // o, ok := src.(*Object) + // if !ok { + // return nil, fs.ErrorCantCopy + // } + return do(ctx, src, remote) +} + +// Move src to this remote using server-side move operations. +// +// This is stored with the remote path given. +// +// It returns the destination Object and a possible error. +// +// Will only be called if src.Fs().Name() == f.Name() +// +// If it isn't possible then return fs.ErrorCantMove +func (f *Fs) Move(ctx context.Context, src fs.Object, remote string) (fs.Object, error) { + do := f.f.Features().Move + if do == nil { + return nil, fs.ErrorCantMove + } + // FIXME + // o, ok := src.(*Object) + // if !ok { + // return nil, fs.ErrorCantMove + // } + return do(ctx, src, remote) +} + +// DirMove moves src, srcRemote to this remote at dstRemote +// using server-side move operations. +// +// Will only be called if src.Fs().Name() == f.Name() +// +// If it isn't possible then return fs.ErrorCantDirMove +// +// If destination exists then return fs.ErrorDirExists +func (f *Fs) DirMove(ctx context.Context, src fs.Fs, srcRemote, dstRemote string) (err error) { + do := f.f.Features().DirMove + if do == nil { + return fs.ErrorCantDirMove + } + srcFs, ok := src.(*Fs) + if !ok { + fs.Debugf(srcFs, "Can't move directory - not same remote type") + return fs.ErrorCantDirMove + } + return do(ctx, srcFs.f, srcRemote, dstRemote) +} + +// ChangeNotify calls the passed function with a path +// that has had changes. If the implementation +// uses polling, it should adhere to the given interval. +// At least one value will be written to the channel, +// specifying the initial value and updated values might +// follow. A 0 Duration should pause the polling. +// The ChangeNotify implementation must empty the channel +// regularly. 
When the channel gets closed, the implementation +// should stop polling and release resources. +func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryType), ch <-chan time.Duration) { + do := f.f.Features().ChangeNotify + if do == nil { + return + } + wrappedNotifyFunc := func(path string, entryType fs.EntryType) { + // fs.Debugf(f, "ChangeNotify: path %q entryType %d", path, entryType) + notifyFunc(path, entryType) + } + do(ctx, wrappedNotifyFunc, ch) +} + +// DirCacheFlush resets the directory cache - used in testing +// as an optional interface +func (f *Fs) DirCacheFlush() { + do := f.f.Features().DirCacheFlush + if do != nil { + do() + } +} + +func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, stream bool, options ...fs.OpenOption) (fs.Object, error) { + var o fs.Object + var err error + if stream { + o, err = f.f.Features().PutStream(ctx, in, src, options...) + } else { + o, err = f.f.Put(ctx, in, src, options...) + } + if err != nil { + return nil, err + } + return o, nil +} + +// Put in to the remote path with the modTime given of the given size +// +// May create the object even if it returns an error - if so +// will return the object and the error, otherwise will return +// nil and the error +func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { + o, err := f.NewObject(ctx, src.Remote()) + switch err { + case nil: + return o, o.Update(ctx, in, src, options...) + case fs.ErrorObjectNotFound: + return f.put(ctx, in, src, false, options...) + default: + return nil, err + } +} + +// PutStream uploads to the remote path with the modTime given of indeterminate size +// +// May create the object even if it returns an error - if so +// will return the object and the error, otherwise will return +// nil and the error +func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { + o, err := f.NewObject(ctx, src.Remote()) + switch err { + case nil: + return o, o.Update(ctx, in, src, options...) + case fs.ErrorObjectNotFound: + return f.put(ctx, in, src, true, options...) + default: + return nil, err + } +} + +// About gets quota information from the Fs +func (f *Fs) About(ctx context.Context) (*fs.Usage, error) { + do := f.f.Features().About + if do == nil { + return nil, errors.New("not supported by underlying remote") + } + return do(ctx) +} + +// Find the Fs for the directory +func (f *Fs) findFs(ctx context.Context, dir string) (subFs fs.Fs, err error) { + f.mu.Lock() + defer f.mu.Unlock() + + subFs = f.f + + // FIXME should do this with a better datastructure like a prefix tree + // FIXME want to find the longest first otherwise nesting won't work + dirSlash := dir + "/" + for archiverRemote, archive := range f.archives { + subRemote := archiverRemote + "/" + if strings.HasPrefix(dirSlash, subRemote) { + subFs, err = archive.init(ctx, f.f) + if err != nil { + return nil, err + } + break + } + } + + return subFs, nil +} + +// List the objects and directories in dir into entries. The +// entries can be returned in any order but should be for a +// complete directory. +// +// dir should be "" to list the root, and should not have +// trailing slashes. +// +// This should return ErrDirNotFound if the directory isn't +// found. 
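+//
+// Files which look like archives (by extension) are replaced in the
+// listing with directories of the same name so that their contents can
+// be browsed.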
+func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) { + // defer log.Trace(f, "dir=%q", dir)("entries = %v, err=%v", &entries, &err) + + subFs, err := f.findFs(ctx, dir) + if err != nil { + return nil, err + } + + entries, err = subFs.List(ctx, dir) + if err != nil { + return nil, err + } + for i, entry := range entries { + // Can only unarchive files + if o, ok := entry.(fs.Object); ok { + remote := o.Remote() + archive := f.findArchive(remote) + if archive != nil { + // Overwrite entry with directory + entries[i] = fs.NewDir(remote, o.ModTime(ctx)) + } + } + } + return entries, nil +} + +// NewObject creates a new remote archive file object +func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { + + dir := path.Dir(remote) + if dir == "/" || dir == "." { + dir = "" + } + + subFs, err := f.findFs(ctx, dir) + if err != nil { + return nil, err + } + + o, err := subFs.NewObject(ctx, remote) + if err != nil { + return nil, err + } + return o, nil +} + +// Precision is the greatest precision of all the archivers +func (f *Fs) Precision() time.Duration { + return time.Second +} + +// Shutdown the backend, closing any background tasks and any +// cached connections. +func (f *Fs) Shutdown(ctx context.Context) error { + if do := f.f.Features().Shutdown; do != nil { + return do(ctx) + } + return nil +} + +// PublicLink generates a public link to the remote path (usually readable by anyone) +func (f *Fs) PublicLink(ctx context.Context, remote string, expire fs.Duration, unlink bool) (string, error) { + do := f.f.Features().PublicLink + if do == nil { + return "", errors.New("PublicLink not supported") + } + return do(ctx, remote, expire, unlink) +} + +// PutUnchecked in to the remote path with the modTime given of the given size +// +// May create the object even if it returns an error - if so +// will return the object and the error, otherwise will return +// nil and the error +// +// May create duplicates or return errors if src already +// exists. +func (f *Fs) PutUnchecked(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { + do := f.f.Features().PutUnchecked + if do == nil { + return nil, errors.New("can't PutUnchecked") + } + o, err := do(ctx, in, src, options...) + if err != nil { + return nil, err + } + return o, nil +} + +// MergeDirs merges the contents of all the directories passed +// in into the first one and rmdirs the other directories. +func (f *Fs) MergeDirs(ctx context.Context, dirs []fs.Directory) error { + if len(dirs) == 0 { + return nil + } + do := f.f.Features().MergeDirs + if do == nil { + return errors.New("MergeDirs not supported") + } + return do(ctx, dirs) +} + +// CleanUp the trash in the Fs +// +// Implement this if you have a way of emptying the trash or +// otherwise cleaning up old versions of files. +func (f *Fs) CleanUp(ctx context.Context) error { + do := f.f.Features().CleanUp + if do == nil { + return errors.New("not supported by underlying remote") + } + return do(ctx) +} + +// OpenWriterAt opens with a handle for random access writes +// +// Pass in the remote desired and the size if known. 
+// +// It truncates any existing object +func (f *Fs) OpenWriterAt(ctx context.Context, remote string, size int64) (fs.WriterAtCloser, error) { + do := f.f.Features().OpenWriterAt + if do == nil { + return nil, fs.ErrorNotImplemented + } + return do(ctx, remote, size) +} + +// UnWrap returns the Fs that this Fs is wrapping +func (f *Fs) UnWrap() fs.Fs { + return f.f +} + +// WrapFs returns the Fs that is wrapping this Fs +func (f *Fs) WrapFs() fs.Fs { + return f.wrapper +} + +// SetWrapper sets the Fs that is wrapping this Fs +func (f *Fs) SetWrapper(wrapper fs.Fs) { + f.wrapper = wrapper +} + +// OpenChunkWriter returns the chunk size and a ChunkWriter +// +// Pass in the remote and the src object +// You can also use options to hint at the desired chunk size +func (f *Fs) OpenChunkWriter(ctx context.Context, remote string, src fs.ObjectInfo, options ...fs.OpenOption) (info fs.ChunkWriterInfo, writer fs.ChunkWriter, err error) { + do := f.f.Features().OpenChunkWriter + if do == nil { + return info, nil, fs.ErrorNotImplemented + } + return do(ctx, remote, src, options...) +} + +// UserInfo returns info about the connected user +func (f *Fs) UserInfo(ctx context.Context) (map[string]string, error) { + do := f.f.Features().UserInfo + if do == nil { + return nil, fs.ErrorNotImplemented + } + return do(ctx) +} + +// Disconnect the current user +func (f *Fs) Disconnect(ctx context.Context) error { + do := f.f.Features().Disconnect + if do == nil { + return fs.ErrorNotImplemented + } + return do(ctx) +} + +// Check the interfaces are satisfied +var ( + _ fs.Fs = (*Fs)(nil) + _ fs.Purger = (*Fs)(nil) + _ fs.PutStreamer = (*Fs)(nil) + _ fs.Copier = (*Fs)(nil) + _ fs.Mover = (*Fs)(nil) + _ fs.DirMover = (*Fs)(nil) + _ fs.DirCacheFlusher = (*Fs)(nil) + _ fs.ChangeNotifier = (*Fs)(nil) + _ fs.Abouter = (*Fs)(nil) + _ fs.Shutdowner = (*Fs)(nil) + _ fs.PublicLinker = (*Fs)(nil) + _ fs.PutUncheckeder = (*Fs)(nil) + _ fs.MergeDirser = (*Fs)(nil) + _ fs.CleanUpper = (*Fs)(nil) + _ fs.OpenWriterAter = (*Fs)(nil) + _ fs.OpenChunkWriter = (*Fs)(nil) + _ fs.UserInfoer = (*Fs)(nil) + _ fs.Disconnecter = (*Fs)(nil) + // FIXME _ fs.FullObject = (*Object)(nil) +) diff --git a/backend/archive/archive_internal_test.go b/backend/archive/archive_internal_test.go new file mode 100644 index 000000000..ccbd04ff2 --- /dev/null +++ b/backend/archive/archive_internal_test.go @@ -0,0 +1,221 @@ +//go:build !plan9 + +package archive + +import ( + "bytes" + "context" + "fmt" + "os" + "os/exec" + "path" + "path/filepath" + "strconv" + "strings" + "testing" + + _ "github.com/rclone/rclone/backend/local" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/cache" + "github.com/rclone/rclone/fs/filter" + "github.com/rclone/rclone/fs/operations" + "github.com/rclone/rclone/fstest" + "github.com/rclone/rclone/fstest/fstests" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// FIXME need to test Open with seek + +// run - run a shell command +func run(t *testing.T, args ...string) { + cmd := exec.Command(args[0], args[1:]...) 
+ fs.Debugf(nil, "run args = %v", args) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf(` +---------------------------- +Failed to run %v: %v +Command output was: +%s +---------------------------- +`, args, err, out) + } +} + +// check the dst and src are identical +func checkTree(ctx context.Context, name string, t *testing.T, dstArchive, src string, expectedCount int) { + t.Run(name, func(t *testing.T) { + fs.Debugf(nil, "check %q vs %q", dstArchive, src) + Farchive, err := cache.Get(ctx, dstArchive) + if err != fs.ErrorIsFile { + require.NoError(t, err) + } + Fsrc, err := cache.Get(ctx, src) + if err != fs.ErrorIsFile { + require.NoError(t, err) + } + + var matches bytes.Buffer + opt := operations.CheckOpt{ + Fdst: Farchive, + Fsrc: Fsrc, + Match: &matches, + } + + for _, action := range []string{"Check", "Download"} { + t.Run(action, func(t *testing.T) { + matches.Reset() + if action == "Download" { + assert.NoError(t, operations.CheckDownload(ctx, &opt)) + } else { + assert.NoError(t, operations.Check(ctx, &opt)) + } + if expectedCount > 0 { + assert.Equal(t, expectedCount, strings.Count(matches.String(), "\n")) + } + }) + } + + t.Run("NewObject", func(t *testing.T) { + // Check we can run NewObject on all files and read them + assert.NoError(t, operations.ListFn(ctx, Fsrc, func(srcObj fs.Object) { + if t.Failed() { + return + } + remote := srcObj.Remote() + archiveObj, err := Farchive.NewObject(ctx, remote) + require.NoError(t, err, remote) + assert.Equal(t, remote, archiveObj.Remote(), remote) + + // Test that the contents are the same + archiveBuf := fstests.ReadObject(ctx, t, archiveObj, -1) + srcBuf := fstests.ReadObject(ctx, t, srcObj, -1) + assert.Equal(t, srcBuf, archiveBuf) + + if len(srcBuf) < 81 { + return + } + + // Tests that Open works with SeekOption + assert.Equal(t, srcBuf[50:], fstests.ReadObject(ctx, t, archiveObj, -1, &fs.SeekOption{Offset: 50}), "contents differ after seek") + + // Tests that Open works with RangeOption + for _, test := range []struct { + ro fs.RangeOption + wantStart, wantEnd int + }{ + {fs.RangeOption{Start: 5, End: 15}, 5, 16}, + {fs.RangeOption{Start: 80, End: -1}, 80, len(srcBuf)}, + {fs.RangeOption{Start: 81, End: 100000}, 81, len(srcBuf)}, + {fs.RangeOption{Start: -1, End: 20}, len(srcBuf) - 20, len(srcBuf)}, // if start is omitted this means get the final bytes + // {fs.RangeOption{Start: -1, End: -1}, 0, len(srcBuf)}, - this seems to work but the RFC doesn't define it + } { + got := fstests.ReadObject(ctx, t, archiveObj, -1, &test.ro) + foundAt := strings.Index(srcBuf, got) + help := fmt.Sprintf("%#v failed want [%d:%d] got [%d:%d]", test.ro, test.wantStart, test.wantEnd, foundAt, foundAt+len(got)) + assert.Equal(t, srcBuf[test.wantStart:test.wantEnd], got, help) + } + + // Test that the modtimes are correct + fstest.AssertTimeEqualWithPrecision(t, remote, srcObj.ModTime(ctx), archiveObj.ModTime(ctx), Farchive.Precision()) + + // Test that the sizes are correct + assert.Equal(t, srcObj.Size(), archiveObj.Size()) + + // Test that Strings are OK + assert.Equal(t, srcObj.String(), archiveObj.String()) + })) + }) + + // t.Logf("Fdst ------------- %v", Fdst) + // operations.List(ctx, Fdst, os.Stdout) + // t.Logf("Fsrc ------------- %v", Fsrc) + // operations.List(ctx, Fsrc, os.Stdout) + }) + +} + +// test creating and reading back some archives +// +// Note that this uses rclone and zip as external binaries. 
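+//
+// archiveFn is called to create the archive file at output from the
+// directory tree at input.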
+func testArchive(t *testing.T, archiveName string, archiveFn func(t *testing.T, output, input string)) { + ctx := context.Background() + checkFiles := 1000 + + // create random test input files + inputRoot := t.TempDir() + input := filepath.Join(inputRoot, archiveName) + require.NoError(t, os.Mkdir(input, 0777)) + run(t, "rclone", "test", "makefiles", "--files", strconv.Itoa(checkFiles), "--ascii", input) + + // Create the archive + output := t.TempDir() + zipFile := path.Join(output, archiveName) + archiveFn(t, zipFile, input) + + // Check the archive itself + checkTree(ctx, "Archive", t, ":archive:"+zipFile, input, checkFiles) + + // Now check a subdirectory + fis, err := os.ReadDir(input) + require.NoError(t, err) + subDir := "NOT FOUND" + aFile := "NOT FOUND" + for _, fi := range fis { + if fi.IsDir() { + subDir = fi.Name() + } else { + aFile = fi.Name() + } + } + checkTree(ctx, "SubDir", t, ":archive:"+zipFile+"/"+subDir, filepath.Join(input, subDir), 0) + + // Now check a single file + fiCtx, fi := filter.AddConfig(ctx) + require.NoError(t, fi.AddRule("+ "+aFile)) + require.NoError(t, fi.AddRule("- *")) + checkTree(fiCtx, "SingleFile", t, ":archive:"+zipFile+"/"+aFile, filepath.Join(input, aFile), 0) + + // Now check the level above + checkTree(ctx, "Root", t, ":archive:"+output, inputRoot, checkFiles) + // run(t, "cp", "-a", inputRoot, output, "/tmp/test-"+archiveName) +} + +// Make sure we have the executable named +func skipIfNoExe(t *testing.T, exeName string) { + _, err := exec.LookPath(exeName) + if err != nil { + t.Skipf("%s executable not installed", exeName) + } +} + +// Test creating and reading back some archives +// +// Note that this uses rclone and zip as external binaries. +func TestArchiveZip(t *testing.T) { + fstest.Initialise() + skipIfNoExe(t, "zip") + skipIfNoExe(t, "rclone") + testArchive(t, "test.zip", func(t *testing.T, output, input string) { + oldcwd, err := os.Getwd() + require.NoError(t, err) + require.NoError(t, os.Chdir(input)) + defer func() { + require.NoError(t, os.Chdir(oldcwd)) + }() + run(t, "zip", "-9r", output, ".") + }) +} + +// Test creating and reading back some archives +// +// Note that this uses rclone and squashfs as external binaries. 
+func TestArchiveSquashfs(t *testing.T) { + fstest.Initialise() + skipIfNoExe(t, "mksquashfs") + skipIfNoExe(t, "rclone") + testArchive(t, "test.sqfs", func(t *testing.T, output, input string) { + run(t, "mksquashfs", input, output) + }) +} diff --git a/backend/archive/archive_test.go b/backend/archive/archive_test.go new file mode 100644 index 000000000..5917810d4 --- /dev/null +++ b/backend/archive/archive_test.go @@ -0,0 +1,67 @@ +//go:build !plan9 + +// Test Archive filesystem interface +package archive_test + +import ( + "testing" + + _ "github.com/rclone/rclone/backend/local" + _ "github.com/rclone/rclone/backend/memory" + "github.com/rclone/rclone/fstest" + "github.com/rclone/rclone/fstest/fstests" +) + +var ( + unimplementableFsMethods = []string{"ListR", "ListP", "MkdirMetadata", "DirSetModTime"} + // In these tests we receive objects from the underlying remote which don't implement these methods + unimplementableObjectMethods = []string{"GetTier", "ID", "Metadata", "MimeType", "SetTier", "UnWrap", "SetMetadata"} +) + +// TestIntegration runs integration tests against the remote +func TestIntegration(t *testing.T) { + if *fstest.RemoteName == "" { + t.Skip("Skipping as -remote not set") + } + fstests.Run(t, &fstests.Opt{ + RemoteName: *fstest.RemoteName, + UnimplementableFsMethods: unimplementableFsMethods, + UnimplementableObjectMethods: unimplementableObjectMethods, + }) +} + +func TestLocal(t *testing.T) { + if *fstest.RemoteName != "" { + t.Skip("Skipping as -remote set") + } + remote := t.TempDir() + name := "TestArchiveLocal" + fstests.Run(t, &fstests.Opt{ + RemoteName: name + ":", + ExtraConfig: []fstests.ExtraConfigItem{ + {Name: name, Key: "type", Value: "archive"}, + {Name: name, Key: "remote", Value: remote}, + }, + QuickTestOK: true, + UnimplementableFsMethods: unimplementableFsMethods, + UnimplementableObjectMethods: unimplementableObjectMethods, + }) +} + +func TestMemory(t *testing.T) { + if *fstest.RemoteName != "" { + t.Skip("Skipping as -remote set") + } + remote := ":memory:" + name := "TestArchiveMemory" + fstests.Run(t, &fstests.Opt{ + RemoteName: name + ":", + ExtraConfig: []fstests.ExtraConfigItem{ + {Name: name, Key: "type", Value: "archive"}, + {Name: name, Key: "remote", Value: remote}, + }, + QuickTestOK: true, + UnimplementableFsMethods: unimplementableFsMethods, + UnimplementableObjectMethods: unimplementableObjectMethods, + }) +} diff --git a/backend/archive/archive_unsupported.go b/backend/archive/archive_unsupported.go new file mode 100644 index 000000000..c4b1f28c5 --- /dev/null +++ b/backend/archive/archive_unsupported.go @@ -0,0 +1,7 @@ +// Build for archive for unsupported platforms to stop go complaining +// about "no buildable Go source files " + +//go:build plan9 + +// Package archive implements a backend to access archive files in a remote +package archive diff --git a/backend/archive/archiver/archiver.go b/backend/archive/archiver/archiver.go new file mode 100644 index 000000000..a065960fb --- /dev/null +++ b/backend/archive/archiver/archiver.go @@ -0,0 +1,24 @@ +// Package archiver registers all the archivers +package archiver + +import ( + "context" + + "github.com/rclone/rclone/fs" +) + +// Archiver describes an archive package +type Archiver struct { + // New constructs an Fs from the (wrappedFs, remote) with the objects + // prefix with prefix and rooted at root + New func(ctx context.Context, f fs.Fs, remote, prefix, root string) (fs.Fs, error) + Extension string +} + +// Archivers is a slice of all registered archivers +var Archivers 
[]Archiver + +// Register adds the archivers provided to the list of known archivers +func Register(as ...Archiver) { + Archivers = append(Archivers, as...) +} diff --git a/backend/archive/base/base.go b/backend/archive/base/base.go new file mode 100644 index 000000000..7f52a07b6 --- /dev/null +++ b/backend/archive/base/base.go @@ -0,0 +1,233 @@ +// Package base is a base archive Fs +package base + +import ( + "context" + "errors" + "fmt" + "io" + "path" + "time" + + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/hash" + "github.com/rclone/rclone/vfs" +) + +// Fs represents a wrapped fs.Fs +type Fs struct { + f fs.Fs + wrapper fs.Fs + name string + features *fs.Features // optional features + vfs *vfs.VFS + node vfs.Node // archive object + remote string // remote of the archive object + prefix string // position for objects + prefixSlash string // position for objects with a slash on + root string // position to read from within the archive +} + +var errNotImplemented = errors.New("internal error: method not implemented in archiver") + +// New constructs an Fs from the (wrappedFs, remote) with the objects +// prefix with prefix and rooted at root +func New(ctx context.Context, wrappedFs fs.Fs, remote, prefix, root string) (*Fs, error) { + // FIXME vfs cache? + // FIXME could factor out ReadFileHandle and just use that rather than the full VFS + fs.Debugf(nil, "New: remote=%q, prefix=%q, root=%q", remote, prefix, root) + VFS := vfs.New(wrappedFs, nil) + node, err := VFS.Stat(remote) + if err != nil { + return nil, fmt.Errorf("failed to find %q archive: %w", remote, err) + } + + f := &Fs{ + f: wrappedFs, + name: path.Join(fs.ConfigString(wrappedFs), remote), + vfs: VFS, + node: node, + remote: remote, + root: root, + prefix: prefix, + prefixSlash: prefix + "/", + } + + // FIXME + // the features here are ones we could support, and they are + // ANDed with the ones from wrappedFs + // + // FIXME some of these need to be forced on - CanHaveEmptyDirectories + f.features = (&fs.Features{ + CaseInsensitive: false, + DuplicateFiles: false, + ReadMimeType: false, // MimeTypes not supported with gzip + WriteMimeType: false, + BucketBased: false, + CanHaveEmptyDirectories: true, + }).Fill(ctx, f).Mask(ctx, wrappedFs).WrapsFs(f, wrappedFs) + + return f, nil +} + +// Name of the remote (as passed into NewFs) +func (f *Fs) Name() string { + return f.name +} + +// Root of the remote (as passed into NewFs) +func (f *Fs) Root() string { + return f.root +} + +// Features returns the optional features of this Fs +func (f *Fs) Features() *fs.Features { + return f.features +} + +// String returns a description of the FS +func (f *Fs) String() string { + return f.name +} + +// List the objects and directories in dir into entries. The +// entries can be returned in any order but should be for a +// complete directory. +// +// dir should be "" to list the root, and should not have +// trailing slashes. +// +// This should return ErrDirNotFound if the directory isn't +// found. +func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) { + return nil, errNotImplemented +} + +// NewObject finds the Object at remote. 
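+//
+// In this base implementation it is a stub which always returns an error.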
+func (f *Fs) NewObject(ctx context.Context, remote string) (o fs.Object, err error) { + return nil, errNotImplemented +} + +// Precision of the ModTimes in this Fs +func (f *Fs) Precision() time.Duration { + return time.Second +} + +// Mkdir makes the directory (container, bucket) +// +// Shouldn't return an error if it already exists +func (f *Fs) Mkdir(ctx context.Context, dir string) error { + return vfs.EROFS +} + +// Rmdir removes the directory (container, bucket) if empty +// +// Return an error if it doesn't exist or isn't empty +func (f *Fs) Rmdir(ctx context.Context, dir string) error { + return vfs.EROFS +} + +// Put in to the remote path with the modTime given of the given size +// +// May create the object even if it returns an error - if so +// will return the object and the error, otherwise will return +// nil and the error +func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (o fs.Object, err error) { + return nil, vfs.EROFS +} + +// Hashes returns the supported hash sets. +func (f *Fs) Hashes() hash.Set { + return hash.Set(hash.None) +} + +// UnWrap returns the Fs that this Fs is wrapping +func (f *Fs) UnWrap() fs.Fs { + return f.f +} + +// WrapFs returns the Fs that is wrapping this Fs +func (f *Fs) WrapFs() fs.Fs { + return f.wrapper +} + +// SetWrapper sets the Fs that is wrapping this Fs +func (f *Fs) SetWrapper(wrapper fs.Fs) { + f.wrapper = wrapper +} + +// Object describes an object to be read from the raw zip file +type Object struct { + f *Fs + remote string +} + +// Fs returns read only access to the Fs that this object is part of +func (o *Object) Fs() fs.Info { + return o.f +} + +// Return a string version +func (o *Object) String() string { + if o == nil { + return "" + } + return o.Remote() +} + +// Remote returns the remote path +func (o *Object) Remote() string { + return o.remote +} + +// Size returns the size of the file +func (o *Object) Size() int64 { + return -1 +} + +// ModTime returns the modification time of the object +// +// It attempts to read the objects mtime and if that isn't present the +// LastModified returned in the http headers +func (o *Object) ModTime(ctx context.Context) time.Time { + return time.Now() +} + +// SetModTime sets the modification time of the local fs object +func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error { + return vfs.EROFS +} + +// Storable raturns a boolean indicating if this object is storable +func (o *Object) Storable() bool { + return true +} + +// Hash returns the selected checksum of the file +// If no checksum is available it returns "" +func (o *Object) Hash(ctx context.Context, ht hash.Type) (string, error) { + return "", hash.ErrUnsupported +} + +// Open opens the file for read. 
Call Close() on the returned io.ReadCloser +func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.ReadCloser, err error) { + return nil, errNotImplemented +} + +// Update in to the object with the modTime given of the given size +func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error { + return vfs.EROFS +} + +// Remove an object +func (o *Object) Remove(ctx context.Context) error { + return vfs.EROFS +} + +// Check the interfaces are satisfied +var ( + _ fs.Fs = (*Fs)(nil) + _ fs.UnWrapper = (*Fs)(nil) + _ fs.Wrapper = (*Fs)(nil) + _ fs.Object = (*Object)(nil) +) diff --git a/backend/archive/squashfs/cache.go b/backend/archive/squashfs/cache.go new file mode 100644 index 000000000..b67b21879 --- /dev/null +++ b/backend/archive/squashfs/cache.go @@ -0,0 +1,165 @@ +package squashfs + +// Could just be using bare object Open with RangeRequest which +// would transfer the minimum amount of data but may be slower. + +import ( + "errors" + "fmt" + "io/fs" + "os" + "sync" + + "github.com/diskfs/go-diskfs/backend" + "github.com/rclone/rclone/vfs" +) + +// Cache file handles for accessing the file +type cache struct { + node vfs.Node + fhsMu sync.Mutex + fhs []cacheHandle +} + +// A cached file handle +type cacheHandle struct { + offset int64 + fh vfs.Handle +} + +// Make a new cache +func newCache(node vfs.Node) *cache { + return &cache{ + node: node, + } +} + +// Get a vfs.Handle from the pool or open one +// +// This tries to find an open file handle which doesn't require seeking. +func (c *cache) open(off int64) (fh vfs.Handle, err error) { + c.fhsMu.Lock() + defer c.fhsMu.Unlock() + + if len(c.fhs) > 0 { + // Look for exact match first + for i, cfh := range c.fhs { + if cfh.offset == off { + // fs.Debugf(nil, "CACHE MATCH") + c.fhs = append(c.fhs[:i], c.fhs[i+1:]...) + return cfh.fh, nil + + } + } + // fs.Debugf(nil, "CACHE MISS") + // Just take the first one if not found + cfh := c.fhs[0] + c.fhs = c.fhs[1:] + return cfh.fh, nil + } + + fh, err = c.node.Open(os.O_RDONLY) + if err != nil { + return nil, fmt.Errorf("failed to open squashfs archive: %w", err) + } + + return fh, nil +} + +// Close a vfs.Handle or return it to the pool +// +// off should be the offset the file handle would read from without seeking +func (c *cache) close(fh vfs.Handle, off int64) { + c.fhsMu.Lock() + defer c.fhsMu.Unlock() + + c.fhs = append(c.fhs, cacheHandle{ + offset: off, + fh: fh, + }) +} + +// ReadAt reads len(p) bytes into p starting at offset off in the underlying +// input source. It returns the number of bytes read (0 <= n <= len(p)) and any +// error encountered. +// +// When ReadAt returns n < len(p), it returns a non-nil error explaining why +// more bytes were not returned. In this respect, ReadAt is stricter than Read. +// +// Even if ReadAt returns n < len(p), it may use all of p as scratch +// space during the call. If some data is available but not len(p) bytes, +// ReadAt blocks until either all the data is available or an error occurs. +// In this respect ReadAt is different from Read. +// +// If the n = len(p) bytes returned by ReadAt are at the end of the input +// source, ReadAt may return either err == EOF or err == nil. +// +// If ReadAt is reading from an input source with a seek offset, ReadAt should +// not affect nor be affected by the underlying seek offset. +// +// Clients of ReadAt can execute parallel ReadAt calls on the same input +// source. +// +// Implementations must not retain p. 
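+//
+// This implementation borrows a vfs.Handle from the pool (preferring one
+// whose previous read finished at off), reads with ReadAt, and then
+// returns the handle to the pool recorded at the offset following this
+// read.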
+func (c *cache) ReadAt(p []byte, off int64) (n int, err error) { + fh, err := c.open(off) + if err != nil { + return n, err + } + defer func() { + c.close(fh, off+int64(len(p))) + }() + // fs.Debugf(nil, "ReadAt(p[%d], off=%d, fh=%p)", len(p), off, fh) + return fh.ReadAt(p, off) +} + +var errCacheNotImplemented = errors.New("internal error: squashfs cache doesn't implement method") + +// WriteAt method dummy stub to satisfy interface +func (c *cache) WriteAt(p []byte, off int64) (n int, err error) { + return 0, errCacheNotImplemented +} + +// Seek method dummy stub to satisfy interface +func (c *cache) Seek(offset int64, whence int) (int64, error) { + return 0, errCacheNotImplemented +} + +// Read method dummy stub to satisfy interface +func (c *cache) Read(p []byte) (n int, err error) { + return 0, errCacheNotImplemented +} + +func (c *cache) Stat() (fs.FileInfo, error) { + return nil, errCacheNotImplemented +} + +// Close the file +func (c *cache) Close() (err error) { + c.fhsMu.Lock() + defer c.fhsMu.Unlock() + + // Close any open file handles + for i := range c.fhs { + fh := &c.fhs[i] + newErr := fh.fh.Close() + if err == nil { + err = newErr + } + } + c.fhs = nil + return err +} + +// Sys returns OS-specific file for ioctl calls via fd +func (c *cache) Sys() (*os.File, error) { + return nil, errCacheNotImplemented +} + +// Writable returns file for read-write operations +func (c *cache) Writable() (backend.WritableFile, error) { + return nil, errCacheNotImplemented +} + +// check interfaces +var _ backend.Storage = (*cache)(nil) diff --git a/backend/archive/squashfs/squashfs.go b/backend/archive/squashfs/squashfs.go new file mode 100644 index 000000000..2610db276 --- /dev/null +++ b/backend/archive/squashfs/squashfs.go @@ -0,0 +1,446 @@ +// Package squashfs implements a squashfs archiver for the archive backend +package squashfs + +import ( + "context" + "fmt" + "io" + "path" + "strings" + "time" + + "github.com/diskfs/go-diskfs/filesystem/squashfs" + "github.com/rclone/rclone/backend/archive/archiver" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/hash" + "github.com/rclone/rclone/fs/log" + "github.com/rclone/rclone/lib/readers" + "github.com/rclone/rclone/vfs" + "github.com/rclone/rclone/vfs/vfscommon" +) + +func init() { + archiver.Register(archiver.Archiver{ + New: New, + Extension: ".sqfs", + }) +} + +// Fs represents a wrapped fs.Fs +type Fs struct { + f fs.Fs + wrapper fs.Fs + name string + features *fs.Features // optional features + vfs *vfs.VFS + sqfs *squashfs.FileSystem // interface to the squashfs + c *cache + node vfs.Node // squashfs file object - set if reading + remote string // remote of the squashfs file object + prefix string // position for objects + prefixSlash string // position for objects with a slash on + root string // position to read from within the archive +} + +// New constructs an Fs from the (wrappedFs, remote) with the objects +// prefix with prefix and rooted at root +func New(ctx context.Context, wrappedFs fs.Fs, remote, prefix, root string) (fs.Fs, error) { + // FIXME vfs cache? 
+ // FIXME could factor out ReadFileHandle and just use that rather than the full VFS + fs.Debugf(nil, "Squashfs: New: remote=%q, prefix=%q, root=%q", remote, prefix, root) + vfsOpt := vfscommon.Opt + vfsOpt.ReadWait = 0 + VFS := vfs.New(wrappedFs, &vfsOpt) + node, err := VFS.Stat(remote) + if err != nil { + return nil, fmt.Errorf("failed to find %q archive: %w", remote, err) + } + + c := newCache(node) + + // FIXME blocksize + sqfs, err := squashfs.Read(c, node.Size(), 0, 1024*1024) + if err != nil { + return nil, fmt.Errorf("failed to read squashfs: %w", err) + } + + f := &Fs{ + f: wrappedFs, + name: path.Join(fs.ConfigString(wrappedFs), remote), + vfs: VFS, + node: node, + sqfs: sqfs, + c: c, + remote: remote, + root: strings.Trim(root, "/"), + prefix: prefix, + prefixSlash: prefix + "/", + } + if prefix == "" { + f.prefixSlash = "" + } + + singleObject := false + + // Find the directory the root points to + if f.root != "" && !strings.HasSuffix(root, "/") { + native, err := f.toNative("") + if err == nil { + native = strings.TrimRight(native, "/") + _, err := f.newObjectNative(native) + if err == nil { + // If it pointed to a file, find the directory above + f.root = path.Dir(f.root) + if f.root == "." || f.root == "/" { + f.root = "" + } + } + } + } + + // FIXME + // the features here are ones we could support, and they are + // ANDed with the ones from wrappedFs + // + // FIXME some of these need to be forced on - CanHaveEmptyDirectories + f.features = (&fs.Features{ + CaseInsensitive: false, + DuplicateFiles: false, + ReadMimeType: false, // MimeTypes not supported with gsquashfs + WriteMimeType: false, + BucketBased: false, + CanHaveEmptyDirectories: true, + }).Fill(ctx, f).Mask(ctx, wrappedFs).WrapsFs(f, wrappedFs) + + if singleObject { + return f, fs.ErrorIsFile + } + return f, nil +} + +// Name of the remote (as passed into NewFs) +func (f *Fs) Name() string { + return f.name +} + +// Root of the remote (as passed into NewFs) +func (f *Fs) Root() string { + return f.root +} + +// Features returns the optional features of this Fs +func (f *Fs) Features() *fs.Features { + return f.features +} + +// String returns a description of the FS +func (f *Fs) String() string { + return fmt.Sprintf("Squashfs %q", f.name) +} + +// This turns a remote into a native path in the squashfs starting with a / +func (f *Fs) toNative(remote string) (string, error) { + native := strings.Trim(remote, "/") + if f.prefix == "" { + native = "/" + native + } else if native == f.prefix { + native = "/" + } else if !strings.HasPrefix(native, f.prefixSlash) { + return "", fmt.Errorf("internal error: %q doesn't start with prefix %q", native, f.prefixSlash) + } else { + native = native[len(f.prefix):] + } + if f.root != "" { + native = "/" + f.root + native + } + return native, nil +} + +// Turn a (nativeDir, leaf) into a remote +func (f *Fs) fromNative(nativeDir string, leaf string) string { + // fs.Debugf(nil, "nativeDir = %q, leaf = %q, root=%q", nativeDir, leaf, f.root) + dir := nativeDir + if f.root != "" { + dir = strings.TrimPrefix(dir, "/"+f.root) + } + remote := f.prefixSlash + strings.Trim(path.Join(dir, leaf), "/") + // fs.Debugf(nil, "dir = %q, remote=%q", dir, remote) + return remote +} + +// Convert a FileInfo into an Object from native dir +func (f *Fs) objectFromFileInfo(nativeDir string, item squashfs.FileStat) *Object { + return &Object{ + fs: f, + remote: f.fromNative(nativeDir, item.Name()), + size: item.Size(), + modTime: item.ModTime(), + item: item, + } +} + +// List the objects and 
directories in dir into entries. The +// entries can be returned in any order but should be for a +// complete directory. +// +// dir should be "" to list the root, and should not have +// trailing slashes. +// +// This should return ErrDirNotFound if the directory isn't +// found. +func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) { + defer log.Trace(f, "dir=%q", dir)("entries=%v, err=%v", &entries, &err) + + nativeDir, err := f.toNative(dir) + if err != nil { + return nil, err + } + + items, err := f.sqfs.ReadDir(nativeDir) + if err != nil { + return nil, fmt.Errorf("read squashfs: couldn't read directory: %w", err) + } + + entries = make(fs.DirEntries, 0, len(items)) + for _, fi := range items { + item, ok := fi.(squashfs.FileStat) + if !ok { + return nil, fmt.Errorf("internal error: unexpected type for %q: %T", fi.Name(), fi) + } + // fs.Debugf(item.Name(), "entry = %#v", item) + var entry fs.DirEntry + if err != nil { + return nil, fmt.Errorf("error reading item %q: %q", item.Name(), err) + } + if item.IsDir() { + var remote = f.fromNative(nativeDir, item.Name()) + entry = fs.NewDir(remote, item.ModTime()) + } else { + if item.Mode().IsRegular() { + entry = f.objectFromFileInfo(nativeDir, item) + } else { + fs.Debugf(item.Name(), "FIXME Not regular file - skipping") + continue + } + } + entries = append(entries, entry) + } + + // fs.Debugf(f, "dir=%q, entries=%v", dir, entries) + return entries, nil +} + +// newObjectNative finds the object at the native path passed in +func (f *Fs) newObjectNative(nativePath string) (o fs.Object, err error) { + // get the path and filename + dir, leaf := path.Split(nativePath) + dir = strings.TrimRight(dir, "/") + leaf = strings.Trim(leaf, "/") + + // FIXME need to detect directory not found + fis, err := f.sqfs.ReadDir(dir) + if err != nil { + + return nil, fs.ErrorObjectNotFound + } + + for _, fi := range fis { + if fi.Name() == leaf { + if fi.IsDir() { + return nil, fs.ErrorNotAFile + } + item, ok := fi.(squashfs.FileStat) + if !ok { + return nil, fmt.Errorf("internal error: unexpected type for %q: %T", fi.Name(), fi) + } + o = f.objectFromFileInfo(dir, item) + break + } + } + if o == nil { + return nil, fs.ErrorObjectNotFound + } + return o, nil +} + +// NewObject finds the Object at remote. +func (f *Fs) NewObject(ctx context.Context, remote string) (o fs.Object, err error) { + defer log.Trace(f, "remote=%q", remote)("obj=%v, err=%v", &o, &err) + + nativePath, err := f.toNative(remote) + if err != nil { + return nil, err + } + return f.newObjectNative(nativePath) +} + +// Precision of the ModTimes in this Fs +func (f *Fs) Precision() time.Duration { + return time.Second +} + +// Mkdir makes the directory (container, bucket) +// +// Shouldn't return an error if it already exists +func (f *Fs) Mkdir(ctx context.Context, dir string) error { + return vfs.EROFS +} + +// Rmdir removes the directory (container, bucket) if empty +// +// Return an error if it doesn't exist or isn't empty +func (f *Fs) Rmdir(ctx context.Context, dir string) error { + return vfs.EROFS +} + +// Put in to the remote path with the modTime given of the given size +// +// May create the object even if it returns an error - if so +// will return the object and the error, otherwise will return +// nil and the error +func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (o fs.Object, err error) { + return nil, vfs.EROFS +} + +// Hashes returns the supported hash sets. 
+func (f *Fs) Hashes() hash.Set { + return hash.Set(hash.None) +} + +// UnWrap returns the Fs that this Fs is wrapping +func (f *Fs) UnWrap() fs.Fs { + return f.f +} + +// WrapFs returns the Fs that is wrapping this Fs +func (f *Fs) WrapFs() fs.Fs { + return f.wrapper +} + +// SetWrapper sets the Fs that is wrapping this Fs +func (f *Fs) SetWrapper(wrapper fs.Fs) { + f.wrapper = wrapper +} + +// Object describes an object to be read from the raw squashfs file +type Object struct { + fs *Fs + remote string + size int64 + modTime time.Time + item squashfs.FileStat +} + +// Fs returns read only access to the Fs that this object is part of +func (o *Object) Fs() fs.Info { + return o.fs +} + +// Return a string version +func (o *Object) String() string { + if o == nil { + return "" + } + return o.Remote() +} + +// Turn a squashfs path into a full path for the parent Fs +// func (o *Object) path(remote string) string { +// return path.Join(o.fs.prefix, remote) +// } + +// Remote returns the remote path +func (o *Object) Remote() string { + return o.remote +} + +// Size returns the size of the file +func (o *Object) Size() int64 { + return o.size +} + +// ModTime returns the modification time of the object +// +// It attempts to read the objects mtime and if that isn't present the +// LastModified returned in the http headers +func (o *Object) ModTime(ctx context.Context) time.Time { + return o.modTime +} + +// SetModTime sets the modification time of the local fs object +func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error { + return vfs.EROFS +} + +// Storable raturns a boolean indicating if this object is storable +func (o *Object) Storable() bool { + return true +} + +// Hash returns the selected checksum of the file +// If no checksum is available it returns "" +func (o *Object) Hash(ctx context.Context, ht hash.Type) (string, error) { + return "", hash.ErrUnsupported +} + +// Open opens the file for read. 
Call Close() on the returned io.ReadCloser +func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.ReadCloser, err error) { + var offset, limit int64 = 0, -1 + for _, option := range options { + switch x := option.(type) { + case *fs.SeekOption: + offset = x.Offset + case *fs.RangeOption: + offset, limit = x.Decode(o.Size()) + default: + if option.Mandatory() { + fs.Logf(o, "Unsupported mandatory option: %v", option) + } + } + } + + remote, err := o.fs.toNative(o.remote) + if err != nil { + return nil, err + } + + fs.Debugf(o, "Opening %q", remote) + //fh, err := o.fs.sqfs.OpenFile(remote, os.O_RDONLY) + fh, err := o.item.Open() + if err != nil { + return nil, err + } + + // discard data from start as necessary + if offset > 0 { + _, err = fh.Seek(offset, io.SeekStart) + if err != nil { + return nil, err + } + } + // If limited then don't return everything + if limit >= 0 { + fs.Debugf(nil, "limit=%d, offset=%d, options=%v", limit, offset, options) + return readers.NewLimitedReadCloser(fh, limit), nil + } + + return fh, nil +} + +// Update in to the object with the modTime given of the given size +func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error { + return vfs.EROFS +} + +// Remove an object +func (o *Object) Remove(ctx context.Context) error { + return vfs.EROFS +} + +// Check the interfaces are satisfied +var ( + _ fs.Fs = (*Fs)(nil) + _ fs.UnWrapper = (*Fs)(nil) + _ fs.Wrapper = (*Fs)(nil) + _ fs.Object = (*Object)(nil) +) diff --git a/backend/archive/zip/zip.go b/backend/archive/zip/zip.go new file mode 100644 index 000000000..509b4317e --- /dev/null +++ b/backend/archive/zip/zip.go @@ -0,0 +1,385 @@ +// Package zip implements a zip archiver for the archive backend +package zip + +import ( + "archive/zip" + "context" + "errors" + "fmt" + "io" + "os" + "path" + "strings" + "time" + + "github.com/rclone/rclone/backend/archive/archiver" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/dirtree" + "github.com/rclone/rclone/fs/hash" + "github.com/rclone/rclone/fs/log" + "github.com/rclone/rclone/lib/readers" + "github.com/rclone/rclone/vfs" + "github.com/rclone/rclone/vfs/vfscommon" +) + +func init() { + archiver.Register(archiver.Archiver{ + New: New, + Extension: ".zip", + }) +} + +// Fs represents a wrapped fs.Fs +type Fs struct { + f fs.Fs + wrapper fs.Fs + name string + features *fs.Features // optional features + vfs *vfs.VFS + node vfs.Node // zip file object - set if reading + remote string // remote of the zip file object + prefix string // position for objects + prefixSlash string // position for objects with a slash on + root string // position to read from within the archive + dt dirtree.DirTree // read from zipfile +} + +// New constructs an Fs from the (wrappedFs, remote) with the objects +// prefix with prefix and rooted at root +func New(ctx context.Context, wrappedFs fs.Fs, remote, prefix, root string) (fs.Fs, error) { + // FIXME vfs cache? 
+ // FIXME could factor out ReadFileHandle and just use that rather than the full VFS + fs.Debugf(nil, "Zip: New: remote=%q, prefix=%q, root=%q", remote, prefix, root) + vfsOpt := vfscommon.Opt + vfsOpt.ReadWait = 0 + VFS := vfs.New(wrappedFs, &vfsOpt) + node, err := VFS.Stat(remote) + if err != nil { + return nil, fmt.Errorf("failed to find %q archive: %w", remote, err) + } + + f := &Fs{ + f: wrappedFs, + name: path.Join(fs.ConfigString(wrappedFs), remote), + vfs: VFS, + node: node, + remote: remote, + root: root, + prefix: prefix, + prefixSlash: prefix + "/", + } + + // Read the contents of the zip file + singleObject, err := f.readZip() + if err != nil { + return nil, fmt.Errorf("failed to open zip file: %w", err) + } + + // FIXME + // the features here are ones we could support, and they are + // ANDed with the ones from wrappedFs + // + // FIXME some of these need to be forced on - CanHaveEmptyDirectories + f.features = (&fs.Features{ + CaseInsensitive: false, + DuplicateFiles: false, + ReadMimeType: false, // MimeTypes not supported with gzip + WriteMimeType: false, + BucketBased: false, + CanHaveEmptyDirectories: true, + }).Fill(ctx, f).Mask(ctx, wrappedFs).WrapsFs(f, wrappedFs) + + if singleObject { + return f, fs.ErrorIsFile + } + return f, nil +} + +// Name of the remote (as passed into NewFs) +func (f *Fs) Name() string { + return f.name +} + +// Root of the remote (as passed into NewFs) +func (f *Fs) Root() string { + return f.root +} + +// Features returns the optional features of this Fs +func (f *Fs) Features() *fs.Features { + return f.features +} + +// String returns a description of the FS +func (f *Fs) String() string { + return fmt.Sprintf("Zip %q", f.name) +} + +// readZip the zip file into f +// +// Returns singleObject=true if f.root points to a file +func (f *Fs) readZip() (singleObject bool, err error) { + if f.node == nil { + return singleObject, fs.ErrorDirNotFound + } + size := f.node.Size() + if size < 0 { + return singleObject, errors.New("can't read from zip file with unknown size") + } + r, err := f.node.Open(os.O_RDONLY) + if err != nil { + return singleObject, fmt.Errorf("failed to open zip file: %w", err) + } + zr, err := zip.NewReader(r, size) + if err != nil { + return singleObject, fmt.Errorf("failed to read zip file: %w", err) + } + dt := dirtree.New() + for _, file := range zr.File { + remote := strings.Trim(path.Clean(file.Name), "/") + if remote == "." { + remote = "" + } + remote = path.Join(f.prefix, remote) + if f.root != "" { + // Ignore all files outside the root + if !strings.HasPrefix(remote, f.root) { + continue + } + if remote == f.root { + remote = "" + } else { + remote = strings.TrimPrefix(remote, f.root+"/") + } + } + if strings.HasSuffix(file.Name, "/") { + dir := fs.NewDir(remote, file.Modified) + dt.AddDir(dir) + } else { + if remote == "" { + remote = path.Base(f.root) + singleObject = true + dt = dirtree.New() + } + o := &Object{ + f: f, + remote: remote, + fh: &file.FileHeader, + file: file, + } + dt.Add(o) + if singleObject { + break + } + } + } + dt.CheckParents("") + dt.Sort() + f.dt = dt + //fs.Debugf(nil, "dt = %v", dt) + return singleObject, nil +} + +// List the objects and directories in dir into entries. The +// entries can be returned in any order but should be for a +// complete directory. +// +// dir should be "" to list the root, and should not have +// trailing slashes. +// +// This should return ErrDirNotFound if the directory isn't +// found. 
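+//
+// Listings are served from the directory tree built by readZip when the
+// backend was created, so no further reads of the zip file are needed.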
+func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) { + defer log.Trace(f, "dir=%q", dir)("entries=%v, err=%v", &entries, &err) + // _, err = f.strip(dir) + // if err != nil { + // return nil, err + // } + entries, ok := f.dt[dir] + if !ok { + return nil, fs.ErrorDirNotFound + } + fs.Debugf(f, "dir=%q, entries=%v", dir, entries) + return entries, nil +} + +// NewObject finds the Object at remote. +func (f *Fs) NewObject(ctx context.Context, remote string) (o fs.Object, err error) { + defer log.Trace(f, "remote=%q", remote)("obj=%v, err=%v", &o, &err) + if f.dt == nil { + return nil, fs.ErrorObjectNotFound + } + _, entry := f.dt.Find(remote) + if entry == nil { + return nil, fs.ErrorObjectNotFound + } + o, ok := entry.(*Object) + if !ok { + return nil, fs.ErrorNotAFile + } + return o, nil +} + +// Precision of the ModTimes in this Fs +func (f *Fs) Precision() time.Duration { + return time.Second +} + +// Mkdir makes the directory (container, bucket) +// +// Shouldn't return an error if it already exists +func (f *Fs) Mkdir(ctx context.Context, dir string) error { + return vfs.EROFS +} + +// Rmdir removes the directory (container, bucket) if empty +// +// Return an error if it doesn't exist or isn't empty +func (f *Fs) Rmdir(ctx context.Context, dir string) error { + return vfs.EROFS +} + +// Put in to the remote path with the modTime given of the given size +// +// May create the object even if it returns an error - if so +// will return the object and the error, otherwise will return +// nil and the error +func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (o fs.Object, err error) { + return nil, vfs.EROFS +} + +// Hashes returns the supported hash sets. +func (f *Fs) Hashes() hash.Set { + return hash.Set(hash.CRC32) +} + +// UnWrap returns the Fs that this Fs is wrapping +func (f *Fs) UnWrap() fs.Fs { + return f.f +} + +// WrapFs returns the Fs that is wrapping this Fs +func (f *Fs) WrapFs() fs.Fs { + return f.wrapper +} + +// SetWrapper sets the Fs that is wrapping this Fs +func (f *Fs) SetWrapper(wrapper fs.Fs) { + f.wrapper = wrapper +} + +// Object describes an object to be read from the raw zip file +type Object struct { + f *Fs + remote string + fh *zip.FileHeader + file *zip.File +} + +// Fs returns read only access to the Fs that this object is part of +func (o *Object) Fs() fs.Info { + return o.f +} + +// Return a string version +func (o *Object) String() string { + if o == nil { + return "" + } + return o.Remote() +} + +// Remote returns the remote path +func (o *Object) Remote() string { + return o.remote +} + +// Size returns the size of the file +func (o *Object) Size() int64 { + return int64(o.fh.UncompressedSize64) +} + +// ModTime returns the modification time of the object +// +// It attempts to read the objects mtime and if that isn't present the +// LastModified returned in the http headers +func (o *Object) ModTime(ctx context.Context) time.Time { + return o.fh.Modified +} + +// SetModTime sets the modification time of the local fs object +func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error { + return vfs.EROFS +} + +// Storable raturns a boolean indicating if this object is storable +func (o *Object) Storable() bool { + return true +} + +// Hash returns the selected checksum of the file +// If no checksum is available it returns "" +func (o *Object) Hash(ctx context.Context, ht hash.Type) (string, error) { + if ht == hash.CRC32 { + // FIXME return empty CRC if 
writing + if o.f.dt == nil { + return "", nil + } + return fmt.Sprintf("%08x", o.fh.CRC32), nil + } + return "", hash.ErrUnsupported +} + +// Open opens the file for read. Call Close() on the returned io.ReadCloser +func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.ReadCloser, err error) { + var offset, limit int64 = 0, -1 + for _, option := range options { + switch x := option.(type) { + case *fs.SeekOption: + offset = x.Offset + case *fs.RangeOption: + offset, limit = x.Decode(o.Size()) + default: + if option.Mandatory() { + fs.Logf(o, "Unsupported mandatory option: %v", option) + } + } + } + + rc, err = o.file.Open() + if err != nil { + return nil, err + } + + // discard data from start as necessary + if offset > 0 { + _, err = io.CopyN(io.Discard, rc, offset) + if err != nil { + return nil, err + } + } + // If limited then don't return everything + if limit >= 0 { + return readers.NewLimitedReadCloser(rc, limit), nil + } + + return rc, nil +} + +// Update in to the object with the modTime given of the given size +func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error { + return vfs.EROFS +} + +// Remove an object +func (o *Object) Remove(ctx context.Context) error { + return vfs.EROFS +} + +// Check the interfaces are satisfied +var ( + _ fs.Fs = (*Fs)(nil) + _ fs.UnWrapper = (*Fs)(nil) + _ fs.Wrapper = (*Fs)(nil) + _ fs.Object = (*Object)(nil) +) diff --git a/bin/make_manual.py b/bin/make_manual.py index f83e95b8a..4f100ef10 100755 --- a/bin/make_manual.py +++ b/bin/make_manual.py @@ -32,6 +32,7 @@ docs = [ "fichier.md", "alias.md", "s3.md", + "archive.md", "b2.md", "box.md", "cache.md", diff --git a/docs/content/_index.md b/docs/content/_index.md index 88333c668..18c6c8d83 100644 --- a/docs/content/_index.md +++ b/docs/content/_index.md @@ -218,6 +218,7 @@ WebDAV or S3, that work out of the box.) These backends adapt or modify other storage providers: {{< provider name="Alias: Rename existing remotes" home="/alias/" config="/alias/" >}} +{{< provider name="Archive: Read archive files" home="/archive/" config="/archive/" >}} {{< provider name="Cache: Cache remotes (DEPRECATED)" home="/cache/" config="/cache/" >}} {{< provider name="Chunker: Split large files" home="/chunker/" config="/chunker/" >}} {{< provider name="Combine: Combine multiple remotes into a directory tree" home="/combine/" config="/combine/" >}} diff --git a/docs/content/archive.md b/docs/content/archive.md new file mode 100644 index 000000000..3745682e3 --- /dev/null +++ b/docs/content/archive.md @@ -0,0 +1,270 @@ +--- +title: "Archive" +description: "Archive Remote" +versionIntroduced: "v1.72" +--- + +# {{< icon "fas fa-archive" >}} Archive + +The Archive backend allows read only access to the content of archive +files on cloud storage without downloading them completely. + +The archive files are recognised by their extension. + +| Archive | Extension | +| -------- | --------- | +| Zip | `.zip` | +| Squashfs | `.sqfs` | + +The supported archive file types are cloud friendly - a single file +can be found and downloaded without downloading the whole archive. + +## Configuration + +This backend is best used without configuration. + +Use it by putting the string `:archive:` in front of another remote, +say `remote:dir` to make `:archive:remote:dir`. + +Any archives in `remote:dir` will become directories and any files may +be read out of them individually. 
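+
+A quick sketch of typical command line use (the remote name and paths
+here are illustrative):
+
+```
+# Browse an archive in place
+rclone lsf :archive:remote:path/to/archive.zip/
+
+# Copy a single file out without downloading the whole archive
+rclone copy :archive:remote:path/to/archive.zip/some/file.txt /tmp/
+```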
+ +For example + +``` +$ rclone lsf s3:rclone/dir +100files.sqfs +100files.zip +``` + +Note that `100files.zip` and `100files.sqfs` are now directories: + +``` +$ rclone lsf :archive:s3:rclone/dir +100files.sqfs/ +100files.zip/ +``` + +Which we can look inside: + +``` +$ rclone lsf :archive:s3:rclone/dir/100files.zip/ +cofofiy5jun +gigi +hevupaz5z +kacak/ +kozemof/ +lamapaq4 +qejahen +quhenen2rey +soboves8 +vibat/ +wose +xade +zilupot +``` + +Files not in an archive can be read and written as normal. Files in an archive can only be read. + +The archive backend can also be used in a configuration file. Use the `remote` variable to point to the destination of the archive. + +``` +[remote] +type = archive +remote = s3:rclone/dir/100files.zip +``` + +Gives + +``` +$ rclone lsf remote: +cofofiy5jun +gigi +hevupaz5z +kacak/ +... +``` + + +## Modification times + +Modification times are preserved with an accuracy depending on the archive type. + +``` +$ rclone lsl --max-depth 1 :archive:s3:rclone/dir/100files.zip + 12 2025-10-27 14:39:20.000000000 cofofiy5jun + 81 2025-10-27 14:39:20.000000000 gigi + 58 2025-10-27 14:39:20.000000000 hevupaz5z + 6 2025-10-27 14:39:20.000000000 lamapaq4 + 43 2025-10-27 14:39:20.000000000 qejahen + 66 2025-10-27 14:39:20.000000000 quhenen2rey + 95 2025-10-27 14:39:20.000000000 soboves8 + 71 2025-10-27 14:39:20.000000000 wose + 76 2025-10-27 14:39:20.000000000 xade + 15 2025-10-27 14:39:20.000000000 zilupot +``` + +For `zip` and `squashfs` files this is 1s. + +## Hashes + +Which hash is supported depends on the archive type. Zip files use +CRC32, Squashfs don't support any hashes. For example: + +``` +$ rclone hashsum crc32 :archive:s3:rclone/dir/100files.zip/ +b2288554 cofofiy5jun +a87e62b6 wose +f90f630b xade +c7d0ef29 gigi +f1c64740 soboves8 +cb7b4a5d quhenen2rey +5115242b kozemof/fonaxo +afeabd9a qejahen +71202402 kozemof/fijubey5di +bd99e512 kozemof/napux +... +``` + +Hashes will be checked when the file is read from the archive and used +as part of syncing if possible. + +``` +$ rclone copy -vv :archive:s3:rclone/dir/100files.zip /tmp/100files +... +2025/10/27 14:56:44 DEBUG : kacak/turovat5c/yuyuquk: crc32 = abd05cc8 OK +2025/10/27 14:56:44 DEBUG : kacak/turovat5c/yuyuquk.aeb661dc.partial: renamed to: kacak/turovat5c/yuyuquk +2025/10/27 14:56:44 INFO : kacak/turovat5c/yuyuquk: Copied (new) +... +``` + +## Zip + +The [Zip file format](https://en.wikipedia.org/wiki/ZIP_(file_format)) +is a widely used archive format that bundles one or more files and +folders into a single file, primarily for easier storage or +transmission. It typically uses compression (most commonly the DEFLATE +algorithm) to reduce the overall size of the archived content. Zip +files are supported natively by most modern operating systems. + +Rclone does not support the following advanced features of Zip files: + +- Splitting large archives into smaller parts +- Password protection +- Zstd compression + +## Squashfs + +Squashfs is a compressed, read-only file system format primarily used +in Linux-based systems. It's designed to compress entire file systems +(including files, directories, and metadata) into a single archive +file, which can then be mounted and read directly, appearing as a +normal directory structure. Because it's read-only and highly +compressed, Squashfs is ideal for live CDs/USBs, embedded devices with +limited storage, and software package distribution, as it saves space +and ensures the integrity of the original files. 
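+
+As with zip files, a squashfs image stored on a remote can be browsed and
+read in place. A minimal sketch (remote name and paths are illustrative):
+
+```
+# List the contents of the image as a directory
+rclone lsf :archive:remote:images/rootfs.sqfs/
+
+# Stream a single file out of the image
+rclone cat :archive:remote:images/rootfs.sqfs/etc/os-release
+```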
+ +Rclone supports the following squashfs compression formats: + +- `Gzip` +- `Lzma` +- `Xz` +- `Zstd` + +These are not yet working: + +- `Lzo` - Not yet supported +- `Lz4` - Broken with "error decompressing: lz4: bad magic number" + +Rclone works fastest with large squashfs block sizes. For example: + +``` +mksquashfs 100files 100files.sqfs -comp zstd -b 1M +``` + +## Limitations + +Files in archive are read only. It isn't possible to create archives yet. + +Only `.zip` and `.sqfs` archives are supported as these are the only +common archiving formats which make it easy to read directory listings +from the archive without downloading the whole archive. + +Internally the archive backend uses the VFS to access files. It isn't +possible to configure the internal VFS yet which might be useful. + +## Archive Formats + +Here's a table rating common archive formats on their Cloud +Optimization which is based on their ability to access a single file +without reading the entire archive. + +This capability depends on whether the format has a central **index** +(or "table of contents") that a program can read first to find the +exact location of a specific file. + +| Format | Extensions | Cloud Optimized | Explanation | +| :--- | :--- | :--- | :--- | +| **ZIP** | `.zip` | **Excellent** | **Zip files have an index** (the "central directory") stored at the *end* of the file. A program can seek to the end, read the index to find a file's location and size, and then seek directly to that file's data to extract it. | +| **SquashFS** | `.squashfs`, `.sqfs`, `.sfs` | **Excellent** | This is a compressed read-only *filesystem image*, not just an archive. It is **specifically designed for random access**. It uses metadata and index tables to allow the system to find and decompress individual files or data blocks on demand. | +| **ISO Image** | `.iso` | **Excellent** | Like SquashFS, this is a *filesystem image* (for optical media). It contains a filesystem (like ISO 9660 or UDF) with a **table of contents at a known location**, allowing for direct access to any file without reading the whole disk. | +| **RAR** | `.rar` | **Good** | RAR supports "non-solid" and "solid" modes. In the common **non-solid** mode, files are compressed separately, and an index allows for easy single-file extraction (like ZIP). In "solid" mode, this rating would be "Very Poor." | +| **7z** | `.7z` | **Poor** | By default, 7z uses "solid" archives to maximize compression. This compresses files as one continuous stream. To extract a file from the middle, all preceding files must be decompressed first. (If explicitly created as "non-solid," its rating would be "Excellent"). | +| **tar** | `.tar` | **Poor** | "Tape Archive" is a *streaming* format with **no central index**. To find a file, you must read the archive from the beginning, checking each file header one by one until you find the one you want. This is slow but doesn't require decompressing data. | +| **Gzipped Tar** | `.tar.gz`, `.tgz` | **Very Poor** | This is a `tar` file (already "Poor") compressed with `gzip` as a **single, non-seekable stream**. You cannot seek. To get *any* file, you must decompress the *entire* archive from the beginning up to that file. | +| **Bzipped/XZ Tar** | `.tar.bz2`, `.tar.xz` | **Very Poor** | This is the same principle as `tar.gz`. The entire archive is one large compressed block, making random access impossible. 
| + +## Ideas for improvements + +It would be possible to add ISO support fairly easily as the library we use ([go-diskfs](https://github.com/diskfs/go-diskfs/)) supports it. We could also add `ext4` and `fat32` the same way, however in my experience these are not very common as files so probably not worth it. Go-diskfs can also read partitions which we could potentially take advantage of. + +It would be possible to add write support, but this would only be for creating new archives, not for updating existing archives. + +{{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/archive/archive.go then run make backenddocs" >}} +### Standard options + +Here are the Standard options specific to archive (Read archives). + +#### --archive-remote + +Remote to wrap to read archives from. + +Normally should contain a ':' and a path, e.g. "myremote:path/to/dir", +"myremote:bucket" or "myremote:". + +If this is left empty, then the archive backend will use the root as +the remote. + +This means that you can use :archive:remote:path and it will be +equivalent to setting remote="remote:path". + + +Properties: + +- Config: remote +- Env Var: RCLONE_ARCHIVE_REMOTE +- Type: string +- Required: false + +### Advanced options + +Here are the Advanced options specific to archive (Read archives). + +#### --archive-description + +Description of the remote. + +Properties: + +- Config: description +- Env Var: RCLONE_ARCHIVE_DESCRIPTION +- Type: string +- Required: false + +### Metadata + +Any metadata supported by the underlying remote is read and written. + +See the [metadata](/docs/#metadata) docs for more info. + +{{< rem autogenerated options stop >}} diff --git a/docs/content/docs.md b/docs/content/docs.md index dd27f9d44..3c178af79 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -31,6 +31,7 @@ See the following for detailed instructions for - [1Fichier](/fichier/) - [Akamai Netstorage](/netstorage/) - [Alias](/alias/) +- [Archive](/archive/) - [Amazon S3](/s3/) - [Backblaze B2](/b2/) - [Box](/box/) diff --git a/docs/layouts/chrome/navbar.html b/docs/layouts/chrome/navbar.html index 3180139dd..b6a9ea351 100644 --- a/docs/layouts/chrome/navbar.html +++ b/docs/layouts/chrome/navbar.html @@ -56,6 +56,7 @@ Akamai NetStorage Alias Amazon S3 + Archive Backblaze B2 Box Chunker (splits large files) diff --git a/fstest/test_all/config.yaml b/fstest/test_all/config.yaml index 5cec06838..eba583e38 100644 --- a/fstest/test_all/config.yaml +++ b/fstest/test_all/config.yaml @@ -659,3 +659,14 @@ backends: ignoretests: - cmd/bisync - cmd/gitannex + - backend: "archive" + remote: "TestArchive:" + fastlist: false + ignoretests: + - cmd/bisync + - cmd/gitannex + ignore: + # These are caused by the archive backend returning the underlying objects + # with the parent backend having a different precision. 
+ - TestServerSideCopyOverSelf + - TestServerSideMoveOverSelf diff --git a/go.mod b/go.mod index 6033ce8b8..715e9be7e 100644 --- a/go.mod +++ b/go.mod @@ -29,6 +29,7 @@ require ( github.com/colinmarc/hdfs/v2 v2.4.0 github.com/coreos/go-semver v0.3.1 github.com/coreos/go-systemd/v22 v22.6.0 + github.com/diskfs/go-diskfs v1.7.0 github.com/dop251/scsu v0.0.0-20220106150536-84ac88021d00 github.com/dropbox/dropbox-sdk-go-unofficial/v6 v6.0.5 github.com/gabriel-vasile/mimetype v1.4.10 @@ -111,6 +112,7 @@ require ( github.com/PuerkitoBio/goquery v1.10.3 // indirect github.com/akavel/rsrc v0.10.2 // indirect github.com/anacrolix/generics v0.1.0 // indirect + github.com/anchore/go-lzo v0.1.0 // indirect github.com/andybalholm/cascadia v1.3.3 // indirect github.com/appscode/go-querystring v0.0.0-20170504095604-0126cfb3f1dc // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect @@ -199,6 +201,7 @@ require ( github.com/panjf2000/ants/v2 v2.11.3 // indirect github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14 // indirect github.com/philhofer/fwd v1.2.0 // indirect + github.com/pierrec/lz4/v4 v4.1.22 // indirect github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect github.com/pkg/errors v0.9.1 // indirect github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect @@ -213,13 +216,14 @@ require ( github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 // indirect github.com/samber/lo v1.51.0 // indirect github.com/shabbyrobe/gocovmerge v0.0.0-20230507112040-c3350d9342df // indirect - github.com/sirupsen/logrus v1.9.3 // indirect + github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af // indirect github.com/smartystreets/goconvey v1.8.1 // indirect github.com/sony/gobreaker v1.0.0 // indirect github.com/spacemonkeygo/monkit/v3 v3.0.24 // indirect github.com/tinylib/msgp v1.4.0 // indirect github.com/tklauser/go-sysconf v0.3.15 // indirect github.com/tklauser/numcpus v0.10.0 // indirect + github.com/ulikunitz/xz v0.5.15 // indirect github.com/willscott/go-nfs-client v0.0.0-20240104095149-b44639837b00 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect github.com/zeebo/errs v1.4.0 // indirect diff --git a/go.sum b/go.sum index 853032209..6b369a8b9 100644 --- a/go.sum +++ b/go.sum @@ -102,6 +102,8 @@ github.com/anacrolix/generics v0.1.0 h1:r6OgogjCdml3K5A8ixUG0X9DM4jrQiMfIkZiBOGv github.com/anacrolix/generics v0.1.0/go.mod h1:MN3ve08Z3zSV/rTuX/ouI4lNdlfTxgdafQJiLzyNRB8= github.com/anacrolix/log v0.17.0 h1:cZvEGRPCbIg+WK+qAxWj/ap2Gj8cx1haOCSVxNZQpK4= github.com/anacrolix/log v0.17.0/go.mod h1:m0poRtlr41mriZlXBQ9SOVZ8yZBkLjOkDhd5Li5pITA= +github.com/anchore/go-lzo v0.1.0 h1:NgAacnzqPeGH49Ky19QKLBZEuFRqtTG9cdaucc3Vncs= +github.com/anchore/go-lzo v0.1.0/go.mod h1:3kLx0bve2oN1iDwgM1U5zGku1Tfbdb0No5qp1eL1fIk= github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= github.com/appscode/go-querystring v0.0.0-20170504095604-0126cfb3f1dc h1:LoL75er+LKDHDUfU5tRvFwxH0LjPpZN8OoG8Ll+liGU= @@ -207,6 +209,10 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/diskfs/go-diskfs v1.7.0 h1:vonWmt5CMowXwUc79jWyGrf2DIMeoOjkLlMnQYGVOs8= +github.com/diskfs/go-diskfs v1.7.0/go.mod h1:LhQyXqOugWFRahYUSw47NyZJPezFzB9UELwhpszLP/k= +github.com/djherbis/times v1.6.0 h1:w2ctJ92J8fBvWPxugmXIv7Nz7Q3iDMKNx9v5ocVH20c= +github.com/djherbis/times v1.6.0/go.mod h1:gOHeRAz2h+VJNZ5Gmc/o7iD9k4wW7NMVqieYCY99oc0= github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= github.com/dop251/scsu v0.0.0-20220106150536-84ac88021d00 h1:xJBhC00smQpSZw3Kr0ErMUBXhUSjYoLRm2szxdbRBL0= @@ -219,6 +225,8 @@ github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkp github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/ebitengine/purego v0.9.0 h1:mh0zpKBIXDceC63hpvPuGLiJ8ZAa3DfrFTudmfi8A4k= github.com/ebitengine/purego v0.9.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= +github.com/elliotwutingfeng/asciiset v0.0.0-20230602022725-51bbb787efab h1:h1UgjJdAAhj+uPL68n7XASS6bU+07ZX1WJvVS2eyoeY= +github.com/elliotwutingfeng/asciiset v0.0.0-20230602022725-51bbb787efab/go.mod h1:GLo/8fDswSAniFG+BFIaiSPcK610jyzgEhWYPQwuQdw= github.com/emersion/go-message v0.18.2 h1:rl55SQdjd9oJcIoQNhubD2Acs1E6IzlZISRTK7x/Lpg= github.com/emersion/go-message v0.18.2/go.mod h1:XpJyL70LwRvq2a8rVbHXikPgKj8+aI0kGdHlg16ibYA= github.com/emersion/go-vcard v0.0.0-20241024213814-c9703dde27ff h1:4N8wnS3f1hNHSmFD5zgFkWCyA4L1kCDkImPAtK7D6tg= @@ -507,6 +515,8 @@ github.com/peterh/liner v1.2.2 h1:aJ4AOodmL+JxOZZEL2u9iJf8omNRpqHc/EbrK+3mAXw= github.com/peterh/liner v1.2.2/go.mod h1:xFwJyiKIXJZUKItq5dGHZSTBRAuG/CpeNpWLyiNRNwI= github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM= github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM= +github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU= +github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/pkg/diff v0.0.0-20200914180035-5b29258ca4f7/go.mod h1:zO8QMzTeZd5cpnIkz/Gn6iK0jDfGicM1nynOkkPIl28= @@ -568,8 +578,8 @@ github.com/shabbyrobe/gocovmerge v0.0.0-20230507112040-c3350d9342df/go.mod h1:dc github.com/shirou/gopsutil/v4 v4.25.8 h1:NnAsw9lN7587WHxjJA9ryDnqhJpFH6A+wagYWTOH970= github.com/shirou/gopsutil/v4 v4.25.8/go.mod h1:q9QdMmfAOVIw7a+eF86P7ISEU6ka+NLgkUxlopV4RwI= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= -github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yfB+If0vjp97vuT74F72r8hfRpP8jLU0= +github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966 h1:JIAuq3EEf9cgbU6AtGPK4CTG3Zf6CKMNqf0MHTggAUA= github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966/go.mod h1:sUM3LWHvSMaG192sy56D9F7CNvL7jUJVXoqM1QKLnog= github.com/smarty/assertions v1.15.0 h1:cR//PqUBUiQRakZWqBiFFQ9wb8emQGDb0HeGdqGByCY= @@ -620,6 +630,8 @@ 
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +github.com/ulikunitz/xz v0.5.15 h1:9DNdB5s+SgV3bQ2ApL10xRc35ck0DuIX/isZvIk+ubY= +github.com/ulikunitz/xz v0.5.15/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/unknwon/goconfig v1.0.0 h1:rS7O+CmUdli1T+oDm7fYj1MwqNWtEJfNj+FqcUHML8U= github.com/unknwon/goconfig v1.0.0/go.mod h1:qu2ZQ/wcC/if2u32263HTVC39PeOQRSmidQk3DuDFQ8= github.com/willscott/go-nfs v0.0.3 h1:Z5fHVxMsppgEucdkKBN26Vou19MtEM875NmRwj156RE=