From 8f74e7d3317a6d87876daa7b8757641992749c7d Mon Sep 17 00:00:00 2001 From: Alex <64072843+A1ex3@users.noreply.github.com> Date: Tue, 4 Nov 2025 21:50:56 +0700 Subject: [PATCH] backend/compress: add zstd compression Added support for reading and writing zstd-compressed archives in seekable format using "github.com/klauspost/compress/zstd" and "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg". Bumped Go version from 1.24.0 to 1.24.4 due to requirements of "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg". --- backend/compress/compress.go | 318 +++++++++------------- backend/compress/compress_test.go | 22 +- backend/compress/gzip_handler.go | 207 ++++++++++++++ backend/compress/szstd_helper.go | 327 +++++++++++++++++++++++ backend/compress/uncompressed_handler.go | 65 +++++ backend/compress/unknown_handler.go | 65 +++++ backend/compress/zstd_handler.go | 192 +++++++++++++ docs/content/compress.md | 114 +++++--- fstest/test_all/config.yaml | 3 + go.mod | 4 +- go.sum | 4 + 11 files changed, 1093 insertions(+), 228 deletions(-) create mode 100644 backend/compress/gzip_handler.go create mode 100644 backend/compress/szstd_helper.go create mode 100644 backend/compress/uncompressed_handler.go create mode 100644 backend/compress/unknown_handler.go create mode 100644 backend/compress/zstd_handler.go diff --git a/backend/compress/compress.go b/backend/compress/compress.go index 0cc56bc62..961d3898f 100644 --- a/backend/compress/compress.go +++ b/backend/compress/compress.go @@ -2,10 +2,8 @@ package compress import ( - "bufio" "bytes" "context" - "crypto/md5" "encoding/base64" "encoding/binary" "encoding/hex" @@ -46,6 +44,7 @@ const ( minCompressionRatio = 1.1 gzFileExt = ".gz" + zstdFileExt = ".zst" metaFileExt = ".json" uncompressedFileExt = ".bin" ) @@ -54,6 +53,7 @@ const ( const ( Uncompressed = 0 Gzip = 2 + Zstd = 4 ) var nameRegexp = regexp.MustCompile(`^(.+?)\.([A-Za-z0-9-_]{11})$`) @@ -66,6 +66,10 @@ func init() { Value: "gzip", Help: "Standard gzip compression with fastest parameters.", }, + { + Value: "zstd", + Help: "Zstandard compression — fast modern algorithm offering adjustable speed-to-compression tradeoffs.", + }, } // Register our remote @@ -87,17 +91,23 @@ func init() { Examples: compressionModeOptions, }, { Name: "level", - Help: `GZIP compression level (-2 to 9). - -Generally -1 (default, equivalent to 5) is recommended. -Levels 1 to 9 increase compression at the cost of speed. Going past 6 -generally offers very little return. - -Level -2 uses Huffman encoding only. Only use if you know what you -are doing. -Level 0 turns off compression.`, - Default: sgzip.DefaultCompression, - Advanced: true, + Help: `GZIP (levels -2 to 9): +- -2 — Huffman encoding only. Only use if you know what you're doing. +- -1 (default) — recommended; equivalent to level 5. +- 0 — turns off compression. +- 1–9 — increase compression at the cost of speed. Going past 6 generally offers very little return. + +ZSTD (levels 0 to 4): +- 0 — turns off compression entirely. +- 1 — fastest compression with the lowest ratio. +- 2 (default) — good balance of speed and compression. +- 3 — better compression, but uses about 2–3x more CPU than the default. +- 4 — best possible compression ratio (highest CPU cost). + +Notes: +- Choose GZIP for wide compatibility; ZSTD for better speed/ratio tradeoffs. +- Negative gzip levels: -2 = Huffman-only, -1 = default (≈ level 5).`, + Required: true, }, { Name: "ram_cache_limit", Help: `Some remotes don't allow the upload of files with unknown size. 
@@ -112,6 +122,47 @@ this limit will be cached on disk.`, }) } +// compressionModeHandler defines the interface for handling different compression modes +type compressionModeHandler interface { + // processFileNameGetFileExtension returns the file extension for the given compression mode + processFileNameGetFileExtension(compressionMode int) string + + // newObjectGetOriginalSize returns the original file size from the metadata + newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error) + + // isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds + // the configured threshold + isCompressible(r io.Reader, compressionMode int) (bool, error) + + // putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata + putCompress( + ctx context.Context, + f *Fs, + in io.Reader, + src fs.ObjectInfo, + options []fs.OpenOption, + mimeType string, + ) (fs.Object, *ObjectMetadata, error) + + // openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method + openGetReadCloser( + ctx context.Context, + o *Object, + offset int64, + limit int64, + cr chunkedreader.ChunkedReader, + closer io.Closer, + options ...fs.OpenOption, + ) (rc io.ReadCloser, err error) + + // putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm + putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error) + + // This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata. + // Warning: This function panics if cmeta is not of the expected type. + newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata +} + // Options defines the configuration for this backend type Options struct { Remote string `config:"remote"` @@ -125,12 +176,13 @@ type Options struct { // Fs represents a wrapped fs.Fs type Fs struct { fs.Fs - wrapper fs.Fs - name string - root string - opt Options - mode int // compression mode id - features *fs.Features // optional features + wrapper fs.Fs + name string + root string + opt Options + mode int // compression mode id + features *fs.Features // optional features + modeHandler compressionModeHandler // compression mode handler } // NewFs constructs an Fs from the path, container:path @@ -167,13 +219,28 @@ func NewFs(ctx context.Context, name, rpath string, m configmap.Mapper) (fs.Fs, return nil, fmt.Errorf("failed to make remote %s:%q to wrap: %w", wName, remotePath, err) } + compressionMode := compressionModeFromName(opt.CompressionMode) + var modeHandler compressionModeHandler + + switch compressionMode { + case Gzip: + modeHandler = &gzipModeHandler{} + case Zstd: + modeHandler = &zstdModeHandler{} + case Uncompressed: + modeHandler = &uncompressedModeHandler{} + default: + modeHandler = &unknownModeHandler{} + } + // Create the wrapping fs f := &Fs{ - Fs: wrappedFs, - name: name, - root: rpath, - opt: *opt, - mode: compressionModeFromName(opt.CompressionMode), + Fs: wrappedFs, + name: name, + root: rpath, + opt: *opt, + mode: compressionMode, + modeHandler: modeHandler, } // Correct root if definitely pointing to a file if err == fs.ErrorIsFile { @@ -215,10 +282,13 @@ func NewFs(ctx context.Context, name, rpath string, m configmap.Mapper) (fs.Fs, return f, err } +// compressionModeFromName converts a compression mode name to its int representation. 
func compressionModeFromName(name string) int { switch name { case "gzip": return Gzip + case "zstd": + return Zstd default: return Uncompressed } @@ -242,7 +312,7 @@ func base64ToInt64(str string) (int64, error) { // Processes a file name for a compressed file. Returns the original file name, the extension, and the size of the original file. // Returns -2 for the original size if the file is uncompressed. -func processFileName(compressedFileName string) (origFileName string, extension string, origSize int64, err error) { +func processFileName(compressedFileName string, modeHandler compressionModeHandler) (origFileName string, extension string, origSize int64, err error) { // Separate the filename and size from the extension extensionPos := strings.LastIndex(compressedFileName, ".") if extensionPos == -1 { @@ -261,7 +331,8 @@ func processFileName(compressedFileName string) (origFileName string, extension if err != nil { return "", "", 0, errors.New("could not decode size") } - return match[1], gzFileExt, size, nil + ext := modeHandler.processFileNameGetFileExtension(compressionModeFromName(compressedFileName[extensionPos+1:])) + return match[1], ext, size, nil } // Generates the file name for a metadata file @@ -286,11 +357,15 @@ func unwrapMetadataFile(filename string) (string, bool) { // makeDataName generates the file name for a data file with specified compression mode func makeDataName(remote string, size int64, mode int) (newRemote string) { - if mode != Uncompressed { + switch mode { + case Gzip: newRemote = remote + "." + int64ToBase64(size) + gzFileExt - } else { + case Zstd: + newRemote = remote + "." + int64ToBase64(size) + zstdFileExt + default: newRemote = remote + uncompressedFileExt } + return newRemote } @@ -304,7 +379,7 @@ func (f *Fs) dataName(remote string, size int64, compressed bool) (name string) // addData parses an object and adds it to the DirEntries func (f *Fs) addData(entries *fs.DirEntries, o fs.Object) { - origFileName, _, size, err := processFileName(o.Remote()) + origFileName, _, size, err := processFileName(o.Remote(), f.modeHandler) if err != nil { fs.Errorf(o, "Error on parsing file name: %v", err) return @@ -427,8 +502,12 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { if err != nil { return nil, fmt.Errorf("error decoding metadata: %w", err) } + size, err := f.modeHandler.newObjectGetOriginalSize(meta) + if err != nil { + return nil, fmt.Errorf("error reading metadata: %w", err) + } // Create our Object - o, err := f.Fs.NewObject(ctx, makeDataName(remote, meta.CompressionMetadata.Size, meta.Mode)) + o, err := f.Fs.NewObject(ctx, makeDataName(remote, size, meta.Mode)) if err != nil { return nil, err } @@ -437,7 +516,7 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { // checkCompressAndType checks if an object is compressible and determines it's mime type // returns a multireader with the bytes that were read to determine mime type -func checkCompressAndType(in io.Reader) (newReader io.Reader, compressible bool, mimeType string, err error) { +func checkCompressAndType(in io.Reader, compressionMode int, modeHandler compressionModeHandler) (newReader io.Reader, compressible bool, mimeType string, err error) { in, wrap := accounting.UnWrap(in) buf := make([]byte, heuristicBytes) n, err := in.Read(buf) @@ -446,7 +525,7 @@ func checkCompressAndType(in io.Reader) (newReader io.Reader, compressible bool, return nil, false, "", err } mime := mimetype.Detect(buf) - compressible, err = 
isCompressible(bytes.NewReader(buf)) + compressible, err = modeHandler.isCompressible(bytes.NewReader(buf), compressionMode) if err != nil { return nil, false, "", err } @@ -454,26 +533,6 @@ func checkCompressAndType(in io.Reader) (newReader io.Reader, compressible bool, return wrap(in), compressible, mime.String(), nil } -// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds -// the configured threshold -func isCompressible(r io.Reader) (bool, error) { - var b bytes.Buffer - w, err := sgzip.NewWriterLevel(&b, sgzip.DefaultCompression) - if err != nil { - return false, err - } - n, err := io.Copy(w, r) - if err != nil { - return false, err - } - err = w.Close() - if err != nil { - return false, err - } - ratio := float64(n) / float64(b.Len()) - return ratio > minCompressionRatio, nil -} - // verifyObjectHash verifies the Objects hash func (f *Fs) verifyObjectHash(ctx context.Context, o fs.Object, hasher *hash.MultiHasher, ht hash.Type) error { srcHash := hasher.Sums()[ht] @@ -494,9 +553,9 @@ func (f *Fs) verifyObjectHash(ctx context.Context, o fs.Object, hasher *hash.Mul type putFn func(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) -type compressionResult struct { +type compressionResult[T sgzip.GzipMetadata | SzstdMetadata] struct { err error - meta sgzip.GzipMetadata + meta T } // replicating some of operations.Rcat functionality because we want to support remotes without streaming @@ -537,106 +596,18 @@ func (f *Fs) rcat(ctx context.Context, dstFileName string, in io.ReadCloser, mod return nil, fmt.Errorf("failed to write temporary local file: %w", err) } if _, err = tempFile.Seek(0, 0); err != nil { - return nil, err + return nil, fmt.Errorf("failed to seek temporary local file: %w", err) } finfo, err := tempFile.Stat() if err != nil { - return nil, err + return nil, fmt.Errorf("failed to stat temporary local file: %w", err) } return f.Fs.Put(ctx, tempFile, object.NewStaticObjectInfo(dstFileName, modTime, finfo.Size(), false, nil, f.Fs)) } // Put a compressed version of a file. Returns a wrappable object and metadata. func (f *Fs) putCompress(ctx context.Context, in io.Reader, src fs.ObjectInfo, options []fs.OpenOption, mimeType string) (fs.Object, *ObjectMetadata, error) { - // Unwrap reader accounting - in, wrap := accounting.UnWrap(in) - - // Add the metadata hasher - metaHasher := md5.New() - in = io.TeeReader(in, metaHasher) - - // Compress the file - pipeReader, pipeWriter := io.Pipe() - results := make(chan compressionResult) - go func() { - gz, err := sgzip.NewWriterLevel(pipeWriter, f.opt.CompressionLevel) - if err != nil { - results <- compressionResult{err: err, meta: sgzip.GzipMetadata{}} - return - } - _, err = io.Copy(gz, in) - gzErr := gz.Close() - if gzErr != nil { - fs.Errorf(nil, "Failed to close compress: %v", gzErr) - if err == nil { - err = gzErr - } - } - closeErr := pipeWriter.Close() - if closeErr != nil { - fs.Errorf(nil, "Failed to close pipe: %v", closeErr) - if err == nil { - err = closeErr - } - } - results <- compressionResult{err: err, meta: gz.MetaData()} - }() - wrappedIn := wrap(bufio.NewReaderSize(pipeReader, bufferSize)) // Probably no longer needed as sgzip has it's own buffering - - // Find a hash the destination supports to compute a hash of - // the compressed data. 
- ht := f.Fs.Hashes().GetOne() - var hasher *hash.MultiHasher - var err error - if ht != hash.None { - // unwrap the accounting again - wrappedIn, wrap = accounting.UnWrap(wrappedIn) - hasher, err = hash.NewMultiHasherTypes(hash.NewHashSet(ht)) - if err != nil { - return nil, nil, err - } - // add the hasher and re-wrap the accounting - wrappedIn = io.TeeReader(wrappedIn, hasher) - wrappedIn = wrap(wrappedIn) - } - - // Transfer the data - o, err := f.rcat(ctx, makeDataName(src.Remote(), src.Size(), f.mode), io.NopCloser(wrappedIn), src.ModTime(ctx), options) - //o, err := operations.Rcat(ctx, f.Fs, makeDataName(src.Remote(), src.Size(), f.mode), io.NopCloser(wrappedIn), src.ModTime(ctx)) - if err != nil { - if o != nil { - removeErr := o.Remove(ctx) - if removeErr != nil { - fs.Errorf(o, "Failed to remove partially transferred object: %v", err) - } - } - return nil, nil, err - } - // Check whether we got an error during compression - result := <-results - err = result.err - if err != nil { - if o != nil { - removeErr := o.Remove(ctx) - if removeErr != nil { - fs.Errorf(o, "Failed to remove partially compressed object: %v", err) - } - } - return nil, nil, err - } - - // Generate metadata - meta := newMetadata(result.meta.Size, f.mode, result.meta, hex.EncodeToString(metaHasher.Sum(nil)), mimeType) - - // Check the hashes of the compressed data if we were comparing them - if ht != hash.None && hasher != nil { - err = f.verifyObjectHash(ctx, o, hasher, ht) - if err != nil { - return nil, nil, err - } - } - - return o, meta, nil + return f.modeHandler.putCompress(ctx, f, in, src, options, mimeType) } // Put an uncompressed version of a file. Returns a wrappable object and metadata. @@ -680,7 +651,8 @@ func (f *Fs) putUncompress(ctx context.Context, in io.Reader, src fs.ObjectInfo, if err != nil { return nil, nil, err } - return o, newMetadata(o.Size(), Uncompressed, sgzip.GzipMetadata{}, hex.EncodeToString(sum), mimeType), nil + + return f.modeHandler.putUncompressGetNewMetadata(o, Uncompressed, hex.EncodeToString(sum), mimeType, sum) } // This function will write a metadata struct to a metadata Object for an src. Returns a wrappable metadata object. @@ -751,7 +723,7 @@ func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options . o, err := f.NewObject(ctx, src.Remote()) if err == fs.ErrorObjectNotFound { // Get our file compressibility - in, compressible, mimeType, err := checkCompressAndType(in) + in, compressible, mimeType, err := checkCompressAndType(in, f.mode, f.modeHandler) if err != nil { return nil, err } @@ -771,7 +743,7 @@ func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, opt } found := err == nil - in, compressible, mimeType, err := checkCompressAndType(in) + in, compressible, mimeType, err := checkCompressAndType(in, f.mode, f.modeHandler) if err != nil { return nil, err } @@ -1090,11 +1062,12 @@ func (f *Fs) PublicLink(ctx context.Context, remote string, duration fs.Duration // ObjectMetadata describes the metadata for an Object. type ObjectMetadata struct { - Mode int // Compression mode of the file. - Size int64 // Size of the object. - MD5 string // MD5 hash of the file. - MimeType string // Mime type of the file - CompressionMetadata sgzip.GzipMetadata + Mode int // Compression mode of the file. + Size int64 // Size of the object. + MD5 string // MD5 hash of the file. 
+ MimeType string // Mime type of the file + CompressionMetadataGzip *sgzip.GzipMetadata // Metadata for Gzip compression + CompressionMetadataZstd *SzstdMetadata // Metadata for Zstd compression } // Object with external metadata @@ -1107,17 +1080,6 @@ type Object struct { meta *ObjectMetadata // Metadata struct for this object (nil if not loaded) } -// This function generates a metadata object -func newMetadata(size int64, mode int, cmeta sgzip.GzipMetadata, md5 string, mimeType string) *ObjectMetadata { - meta := new(ObjectMetadata) - meta.Size = size - meta.Mode = mode - meta.CompressionMetadata = cmeta - meta.MD5 = md5 - meta.MimeType = mimeType - return meta -} - // This function will read the metadata from a metadata object. func readMetadata(ctx context.Context, mo fs.Object) (meta *ObjectMetadata, err error) { // Open our meradata object @@ -1165,7 +1127,7 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op return o.mo, o.mo.Update(ctx, in, src, options...) } - in, compressible, mimeType, err := checkCompressAndType(in) + in, compressible, mimeType, err := checkCompressAndType(in, o.meta.Mode, o.f.modeHandler) if err != nil { return err } @@ -1278,7 +1240,7 @@ func (o *Object) String() string { // Remote returns the remote path func (o *Object) Remote() string { - origFileName, _, _, err := processFileName(o.Object.Remote()) + origFileName, _, _, err := processFileName(o.Object.Remote(), o.f.modeHandler) if err != nil { fs.Errorf(o.f, "Could not get remote path for: %s", o.Object.Remote()) return o.Object.Remote() @@ -1381,7 +1343,6 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.Read return o.Object.Open(ctx, options...) } // Get offset and limit from OpenOptions, pass the rest to the underlying remote - var openOptions = []fs.OpenOption{&fs.SeekOption{Offset: 0}} var offset, limit int64 = 0, -1 for _, option := range options { switch x := option.(type) { @@ -1389,31 +1350,12 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.Read offset = x.Offset case *fs.RangeOption: offset, limit = x.Decode(o.Size()) - default: - openOptions = append(openOptions, option) } } // Get a chunkedreader for the wrapped object chunkedReader := chunkedreader.New(ctx, o.Object, initialChunkSize, maxChunkSize, chunkStreams) - // Get file handle - var file io.Reader - if offset != 0 { - file, err = sgzip.NewReaderAt(chunkedReader, &o.meta.CompressionMetadata, offset) - } else { - file, err = sgzip.NewReader(chunkedReader) - } - if err != nil { - return nil, err - } - - var fileReader io.Reader - if limit != -1 { - fileReader = io.LimitReader(file, limit) - } else { - fileReader = file - } - // Return a ReadCloser - return ReadCloserWrapper{Reader: fileReader, Closer: chunkedReader}, nil + var retCloser io.Closer = chunkedReader + return o.f.modeHandler.openGetReadCloser(ctx, o, offset, limit, chunkedReader, retCloser, options...) 
} // ObjectInfo describes a wrapped fs.ObjectInfo for being the source diff --git a/backend/compress/compress_test.go b/backend/compress/compress_test.go index 8fc954dfa..000d70330 100644 --- a/backend/compress/compress_test.go +++ b/backend/compress/compress_test.go @@ -48,7 +48,27 @@ func TestRemoteGzip(t *testing.T) { opt.ExtraConfig = []fstests.ExtraConfigItem{ {Name: name, Key: "type", Value: "compress"}, {Name: name, Key: "remote", Value: tempdir}, - {Name: name, Key: "compression_mode", Value: "gzip"}, + {Name: name, Key: "mode", Value: "gzip"}, + {Name: name, Key: "level", Value: "-1"}, + } + opt.QuickTestOK = true + fstests.Run(t, &opt) +} + +// TestRemoteZstd tests ZSTD compression +func TestRemoteZstd(t *testing.T) { + if *fstest.RemoteName != "" { + t.Skip("Skipping as -remote set") + } + tempdir := filepath.Join(os.TempDir(), "rclone-compress-test-zstd") + name := "TestCompressZstd" + opt := defaultOpt + opt.RemoteName = name + ":" + opt.ExtraConfig = []fstests.ExtraConfigItem{ + {Name: name, Key: "type", Value: "compress"}, + {Name: name, Key: "remote", Value: tempdir}, + {Name: name, Key: "mode", Value: "zstd"}, + {Name: name, Key: "level", Value: "2"}, } opt.QuickTestOK = true fstests.Run(t, &opt) diff --git a/backend/compress/gzip_handler.go b/backend/compress/gzip_handler.go new file mode 100644 index 000000000..a5abbf7c8 --- /dev/null +++ b/backend/compress/gzip_handler.go @@ -0,0 +1,207 @@ +package compress + +import ( + "bufio" + "bytes" + "context" + "crypto/md5" + "encoding/hex" + "errors" + "io" + + "github.com/buengese/sgzip" + + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/accounting" + "github.com/rclone/rclone/fs/chunkedreader" + "github.com/rclone/rclone/fs/hash" +) + +// gzipModeHandler implements compressionModeHandler for gzip +type gzipModeHandler struct{} + +// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds +// the configured threshold +func (g *gzipModeHandler) isCompressible(r io.Reader, compressionMode int) (bool, error) { + var b bytes.Buffer + var n int64 + w, err := sgzip.NewWriterLevel(&b, sgzip.DefaultCompression) + if err != nil { + return false, err + } + n, err = io.Copy(w, r) + if err != nil { + return false, err + } + err = w.Close() + if err != nil { + return false, err + } + ratio := float64(n) / float64(b.Len()) + return ratio > minCompressionRatio, nil +} + +// newObjectGetOriginalSize returns the original file size from the metadata +func (g *gzipModeHandler) newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error) { + if meta.CompressionMetadataGzip == nil { + return 0, errors.New("missing gzip metadata") + } + return meta.CompressionMetadataGzip.Size, nil +} + +// openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method +func (g *gzipModeHandler) openGetReadCloser( + ctx context.Context, + o *Object, + offset int64, + limit int64, + cr chunkedreader.ChunkedReader, + closer io.Closer, + options ...fs.OpenOption, +) (rc io.ReadCloser, err error) { + var file io.Reader + + if offset != 0 { + file, err = sgzip.NewReaderAt(cr, o.meta.CompressionMetadataGzip, offset) + } else { + file, err = sgzip.NewReader(cr) + } + if err != nil { + return nil, err + } + + var fileReader io.Reader + if limit != -1 { + fileReader = io.LimitReader(file, limit) + } else { + fileReader = file + } + // Return a ReadCloser + return ReadCloserWrapper{Reader: fileReader, Closer: closer}, nil +} + +// processFileNameGetFileExtension returns the file extension 
for the given compression mode
+func (g *gzipModeHandler) processFileNameGetFileExtension(compressionMode int) string {
+	if compressionMode == Gzip {
+		return gzFileExt
+	}
+
+	return ""
+}
+
+// putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata
+func (g *gzipModeHandler) putCompress(
+	ctx context.Context,
+	f *Fs,
+	in io.Reader,
+	src fs.ObjectInfo,
+	options []fs.OpenOption,
+	mimeType string,
+) (fs.Object, *ObjectMetadata, error) {
+	// Unwrap reader accounting
+	in, wrap := accounting.UnWrap(in)
+
+	// Add the metadata hasher
+	metaHasher := md5.New()
+	in = io.TeeReader(in, metaHasher)
+
+	// Compress the file
+	pipeReader, pipeWriter := io.Pipe()
+
+	resultsGzip := make(chan compressionResult[sgzip.GzipMetadata])
+	go func() {
+		gz, err := sgzip.NewWriterLevel(pipeWriter, f.opt.CompressionLevel)
+		if err != nil {
+			resultsGzip <- compressionResult[sgzip.GzipMetadata]{err: err, meta: sgzip.GzipMetadata{}}
+			close(resultsGzip)
+			return
+		}
+		_, err = io.Copy(gz, in)
+		gzErr := gz.Close()
+		if gzErr != nil && err == nil {
+			err = gzErr
+		}
+		closeErr := pipeWriter.Close()
+		if closeErr != nil && err == nil {
+			err = closeErr
+		}
+		resultsGzip <- compressionResult[sgzip.GzipMetadata]{err: err, meta: gz.MetaData()}
+		close(resultsGzip)
+	}()
+
+	wrappedIn := wrap(bufio.NewReaderSize(pipeReader, bufferSize)) // Probably no longer needed as sgzip has its own buffering
+
+	// Find a hash the destination supports to compute a hash of
+	// the compressed data.
+	ht := f.Fs.Hashes().GetOne()
+	var hasher *hash.MultiHasher
+	var err error
+	if ht != hash.None {
+		// unwrap the accounting again
+		wrappedIn, wrap = accounting.UnWrap(wrappedIn)
+		hasher, err = hash.NewMultiHasherTypes(hash.NewHashSet(ht))
+		if err != nil {
+			return nil, nil, err
+		}
+		// add the hasher and re-wrap the accounting
+		wrappedIn = io.TeeReader(wrappedIn, hasher)
+		wrappedIn = wrap(wrappedIn)
+	}
+
+	// Transfer the data
+	o, err := f.rcat(ctx, makeDataName(src.Remote(), src.Size(), f.mode), io.NopCloser(wrappedIn), src.ModTime(ctx), options)
+	if err != nil {
+		if o != nil {
+			if removeErr := o.Remove(ctx); removeErr != nil {
+				fs.Errorf(o, "Failed to remove partially transferred object: %v", removeErr)
+			}
+		}
+		return nil, nil, err
+	}
+	// Check whether we got an error during compression
+	result := <-resultsGzip
+	if result.err != nil {
+		if o != nil {
+			if removeErr := o.Remove(ctx); removeErr != nil {
+				fs.Errorf(o, "Failed to remove partially compressed object: %v", removeErr)
+			}
+		}
+		return nil, nil, result.err
+	}
+
+	// Generate metadata
+	meta := g.newMetadata(result.meta.Size, f.mode, result.meta, hex.EncodeToString(metaHasher.Sum(nil)), mimeType)
+
+	// Check the hashes of the compressed data if we were comparing them
+	if ht != hash.None && hasher != nil {
+		err = f.verifyObjectHash(ctx, o, hasher, ht)
+		if err != nil {
+			return nil, nil, err
+		}
+	}
+	return o, meta, nil
+}
+
+// putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm
+func (g *gzipModeHandler) putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error) {
+	return o, g.newMetadata(o.Size(), mode, sgzip.GzipMetadata{}, hex.EncodeToString(sum), mimeType), nil
+}
+
+// This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata.
+// Warning: This function panics if cmeta is not of the expected type.
+func (g *gzipModeHandler) newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata { + meta, ok := cmeta.(sgzip.GzipMetadata) + if !ok { + panic("invalid cmeta type: expected sgzip.GzipMetadata") + } + + objMeta := new(ObjectMetadata) + objMeta.Size = size + objMeta.Mode = mode + objMeta.CompressionMetadataGzip = &meta + objMeta.CompressionMetadataZstd = nil + objMeta.MD5 = md5 + objMeta.MimeType = mimeType + + return objMeta +} diff --git a/backend/compress/szstd_helper.go b/backend/compress/szstd_helper.go new file mode 100644 index 000000000..97f591f5f --- /dev/null +++ b/backend/compress/szstd_helper.go @@ -0,0 +1,327 @@ +package compress + +import ( + "context" + "errors" + "io" + "runtime" + "sync" + + szstd "github.com/a1ex3/zstd-seekable-format-go/pkg" + "github.com/klauspost/compress/zstd" +) + +const szstdChunkSize int = 1 << 20 // 1 MiB chunk size + +// SzstdMetadata holds metadata for szstd compressed files. +type SzstdMetadata struct { + BlockSize int // BlockSize is the size of the blocks in the zstd file + Size int64 // Size is the uncompressed size of the file + BlockData []uint32 // BlockData is the block data for the zstd file, used for seeking +} + +// SzstdWriter is a writer that compresses data in szstd format. +type SzstdWriter struct { + enc *zstd.Encoder + w szstd.ConcurrentWriter + metadata SzstdMetadata + mu sync.Mutex +} + +// NewWriterSzstd creates a new szstd writer with the specified options. +// It initializes the szstd writer with a zstd encoder and returns a pointer to the SzstdWriter. +// The writer can be used to write data in chunks, and it will automatically handle block sizes and metadata. +func NewWriterSzstd(w io.Writer, opts ...zstd.EOption) (*SzstdWriter, error) { + encoder, err := zstd.NewWriter(nil, opts...) + if err != nil { + return nil, err + } + + sw, err := szstd.NewWriter(w, encoder) + if err != nil { + if err := encoder.Close(); err != nil { + return nil, err + } + return nil, err + } + + return &SzstdWriter{ + enc: encoder, + w: sw, + metadata: SzstdMetadata{ + BlockSize: szstdChunkSize, + Size: 0, + }, + }, nil +} + +// Write writes data to the szstd writer in chunks of szstdChunkSize. +// It handles the block size and metadata updates automatically. +func (w *SzstdWriter) Write(p []byte) (int, error) { + if len(p) == 0 { + return 0, nil + } + + if w.metadata.BlockData == nil { + numBlocks := (len(p) + w.metadata.BlockSize - 1) / w.metadata.BlockSize + w.metadata.BlockData = make([]uint32, 1, numBlocks+1) + w.metadata.BlockData[0] = 0 + } + + start := 0 + total := len(p) + + var writerFunc szstd.FrameSource = func() ([]byte, error) { + if start >= total { + return nil, nil + } + + end := min(start+w.metadata.BlockSize, total) + chunk := p[start:end] + size := end - start + + w.mu.Lock() + w.metadata.Size += int64(size) + w.mu.Unlock() + + start = end + return chunk, nil + } + + // write sizes of compressed blocks in the callback + err := w.w.WriteMany(context.Background(), writerFunc, + szstd.WithWriteCallback(func(size uint32) { + w.mu.Lock() + lastOffset := w.metadata.BlockData[len(w.metadata.BlockData)-1] + w.metadata.BlockData = append(w.metadata.BlockData, lastOffset+size) + w.mu.Unlock() + }), + ) + if err != nil { + return 0, err + } + + return total, nil +} + +// Close closes the SzstdWriter and its underlying encoder. 
+func (w *SzstdWriter) Close() error { + if err := w.w.Close(); err != nil { + return err + } + if err := w.enc.Close(); err != nil { + return err + } + + return nil +} + +// GetMetadata returns the metadata of the szstd writer. +func (w *SzstdWriter) GetMetadata() SzstdMetadata { + return w.metadata +} + +// SzstdReaderAt is a reader that allows random access in szstd compressed data. +type SzstdReaderAt struct { + r szstd.Reader + decoder *zstd.Decoder + metadata *SzstdMetadata + pos int64 + mu sync.Mutex +} + +// NewReaderAtSzstd creates a new SzstdReaderAt at the specified io.ReadSeeker. +func NewReaderAtSzstd(rs io.ReadSeeker, meta *SzstdMetadata, offset int64, opts ...zstd.DOption) (*SzstdReaderAt, error) { + decoder, err := zstd.NewReader(nil, opts...) + if err != nil { + return nil, err + } + + r, err := szstd.NewReader(rs, decoder) + if err != nil { + decoder.Close() + return nil, err + } + + sr := &SzstdReaderAt{ + r: r, + decoder: decoder, + metadata: meta, + pos: 0, + } + + // Set initial position to the provided offset + if _, err := sr.Seek(offset, io.SeekStart); err != nil { + if err := sr.Close(); err != nil { + return nil, err + } + return nil, err + } + + return sr, nil +} + +// Seek sets the offset for the next Read. +func (s *SzstdReaderAt) Seek(offset int64, whence int) (int64, error) { + s.mu.Lock() + defer s.mu.Unlock() + + pos, err := s.r.Seek(offset, whence) + if err == nil { + s.pos = pos + } + return pos, err +} + +func (s *SzstdReaderAt) Read(p []byte) (int, error) { + s.mu.Lock() + defer s.mu.Unlock() + + n, err := s.r.Read(p) + if err == nil { + s.pos += int64(n) + } + return n, err +} + +// ReadAt reads data at the specified offset. +func (s *SzstdReaderAt) ReadAt(p []byte, off int64) (int, error) { + if off < 0 { + return 0, errors.New("invalid offset") + } + if off >= s.metadata.Size { + return 0, io.EOF + } + + endOff := min(off+int64(len(p)), s.metadata.Size) + + // Find all blocks covered by the range + type blockInfo struct { + index int // Block index + offsetInBlock int64 // Offset within the block for starting reading + bytesToRead int64 // How many bytes to read from this block + } + + var blocks []blockInfo + uncompressedOffset := int64(0) + currentOff := off + + for i := 0; i < len(s.metadata.BlockData)-1; i++ { + blockUncompressedEnd := min(uncompressedOffset+int64(s.metadata.BlockSize), s.metadata.Size) + + if currentOff < blockUncompressedEnd && endOff > uncompressedOffset { + offsetInBlock := max(0, currentOff-uncompressedOffset) + bytesToRead := min(blockUncompressedEnd-uncompressedOffset-offsetInBlock, endOff-currentOff) + + blocks = append(blocks, blockInfo{ + index: i, + offsetInBlock: offsetInBlock, + bytesToRead: bytesToRead, + }) + + currentOff += bytesToRead + if currentOff >= endOff { + break + } + } + uncompressedOffset = blockUncompressedEnd + } + + if len(blocks) == 0 { + return 0, io.EOF + } + + // Parallel block decoding + type decodeResult struct { + index int + data []byte + err error + } + + resultCh := make(chan decodeResult, len(blocks)) + var wg sync.WaitGroup + sem := make(chan struct{}, runtime.NumCPU()) + + for _, block := range blocks { + wg.Add(1) + go func(block blockInfo) { + defer wg.Done() + sem <- struct{}{} + defer func() { <-sem }() + + startOffset := int64(s.metadata.BlockData[block.index]) + endOffset := int64(s.metadata.BlockData[block.index+1]) + compressedSize := endOffset - startOffset + + compressed := make([]byte, compressedSize) + _, err := s.r.ReadAt(compressed, startOffset) + if err != nil && err != 
io.EOF { + resultCh <- decodeResult{index: block.index, err: err} + return + } + + decoded, err := s.decoder.DecodeAll(compressed, nil) + if err != nil { + resultCh <- decodeResult{index: block.index, err: err} + return + } + + resultCh <- decodeResult{index: block.index, data: decoded, err: nil} + }(block) + } + + go func() { + wg.Wait() + close(resultCh) + }() + + // Collect results in block index order + totalRead := 0 + results := make(map[int]decodeResult) + expected := len(blocks) + minIndex := blocks[0].index + + for res := range resultCh { + results[res.index] = res + for { + if result, ok := results[minIndex]; ok { + if result.err != nil { + return 0, result.err + } + // find the corresponding blockInfo + var blk blockInfo + for _, b := range blocks { + if b.index == result.index { + blk = b + break + } + } + + start := blk.offsetInBlock + end := start + blk.bytesToRead + copy(p[totalRead:totalRead+int(blk.bytesToRead)], result.data[start:end]) + totalRead += int(blk.bytesToRead) + minIndex++ + if minIndex-blocks[0].index >= len(blocks) { + break + } + } else { + break + } + } + if len(results) == expected && minIndex-blocks[0].index >= len(blocks) { + break + } + } + + return totalRead, nil +} + +// Close closes the SzstdReaderAt and underlying decoder. +func (s *SzstdReaderAt) Close() error { + if err := s.r.Close(); err != nil { + return err + } + s.decoder.Close() + return nil +} diff --git a/backend/compress/uncompressed_handler.go b/backend/compress/uncompressed_handler.go new file mode 100644 index 000000000..c1fc0c794 --- /dev/null +++ b/backend/compress/uncompressed_handler.go @@ -0,0 +1,65 @@ +package compress + +import ( + "context" + "fmt" + "io" + + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/chunkedreader" +) + +// uncompressedModeHandler implements compressionModeHandler for uncompressed files +type uncompressedModeHandler struct{} + +// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds +// the configured threshold +func (u *uncompressedModeHandler) isCompressible(r io.Reader, compressionMode int) (bool, error) { + return false, nil +} + +// newObjectGetOriginalSize returns the original file size from the metadata +func (u *uncompressedModeHandler) newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error) { + return 0, nil +} + +// openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method +func (u *uncompressedModeHandler) openGetReadCloser( + ctx context.Context, + o *Object, + offset int64, + limit int64, + cr chunkedreader.ChunkedReader, + closer io.Closer, + options ...fs.OpenOption, +) (rc io.ReadCloser, err error) { + return o.Object.Open(ctx, options...) 
+} + +// processFileNameGetFileExtension returns the file extension for the given compression mode +func (u *uncompressedModeHandler) processFileNameGetFileExtension(compressionMode int) string { + return "" +} + +// putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata +func (u *uncompressedModeHandler) putCompress( + ctx context.Context, + f *Fs, + in io.Reader, + src fs.ObjectInfo, + options []fs.OpenOption, + mimeType string, +) (fs.Object, *ObjectMetadata, error) { + return nil, nil, fmt.Errorf("unsupported compression mode %d", f.mode) +} + +// putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm +func (u *uncompressedModeHandler) putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error) { + return nil, nil, fmt.Errorf("unsupported compression mode %d", Uncompressed) +} + +// This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata. +// Warning: This function panics if cmeta is not of the expected type. +func (u *uncompressedModeHandler) newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata { + return nil +} diff --git a/backend/compress/unknown_handler.go b/backend/compress/unknown_handler.go new file mode 100644 index 000000000..f8d9df589 --- /dev/null +++ b/backend/compress/unknown_handler.go @@ -0,0 +1,65 @@ +package compress + +import ( + "context" + "fmt" + "io" + + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/chunkedreader" +) + +// unknownModeHandler implements compressionModeHandler for unknown compression types +type unknownModeHandler struct{} + +// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds +// the configured threshold +func (unk *unknownModeHandler) isCompressible(r io.Reader, compressionMode int) (bool, error) { + return false, fmt.Errorf("unknown compression mode %d", compressionMode) +} + +// newObjectGetOriginalSize returns the original file size from the metadata +func (unk *unknownModeHandler) newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error) { + return 0, nil +} + +// openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method +func (unk *unknownModeHandler) openGetReadCloser( + ctx context.Context, + o *Object, + offset int64, + limit int64, + cr chunkedreader.ChunkedReader, + closer io.Closer, + options ...fs.OpenOption, +) (rc io.ReadCloser, err error) { + return nil, fmt.Errorf("unknown compression mode %d", o.meta.Mode) +} + +// processFileNameGetFileExtension returns the file extension for the given compression mode +func (unk *unknownModeHandler) processFileNameGetFileExtension(compressionMode int) string { + return "" +} + +// putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata +func (unk *unknownModeHandler) putCompress( + ctx context.Context, + f *Fs, + in io.Reader, + src fs.ObjectInfo, + options []fs.OpenOption, + mimeType string, +) (fs.Object, *ObjectMetadata, error) { + return nil, nil, fmt.Errorf("unknown compression mode %d", f.mode) +} + +// putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm +func (unk *unknownModeHandler) putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error) { + return nil, nil, 
fmt.Errorf("unknown compression mode") +} + +// This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata. +// Warning: This function panics if cmeta is not of the expected type. +func (unk *unknownModeHandler) newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata { + return nil +} diff --git a/backend/compress/zstd_handler.go b/backend/compress/zstd_handler.go new file mode 100644 index 000000000..43914427b --- /dev/null +++ b/backend/compress/zstd_handler.go @@ -0,0 +1,192 @@ +package compress + +import ( + "bufio" + "bytes" + "context" + "crypto/md5" + "encoding/hex" + "errors" + "io" + + "github.com/klauspost/compress/zstd" + + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/accounting" + "github.com/rclone/rclone/fs/chunkedreader" + "github.com/rclone/rclone/fs/hash" +) + +// zstdModeHandler implements compressionModeHandler for zstd +type zstdModeHandler struct{} + +// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds +// the configured threshold +func (z *zstdModeHandler) isCompressible(r io.Reader, compressionMode int) (bool, error) { + var b bytes.Buffer + var n int64 + w, err := NewWriterSzstd(&b, zstd.WithEncoderLevel(zstd.SpeedDefault)) + if err != nil { + return false, err + } + n, err = io.Copy(w, r) + if err != nil { + return false, err + } + err = w.Close() + if err != nil { + return false, err + } + ratio := float64(n) / float64(b.Len()) + return ratio > minCompressionRatio, nil +} + +// newObjectGetOriginalSize returns the original file size from the metadata +func (z *zstdModeHandler) newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error) { + if meta.CompressionMetadataZstd == nil { + return 0, errors.New("missing zstd metadata") + } + return meta.CompressionMetadataZstd.Size, nil +} + +// openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method +func (z *zstdModeHandler) openGetReadCloser( + ctx context.Context, + o *Object, + offset int64, + limit int64, + cr chunkedreader.ChunkedReader, + closer io.Closer, + options ...fs.OpenOption, +) (rc io.ReadCloser, err error) { + var file io.Reader + + if offset != 0 { + file, err = NewReaderAtSzstd(cr, o.meta.CompressionMetadataZstd, offset) + } else { + file, err = zstd.NewReader(cr) + } + if err != nil { + return nil, err + } + + var fileReader io.Reader + if limit != -1 { + fileReader = io.LimitReader(file, limit) + } else { + fileReader = file + } + // Return a ReadCloser + return ReadCloserWrapper{Reader: fileReader, Closer: closer}, nil +} + +// processFileNameGetFileExtension returns the file extension for the given compression mode +func (z *zstdModeHandler) processFileNameGetFileExtension(compressionMode int) string { + if compressionMode == Zstd { + return zstdFileExt + } + + return "" +} + +// putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata +func (z *zstdModeHandler) putCompress( + ctx context.Context, + f *Fs, + in io.Reader, + src fs.ObjectInfo, + options []fs.OpenOption, + mimeType string, +) (fs.Object, *ObjectMetadata, error) { + // Unwrap reader accounting + in, wrap := accounting.UnWrap(in) + + // Add the metadata hasher + metaHasher := md5.New() + in = io.TeeReader(in, metaHasher) + + // Compress the file + pipeReader, pipeWriter := io.Pipe() + + resultsZstd := make(chan compressionResult[SzstdMetadata]) + go func() { + writer, err := NewWriterSzstd(pipeWriter, 
zstd.WithEncoderLevel(zstd.EncoderLevel(f.opt.CompressionLevel))) + if err != nil { + resultsZstd <- compressionResult[SzstdMetadata]{err: err} + close(resultsZstd) + return + } + _, err = io.Copy(writer, in) + if wErr := writer.Close(); wErr != nil && err == nil { + err = wErr + } + if cErr := pipeWriter.Close(); cErr != nil && err == nil { + err = cErr + } + + resultsZstd <- compressionResult[SzstdMetadata]{err: err, meta: writer.GetMetadata()} + close(resultsZstd) + }() + + wrappedIn := wrap(bufio.NewReaderSize(pipeReader, bufferSize)) + + ht := f.Fs.Hashes().GetOne() + var hasher *hash.MultiHasher + var err error + if ht != hash.None { + wrappedIn, wrap = accounting.UnWrap(wrappedIn) + hasher, err = hash.NewMultiHasherTypes(hash.NewHashSet(ht)) + if err != nil { + return nil, nil, err + } + wrappedIn = io.TeeReader(wrappedIn, hasher) + wrappedIn = wrap(wrappedIn) + } + + o, err := f.rcat(ctx, makeDataName(src.Remote(), src.Size(), f.mode), io.NopCloser(wrappedIn), src.ModTime(ctx), options) + if err != nil { + return nil, nil, err + } + + result := <-resultsZstd + if result.err != nil { + if o != nil { + _ = o.Remove(ctx) + } + return nil, nil, result.err + } + + // Build metadata using uncompressed size for filename + meta := z.newMetadata(result.meta.Size, f.mode, result.meta, hex.EncodeToString(metaHasher.Sum(nil)), mimeType) + if ht != hash.None && hasher != nil { + err = f.verifyObjectHash(ctx, o, hasher, ht) + if err != nil { + return nil, nil, err + } + } + return o, meta, nil +} + +// putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm +func (z *zstdModeHandler) putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error) { + return o, z.newMetadata(o.Size(), mode, SzstdMetadata{}, hex.EncodeToString(sum), mimeType), nil +} + +// This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata. +// Warning: This function panics if cmeta is not of the expected type. +func (z *zstdModeHandler) newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata { + meta, ok := cmeta.(SzstdMetadata) + if !ok { + panic("invalid cmeta type: expected SzstdMetadata") + } + + objMeta := new(ObjectMetadata) + objMeta.Size = size + objMeta.Mode = mode + objMeta.CompressionMetadataGzip = nil + objMeta.CompressionMetadataZstd = &meta + objMeta.MD5 = md5 + objMeta.MimeType = mimeType + + return objMeta +} diff --git a/docs/content/compress.md b/docs/content/compress.md index 9fe8d518a..e4c090eec 100644 --- a/docs/content/compress.md +++ b/docs/content/compress.md @@ -23,6 +23,7 @@ To use this remote, all you need to do is specify another remote and a compression mode to use: ```text +$ rclone config Current remotes: Name Type @@ -30,7 +31,6 @@ Name Type remote_to_press sometype e) Edit existing remote -$ rclone config n) New remote d) Delete remote r) Rename remote @@ -39,45 +39,74 @@ s) Set configuration password q) Quit config e/n/d/r/c/s/q> n name> compress + +Option Storage. +Type of storage to configure. +Choose a number from below, or type in your own value. ... - 8 / Compress a remote - \ "compress" +12 / Compress a remote + \ (compress) ... Storage> compress -** See help for compress backend at: https://rclone.org/compress/ ** +Option remote. Remote to compress. -Enter a string value. Press Enter for the default (""). +Enter a value. remote> remote_to_press:subdir + +Option mode. Compression mode. -Enter a string value. 
Press Enter for the default ("gzip"). -Choose a number from below, or type in your own value - 1 / Gzip compression balanced for speed and compression strength. - \ "gzip" -compression_mode> gzip -Edit advanced config? (y/n) +Choose a number from below, or type in your own value of type string. +Press Enter for the default (gzip). + 1 / Standard gzip compression with fastest parameters. + \ (gzip) + 2 / Zstandard compression — fast modern algorithm offering adjustable speed-to-compression tradeoffs. + \ (zstd) +mode> gzip + +Option level. +GZIP (levels -2 to 9): +- -2 — Huffman encoding only. Only use if you know what you're doing. +- -1 (default) — recommended; equivalent to level 5. +- 0 — turns off compression. +- 1–9 — increase compression at the cost of speed. Going past 6 generally offers very little return. + +ZSTD (levels 0 to 4): +- 0 — turns off compression entirely. +- 1 — fastest compression with the lowest ratio. +- 2 (default) — good balance of speed and compression. +- 3 — better compression, but uses about 2–3x more CPU than the default. +- 4 — best possible compression ratio (highest CPU cost). + +Notes: +- Choose GZIP for wide compatibility; ZSTD for better speed/ratio tradeoffs. +- Negative gzip levels: -2 = Huffman-only, -1 = default (≈ level 5). +Enter a value. +level> -1 + +Edit advanced config? y) Yes n) No (default) y/n> n -Remote config --------------------- -[compress] -type = compress -remote = remote_to_press:subdir -compression_mode = gzip --------------------- + +Configuration complete. +Options: +- type: compress +- remote: remote_to_press:subdir +- mode: gzip +- level: -1 +Keep this "compress" remote? y) Yes this is OK (default) e) Edit this remote d) Delete this remote y/e/d> y ``` -### Compression Modes +### Compression Algorithms -Currently only gzip compression is supported. It provides a decent balance -between speed and size and is well supported by other applications. Compression -strength can further be configured via an advanced setting where 0 is no -compression and 9 is strongest compression. +- **GZIP** – a well-established and widely adopted algorithm that strikes a solid balance between compression speed and ratio. It supports compression levels from -2 to 9, with the default -1 (roughly equivalent to level 5) offering an effective middle ground for most scenarios. + +- **Zstandard (zstd)** – a modern, high-performance algorithm that offers precise control over the trade-off between speed and compression efficiency. Compression levels range from 0 (no compression) to 4 (maximum compression). ### File types @@ -124,29 +153,38 @@ Properties: - Examples: - "gzip" - Standard gzip compression with fastest parameters. - -### Advanced options - -Here are the Advanced options specific to compress (Compress a remote). + - "zstd" + - Zstandard compression — fast modern algorithm offering adjustable speed-to-compression tradeoffs. #### --compress-level -GZIP compression level (-2 to 9). - -Generally -1 (default, equivalent to 5) is recommended. -Levels 1 to 9 increase compression at the cost of speed. Going past 6 -generally offers very little return. - -Level -2 uses Huffman encoding only. Only use if you know what you -are doing. -Level 0 turns off compression. +GZIP (levels -2 to 9): +- -2 — Huffman encoding only. Only use if you know what you're doing. +- -1 (default) — recommended; equivalent to level 5. +- 0 — turns off compression. +- 1–9 — increase compression at the cost of speed. Going past 6 generally offers very little return. 
+ +ZSTD (levels 0 to 4): +- 0 — turns off compression entirely. +- 1 — fastest compression with the lowest ratio. +- 2 (default) — good balance of speed and compression. +- 3 — better compression, but uses about 2–3x more CPU than the default. +- 4 — best possible compression ratio (highest CPU cost). + +Notes: +- Choose GZIP for wide compatibility; ZSTD for better speed/ratio tradeoffs. +- Negative gzip levels: -2 = Huffman-only, -1 = default (≈ level 5). Properties: - Config: level - Env Var: RCLONE_COMPRESS_LEVEL -- Type: int -- Default: -1 +- Type: string +- Required: true + +### Advanced options + +Here are the Advanced options specific to compress (Compress a remote). #### --compress-ram-cache-limit diff --git a/fstest/test_all/config.yaml b/fstest/test_all/config.yaml index eba583e38..a3bc56081 100644 --- a/fstest/test_all/config.yaml +++ b/fstest/test_all/config.yaml @@ -133,6 +133,9 @@ backends: - backend: "compress" remote: "TestCompress:" fastlist: false + - backend: "compress" + remote: "TestCompressZstd:" + fastlist: false # - backend: "compress" # remote: "TestCompressSwift:" # fastlist: false diff --git a/go.mod b/go.mod index d29278d89..9a526e04e 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/rclone/rclone -go 1.24.0 +go 1.24.4 require ( bazil.org/fuse v0.0.0-20230120002735-62a210ff1fd5 @@ -11,6 +11,7 @@ require ( github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 github.com/Files-com/files-sdk-go/v3 v3.2.242 github.com/Max-Sum/base32768 v0.0.0-20230304063302-18e6ce5945fd + github.com/a1ex3/zstd-seekable-format-go/pkg v0.10.0 github.com/a8m/tree v0.0.0-20240104212747-2c8764a5f17e github.com/aalpar/deheap v0.0.0-20210914013432-0cc84d79dec3 github.com/abbot/go-http-auth v0.4.0 @@ -173,6 +174,7 @@ require ( github.com/gofrs/flock v0.12.1 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang-jwt/jwt/v5 v5.3.0 // indirect + github.com/google/btree v1.1.3 // indirect github.com/google/s2a-go v0.1.9 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect github.com/googleapis/gax-go/v2 v2.15.0 // indirect diff --git a/go.sum b/go.sum index 88e176a4b..ab9b93c18 100644 --- a/go.sum +++ b/go.sum @@ -90,6 +90,8 @@ github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiU github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y= github.com/STARRY-S/zip v0.2.3 h1:luE4dMvRPDOWQdeDdUxUoZkzUIpTccdKdhHHsQJ1fm4= github.com/STARRY-S/zip v0.2.3/go.mod h1:lqJ9JdeRipyOQJrYSOtpNAiaesFO6zVDsE8GIGFaoSk= +github.com/a1ex3/zstd-seekable-format-go/pkg v0.10.0 h1:iLDOF0rdGTrol/q8OfPIIs5kLD8XvA2q75o6Uq/tgak= +github.com/a1ex3/zstd-seekable-format-go/pkg v0.10.0/go.mod h1:DrEWcQJjz7t5iF2duaiyhg4jyoF0kxOD6LtECNGkZ/Q= github.com/a8m/tree v0.0.0-20240104212747-2c8764a5f17e h1:KMVieI1/Ub++GYfnhyFPoGE3g5TUiG4srE3TMGr5nM4= github.com/a8m/tree v0.0.0-20240104212747-2c8764a5f17e/go.mod h1:j5astEcUkZQX8lK+KKlQ3NRQ50f4EE8ZjyZpCz3mrH4= github.com/aalpar/deheap v0.0.0-20210914013432-0cc84d79dec3 h1:hhdWprfSpFbN7lz3W1gM40vOgvSh1WCSMxYD6gGB4Hs= @@ -344,6 +346,8 @@ github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/btree v1.1.3 
h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= +github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
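
As an illustration of how the seekable-zstd helpers above fit together, a minimal in-memory round-trip could look like the following sketch. This is not part of the patch: it only assumes the exported names introduced in backend/compress/szstd_helper.go (NewWriterSzstd, GetMetadata, NewReaderAtSzstd, SzstdMetadata), and a bytes.Buffer stands in for the wrapped remote.

```go
package main

import (
	"bytes"
	"fmt"
	"log"

	"github.com/klauspost/compress/zstd"
	"github.com/rclone/rclone/backend/compress"
)

func main() {
	// Compress ~4 MiB of repetitive data into an in-memory seekable archive.
	data := bytes.Repeat([]byte("0123456789abcdef"), 256*1024)
	var archive bytes.Buffer

	w, err := compress.NewWriterSzstd(&archive, zstd.WithEncoderLevel(zstd.SpeedDefault))
	if err != nil {
		log.Fatal(err)
	}
	if _, err := w.Write(data); err != nil {
		log.Fatal(err)
	}
	if err := w.Close(); err != nil {
		log.Fatal(err)
	}
	// Block offsets and the uncompressed size; in the backend this is what
	// ends up in the .json metadata sidecar.
	meta := w.GetMetadata()

	// Random access: read 16 bytes at an uncompressed offset without
	// decompressing the whole archive.
	r, err := compress.NewReaderAtSzstd(bytes.NewReader(archive.Bytes()), &meta, 0)
	if err != nil {
		log.Fatal(err)
	}
	defer func() { _ = r.Close() }()

	buf := make([]byte, 16)
	if _, err := r.ReadAt(buf, 2*1024*1024); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("read at offset 2 MiB: %q\n", buf)
}
```

In the backend proper, NewReaderAtSzstd is fed a chunked reader over the remote data object rather than a bytes.Reader, which is what makes ranged `Open` calls cheap on large files.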
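With the backend configured, the new mode can also be set up non-interactively. A sketch of what that might look like, assuming the option names introduced by this patch (`mode`, `level`) and an existing `remote_to_press` remote:

```text
rclone config create compressed_zstd compress remote=remote_to_press:subdir mode=zstd level=2
rclone copy /path/to/data compressed_zstd:backup
```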