
backend/compress: add zstd compression

Added support for reading and writing zstd-compressed archives in seekable format
using "github.com/klauspost/compress/zstd" and
"github.com/SaveTheRbtz/zstd-seekable-format-go/pkg".

Bumped Go version from 1.24.0 to 1.24.4 due to requirements of
"github.com/SaveTheRbtz/zstd-seekable-format-go/pkg".
Alex
2025-11-04 21:50:56 +07:00
committed by GitHub
parent ee92673e1b
commit 8f74e7d331
11 changed files with 1093 additions and 228 deletions
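Editor's sketch (not part of the commit) of how the seekable-zstd helpers introduced below fit together: `NewWriterSzstd` produces the compressed stream plus a block index, and `NewReaderAtSzstd` uses that index to reopen the data at an arbitrary uncompressed offset. It assumes it runs inside the `compress` package; error handling is trimmed.

```go
package compress

import (
	"bytes"
	"io"

	"github.com/klauspost/compress/zstd"
)

func seekableRoundTrip(data []byte) ([]byte, error) {
	var buf bytes.Buffer

	// Compress into the seekable format, collecting per-block offsets.
	w, err := NewWriterSzstd(&buf, zstd.WithEncoderLevel(zstd.SpeedDefault))
	if err != nil {
		return nil, err
	}
	if _, err := w.Write(data); err != nil {
		return nil, err
	}
	if err := w.Close(); err != nil {
		return nil, err
	}
	// This is what the backend persists in the .json sidecar
	// as CompressionMetadataZstd.
	meta := w.GetMetadata()

	// Reopen at uncompressed offset 10 without decompressing from the start.
	r, err := NewReaderAtSzstd(bytes.NewReader(buf.Bytes()), &meta, 10)
	if err != nil {
		return nil, err
	}
	defer func() { _ = r.Close() }()
	return io.ReadAll(r)
}
```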


@@ -2,10 +2,8 @@
 package compress

 import (
-	"bufio"
 	"bytes"
 	"context"
-	"crypto/md5"
 	"encoding/base64"
 	"encoding/binary"
 	"encoding/hex"
@@ -46,6 +44,7 @@ const (
 	minCompressionRatio = 1.1
 	gzFileExt           = ".gz"
+	zstdFileExt         = ".zst"
 	metaFileExt         = ".json"
 	uncompressedFileExt = ".bin"
 )
@@ -54,6 +53,7 @@ const (
 const (
 	Uncompressed = 0
 	Gzip         = 2
+	Zstd         = 4
 )

 var nameRegexp = regexp.MustCompile(`^(.+?)\.([A-Za-z0-9-_]{11})$`)
@@ -66,6 +66,10 @@ func init() {
 			Value: "gzip",
 			Help:  "Standard gzip compression with fastest parameters.",
 		},
+		{
+			Value: "zstd",
+			Help:  "Zstandard compression — fast modern algorithm offering adjustable speed-to-compression tradeoffs.",
+		},
 	}

 	// Register our remote
@@ -87,17 +91,23 @@ func init() {
 			Examples: compressionModeOptions,
 		}, {
 			Name: "level",
-			Help: `GZIP compression level (-2 to 9).
-
-Generally -1 (default, equivalent to 5) is recommended.
-Levels 1 to 9 increase compression at the cost of speed. Going past 6
-generally offers very little return.
-
-Level -2 uses Huffman encoding only. Only use if you know what you
-are doing.
-
-Level 0 turns off compression.`,
-			Default:  sgzip.DefaultCompression,
-			Advanced: true,
+			Help: `GZIP (levels -2 to 9):
+- -2 — Huffman encoding only. Only use if you know what you're doing.
+- -1 (default) — recommended; equivalent to level 5.
+- 0 — turns off compression.
+- 1-9 — increase compression at the cost of speed. Going past 6 generally offers very little return.
+ZSTD (levels 0 to 4):
+- 0 — turns off compression entirely.
+- 1 — fastest compression with the lowest ratio.
+- 2 (default) — good balance of speed and compression.
+- 3 — better compression, but uses about 2-3x more CPU than the default.
+- 4 — best possible compression ratio (highest CPU cost).
+Notes:
+- Choose GZIP for wide compatibility; ZSTD for better speed/ratio tradeoffs.
+- Negative gzip levels: -2 = Huffman-only, -1 = default (≈ level 5).`,
+			Required: true,
 		}, {
 			Name: "ram_cache_limit",
 			Help: `Some remotes don't allow the upload of files with unknown size.
@@ -112,6 +122,47 @@ this limit will be cached on disk.`,
 	})
 }

+// compressionModeHandler defines the interface for handling different compression modes
+type compressionModeHandler interface {
+	// processFileNameGetFileExtension returns the file extension for the given compression mode
+	processFileNameGetFileExtension(compressionMode int) string
+	// newObjectGetOriginalSize returns the original file size from the metadata
+	newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error)
+	// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds
+	// the configured threshold
+	isCompressible(r io.Reader, compressionMode int) (bool, error)
+	// putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata
+	putCompress(
+		ctx context.Context,
+		f *Fs,
+		in io.Reader,
+		src fs.ObjectInfo,
+		options []fs.OpenOption,
+		mimeType string,
+	) (fs.Object, *ObjectMetadata, error)
+	// openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method
+	openGetReadCloser(
+		ctx context.Context,
+		o *Object,
+		offset int64,
+		limit int64,
+		cr chunkedreader.ChunkedReader,
+		closer io.Closer,
+		options ...fs.OpenOption,
+	) (rc io.ReadCloser, err error)
+	// putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm
+	putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error)
+	// This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata.
+	// Warning: This function panics if cmeta is not of the expected type.
+	newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata
+}
+
 // Options defines the configuration for this backend
 type Options struct {
 	Remote string `config:"remote"`
@@ -125,12 +176,13 @@ type Options struct {

 // Fs represents a wrapped fs.Fs
 type Fs struct {
 	fs.Fs
 	wrapper  fs.Fs
 	name     string
 	root     string
 	opt      Options
 	mode     int          // compression mode id
 	features *fs.Features // optional features
+	modeHandler compressionModeHandler // compression mode handler
 }

 // NewFs constructs an Fs from the path, container:path
@@ -167,13 +219,28 @@ func NewFs(ctx context.Context, name, rpath string, m configmap.Mapper) (fs.Fs,
 		return nil, fmt.Errorf("failed to make remote %s:%q to wrap: %w", wName, remotePath, err)
 	}

+	compressionMode := compressionModeFromName(opt.CompressionMode)
+	var modeHandler compressionModeHandler
+	switch compressionMode {
+	case Gzip:
+		modeHandler = &gzipModeHandler{}
+	case Zstd:
+		modeHandler = &zstdModeHandler{}
+	case Uncompressed:
+		modeHandler = &uncompressedModeHandler{}
+	default:
+		modeHandler = &unknownModeHandler{}
+	}
+
 	// Create the wrapping fs
 	f := &Fs{
 		Fs:   wrappedFs,
 		name: name,
 		root: rpath,
 		opt:  *opt,
-		mode: compressionModeFromName(opt.CompressionMode),
+		mode:        compressionMode,
+		modeHandler: modeHandler,
 	}
 	// Correct root if definitely pointing to a file
 	if err == fs.ErrorIsFile {
@@ -215,10 +282,13 @@ func NewFs(ctx context.Context, name, rpath string, m configmap.Mapper) (fs.Fs,
 	return f, err
 }

+// compressionModeFromName converts a compression mode name to its int representation.
 func compressionModeFromName(name string) int {
 	switch name {
 	case "gzip":
 		return Gzip
+	case "zstd":
+		return Zstd
 	default:
 		return Uncompressed
 	}
@@ -242,7 +312,7 @@ func base64ToInt64(str string) (int64, error) {
 // Processes a file name for a compressed file. Returns the original file name, the extension, and the size of the original file.
 // Returns -2 for the original size if the file is uncompressed.
-func processFileName(compressedFileName string) (origFileName string, extension string, origSize int64, err error) {
+func processFileName(compressedFileName string, modeHandler compressionModeHandler) (origFileName string, extension string, origSize int64, err error) {
 	// Separate the filename and size from the extension
 	extensionPos := strings.LastIndex(compressedFileName, ".")
 	if extensionPos == -1 {
@@ -261,7 +331,8 @@ func processFileName(compressedFileName string) (origFileName string, extension
 	if err != nil {
 		return "", "", 0, errors.New("could not decode size")
 	}
-	return match[1], gzFileExt, size, nil
+	ext := modeHandler.processFileNameGetFileExtension(compressionModeFromName(compressedFileName[extensionPos+1:]))
+	return match[1], ext, size, nil
 }

 // Generates the file name for a metadata file
@@ -286,11 +357,15 @@ func unwrapMetadataFile(filename string) (string, bool) {
 // makeDataName generates the file name for a data file with specified compression mode
 func makeDataName(remote string, size int64, mode int) (newRemote string) {
-	if mode != Uncompressed {
+	switch mode {
+	case Gzip:
 		newRemote = remote + "." + int64ToBase64(size) + gzFileExt
-	} else {
+	case Zstd:
+		newRemote = remote + "." + int64ToBase64(size) + zstdFileExt
+	default:
 		newRemote = remote + uncompressedFileExt
 	}
 	return newRemote
 }
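An editor's illustration of the naming scheme above (hypothetical path and size; the 11-character token is `int64ToBase64`'s encoding of the original size, later matched by `nameRegexp`):

```go
// A 1 MiB source stored under zstd mode gets its original size embedded
// in the data-file name, followed by the zstd extension:
name := makeDataName("docs/report.txt", 1048576, Zstd)
// name == "docs/report.txt.<11-char-size-token>.zst"
```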
@@ -304,7 +379,7 @@ func (f *Fs) dataName(remote string, size int64, compressed bool) (name string)
 // addData parses an object and adds it to the DirEntries
 func (f *Fs) addData(entries *fs.DirEntries, o fs.Object) {
-	origFileName, _, size, err := processFileName(o.Remote())
+	origFileName, _, size, err := processFileName(o.Remote(), f.modeHandler)
 	if err != nil {
 		fs.Errorf(o, "Error on parsing file name: %v", err)
 		return
@@ -427,8 +502,12 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
 	if err != nil {
 		return nil, fmt.Errorf("error decoding metadata: %w", err)
 	}
+	size, err := f.modeHandler.newObjectGetOriginalSize(meta)
+	if err != nil {
+		return nil, fmt.Errorf("error reading metadata: %w", err)
+	}
 	// Create our Object
-	o, err := f.Fs.NewObject(ctx, makeDataName(remote, meta.CompressionMetadata.Size, meta.Mode))
+	o, err := f.Fs.NewObject(ctx, makeDataName(remote, size, meta.Mode))
 	if err != nil {
 		return nil, err
 	}
@@ -437,7 +516,7 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
 // checkCompressAndType checks if an object is compressible and determines its mime type
 // returns a multireader with the bytes that were read to determine mime type
-func checkCompressAndType(in io.Reader) (newReader io.Reader, compressible bool, mimeType string, err error) {
+func checkCompressAndType(in io.Reader, compressionMode int, modeHandler compressionModeHandler) (newReader io.Reader, compressible bool, mimeType string, err error) {
 	in, wrap := accounting.UnWrap(in)
 	buf := make([]byte, heuristicBytes)
 	n, err := in.Read(buf)
@@ -446,7 +525,7 @@ func checkCompressAndType(in io.Reader) (newReader io.Reader, compressible bool,
 		return nil, false, "", err
 	}
 	mime := mimetype.Detect(buf)
-	compressible, err = isCompressible(bytes.NewReader(buf))
+	compressible, err = modeHandler.isCompressible(bytes.NewReader(buf), compressionMode)
 	if err != nil {
 		return nil, false, "", err
 	}
@@ -454,26 +533,6 @@ func checkCompressAndType(in io.Reader) (newReader io.Reader, compressible bool,
 	return wrap(in), compressible, mime.String(), nil
 }

-// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds
-// the configured threshold
-func isCompressible(r io.Reader) (bool, error) {
-	var b bytes.Buffer
-	w, err := sgzip.NewWriterLevel(&b, sgzip.DefaultCompression)
-	if err != nil {
-		return false, err
-	}
-	n, err := io.Copy(w, r)
-	if err != nil {
-		return false, err
-	}
-	err = w.Close()
-	if err != nil {
-		return false, err
-	}
-	ratio := float64(n) / float64(b.Len())
-	return ratio > minCompressionRatio, nil
-}
-
 // verifyObjectHash verifies the Objects hash
 func (f *Fs) verifyObjectHash(ctx context.Context, o fs.Object, hasher *hash.MultiHasher, ht hash.Type) error {
 	srcHash := hasher.Sums()[ht]
@@ -494,9 +553,9 @@ func (f *Fs) verifyObjectHash(ctx context.Context, o fs.Object, hasher *hash.Mul
 type putFn func(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error)

-type compressionResult struct {
+type compressionResult[T sgzip.GzipMetadata | SzstdMetadata] struct {
 	err  error
-	meta sgzip.GzipMetadata
+	meta T
 }

 // replicating some of operations.Rcat functionality because we want to support remotes without streaming
@@ -537,106 +596,18 @@ func (f *Fs) rcat(ctx context.Context, dstFileName string, in io.ReadCloser, mod
 		return nil, fmt.Errorf("failed to write temporary local file: %w", err)
 	}
 	if _, err = tempFile.Seek(0, 0); err != nil {
-		return nil, err
+		return nil, fmt.Errorf("failed to seek temporary local file: %w", err)
 	}
 	finfo, err := tempFile.Stat()
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("failed to stat temporary local file: %w", err)
 	}
 	return f.Fs.Put(ctx, tempFile, object.NewStaticObjectInfo(dstFileName, modTime, finfo.Size(), false, nil, f.Fs))
 }

 // Put a compressed version of a file. Returns a wrappable object and metadata.
 func (f *Fs) putCompress(ctx context.Context, in io.Reader, src fs.ObjectInfo, options []fs.OpenOption, mimeType string) (fs.Object, *ObjectMetadata, error) {
-	// Unwrap reader accounting
-	in, wrap := accounting.UnWrap(in)
-
-	// Add the metadata hasher
-	metaHasher := md5.New()
-	in = io.TeeReader(in, metaHasher)
-
-	// Compress the file
-	pipeReader, pipeWriter := io.Pipe()
-	results := make(chan compressionResult)
-	go func() {
-		gz, err := sgzip.NewWriterLevel(pipeWriter, f.opt.CompressionLevel)
-		if err != nil {
-			results <- compressionResult{err: err, meta: sgzip.GzipMetadata{}}
-			return
-		}
-		_, err = io.Copy(gz, in)
-		gzErr := gz.Close()
-		if gzErr != nil {
-			fs.Errorf(nil, "Failed to close compress: %v", gzErr)
-			if err == nil {
-				err = gzErr
-			}
-		}
-		closeErr := pipeWriter.Close()
-		if closeErr != nil {
-			fs.Errorf(nil, "Failed to close pipe: %v", closeErr)
-			if err == nil {
-				err = closeErr
-			}
-		}
-		results <- compressionResult{err: err, meta: gz.MetaData()}
-	}()
-	wrappedIn := wrap(bufio.NewReaderSize(pipeReader, bufferSize)) // Probably no longer needed as sgzip has it's own buffering
-
-	// Find a hash the destination supports to compute a hash of
-	// the compressed data.
-	ht := f.Fs.Hashes().GetOne()
-	var hasher *hash.MultiHasher
-	var err error
-	if ht != hash.None {
-		// unwrap the accounting again
-		wrappedIn, wrap = accounting.UnWrap(wrappedIn)
-		hasher, err = hash.NewMultiHasherTypes(hash.NewHashSet(ht))
-		if err != nil {
-			return nil, nil, err
-		}
-		// add the hasher and re-wrap the accounting
-		wrappedIn = io.TeeReader(wrappedIn, hasher)
-		wrappedIn = wrap(wrappedIn)
-	}
-
-	// Transfer the data
-	o, err := f.rcat(ctx, makeDataName(src.Remote(), src.Size(), f.mode), io.NopCloser(wrappedIn), src.ModTime(ctx), options)
-	//o, err := operations.Rcat(ctx, f.Fs, makeDataName(src.Remote(), src.Size(), f.mode), io.NopCloser(wrappedIn), src.ModTime(ctx))
-	if err != nil {
-		if o != nil {
-			removeErr := o.Remove(ctx)
-			if removeErr != nil {
-				fs.Errorf(o, "Failed to remove partially transferred object: %v", err)
-			}
-		}
-		return nil, nil, err
-	}
-
-	// Check whether we got an error during compression
-	result := <-results
-	err = result.err
-	if err != nil {
-		if o != nil {
-			removeErr := o.Remove(ctx)
-			if removeErr != nil {
-				fs.Errorf(o, "Failed to remove partially compressed object: %v", err)
-			}
-		}
-		return nil, nil, err
-	}
-
-	// Generate metadata
-	meta := newMetadata(result.meta.Size, f.mode, result.meta, hex.EncodeToString(metaHasher.Sum(nil)), mimeType)
-
-	// Check the hashes of the compressed data if we were comparing them
-	if ht != hash.None && hasher != nil {
-		err = f.verifyObjectHash(ctx, o, hasher, ht)
-		if err != nil {
-			return nil, nil, err
-		}
-	}
-
-	return o, meta, nil
+	return f.modeHandler.putCompress(ctx, f, in, src, options, mimeType)
 }

 // Put an uncompressed version of a file. Returns a wrappable object and metadata.
@@ -680,7 +651,8 @@ func (f *Fs) putUncompress(ctx context.Context, in io.Reader, src fs.ObjectInfo,
 	if err != nil {
 		return nil, nil, err
 	}
-	return o, newMetadata(o.Size(), Uncompressed, sgzip.GzipMetadata{}, hex.EncodeToString(sum), mimeType), nil
+	return f.modeHandler.putUncompressGetNewMetadata(o, Uncompressed, hex.EncodeToString(sum), mimeType, sum)
 }

 // This function will write a metadata struct to a metadata Object for an src. Returns a wrappable metadata object.
@@ -751,7 +723,7 @@ func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options .
 	o, err := f.NewObject(ctx, src.Remote())
 	if err == fs.ErrorObjectNotFound {
 		// Get our file compressibility
-		in, compressible, mimeType, err := checkCompressAndType(in)
+		in, compressible, mimeType, err := checkCompressAndType(in, f.mode, f.modeHandler)
 		if err != nil {
 			return nil, err
 		}
@@ -771,7 +743,7 @@ func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, opt
 	}
 	found := err == nil
-	in, compressible, mimeType, err := checkCompressAndType(in)
+	in, compressible, mimeType, err := checkCompressAndType(in, f.mode, f.modeHandler)
 	if err != nil {
 		return nil, err
 	}
@@ -1090,11 +1062,12 @@ func (f *Fs) PublicLink(ctx context.Context, remote string, duration fs.Duration
 // ObjectMetadata describes the metadata for an Object.
 type ObjectMetadata struct {
 	Mode     int    // Compression mode of the file.
 	Size     int64  // Size of the object.
 	MD5      string // MD5 hash of the file.
 	MimeType string // Mime type of the file
-	CompressionMetadata sgzip.GzipMetadata
+	CompressionMetadataGzip *sgzip.GzipMetadata // Metadata for Gzip compression
+	CompressionMetadataZstd *SzstdMetadata      // Metadata for Zstd compression
 }

 // Object with external metadata
@@ -1107,17 +1080,6 @@ type Object struct {
 	meta *ObjectMetadata // Metadata struct for this object (nil if not loaded)
 }

-// This function generates a metadata object
-func newMetadata(size int64, mode int, cmeta sgzip.GzipMetadata, md5 string, mimeType string) *ObjectMetadata {
-	meta := new(ObjectMetadata)
-	meta.Size = size
-	meta.Mode = mode
-	meta.CompressionMetadata = cmeta
-	meta.MD5 = md5
-	meta.MimeType = mimeType
-	return meta
-}
-
 // This function will read the metadata from a metadata object.
 func readMetadata(ctx context.Context, mo fs.Object) (meta *ObjectMetadata, err error) {
 	// Open our metadata object
@@ -1165,7 +1127,7 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
 		return o.mo, o.mo.Update(ctx, in, src, options...)
 	}
-	in, compressible, mimeType, err := checkCompressAndType(in)
+	in, compressible, mimeType, err := checkCompressAndType(in, o.meta.Mode, o.f.modeHandler)
 	if err != nil {
 		return err
 	}
@@ -1278,7 +1240,7 @@ func (o *Object) String() string {
 // Remote returns the remote path
 func (o *Object) Remote() string {
-	origFileName, _, _, err := processFileName(o.Object.Remote())
+	origFileName, _, _, err := processFileName(o.Object.Remote(), o.f.modeHandler)
 	if err != nil {
 		fs.Errorf(o.f, "Could not get remote path for: %s", o.Object.Remote())
 		return o.Object.Remote()
@@ -1381,7 +1343,6 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.Read
 		return o.Object.Open(ctx, options...)
 	}
 	// Get offset and limit from OpenOptions, pass the rest to the underlying remote
-	var openOptions = []fs.OpenOption{&fs.SeekOption{Offset: 0}}
 	var offset, limit int64 = 0, -1
 	for _, option := range options {
 		switch x := option.(type) {
@@ -1389,31 +1350,12 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.Read
 			offset = x.Offset
 		case *fs.RangeOption:
 			offset, limit = x.Decode(o.Size())
-		default:
-			openOptions = append(openOptions, option)
 		}
 	}
 	// Get a chunkedreader for the wrapped object
 	chunkedReader := chunkedreader.New(ctx, o.Object, initialChunkSize, maxChunkSize, chunkStreams)
-	// Get file handle
-	var file io.Reader
-	if offset != 0 {
-		file, err = sgzip.NewReaderAt(chunkedReader, &o.meta.CompressionMetadata, offset)
-	} else {
-		file, err = sgzip.NewReader(chunkedReader)
-	}
-	if err != nil {
-		return nil, err
-	}
-	var fileReader io.Reader
-	if limit != -1 {
-		fileReader = io.LimitReader(file, limit)
-	} else {
-		fileReader = file
-	}
-	// Return a ReadCloser
-	return ReadCloserWrapper{Reader: fileReader, Closer: chunkedReader}, nil
+	var retCloser io.Closer = chunkedReader
+	return o.f.modeHandler.openGetReadCloser(ctx, o, offset, limit, chunkedReader, retCloser, options...)
 }

 // ObjectInfo describes a wrapped fs.ObjectInfo for being the source


@@ -48,7 +48,27 @@ func TestRemoteGzip(t *testing.T) {
 	opt.ExtraConfig = []fstests.ExtraConfigItem{
 		{Name: name, Key: "type", Value: "compress"},
 		{Name: name, Key: "remote", Value: tempdir},
-		{Name: name, Key: "compression_mode", Value: "gzip"},
+		{Name: name, Key: "mode", Value: "gzip"},
+		{Name: name, Key: "level", Value: "-1"},
+	}
+	opt.QuickTestOK = true
+	fstests.Run(t, &opt)
+}
+
+// TestRemoteZstd tests ZSTD compression
+func TestRemoteZstd(t *testing.T) {
+	if *fstest.RemoteName != "" {
+		t.Skip("Skipping as -remote set")
+	}
+	tempdir := filepath.Join(os.TempDir(), "rclone-compress-test-zstd")
+	name := "TestCompressZstd"
+	opt := defaultOpt
+	opt.RemoteName = name + ":"
+	opt.ExtraConfig = []fstests.ExtraConfigItem{
+		{Name: name, Key: "type", Value: "compress"},
+		{Name: name, Key: "remote", Value: tempdir},
+		{Name: name, Key: "mode", Value: "zstd"},
+		{Name: name, Key: "level", Value: "2"},
 	}
 	opt.QuickTestOK = true
 	fstests.Run(t, &opt)
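Editor's note: with the config above, the new test can be exercised on its own; a sketch assuming rclone's standard repository layout:

```text
go test ./backend/compress -run TestRemoteZstd -v
```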


@@ -0,0 +1,207 @@
package compress
import (
"bufio"
"bytes"
"context"
"crypto/md5"
"encoding/hex"
"errors"
"io"
"github.com/buengese/sgzip"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/accounting"
"github.com/rclone/rclone/fs/chunkedreader"
"github.com/rclone/rclone/fs/hash"
)
// gzipModeHandler implements compressionModeHandler for gzip
type gzipModeHandler struct{}
// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds
// the configured threshold
func (g *gzipModeHandler) isCompressible(r io.Reader, compressionMode int) (bool, error) {
var b bytes.Buffer
var n int64
w, err := sgzip.NewWriterLevel(&b, sgzip.DefaultCompression)
if err != nil {
return false, err
}
n, err = io.Copy(w, r)
if err != nil {
return false, err
}
err = w.Close()
if err != nil {
return false, err
}
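// e.g. 4096 heuristic bytes gzipping down to 3200 gives ratio 1.28 > minCompressionRatio (1.1), so the data is worth compressing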
ratio := float64(n) / float64(b.Len())
return ratio > minCompressionRatio, nil
}
// newObjectGetOriginalSize returns the original file size from the metadata
func (g *gzipModeHandler) newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error) {
if meta.CompressionMetadataGzip == nil {
return 0, errors.New("missing gzip metadata")
}
return meta.CompressionMetadataGzip.Size, nil
}
// openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method
func (g *gzipModeHandler) openGetReadCloser(
ctx context.Context,
o *Object,
offset int64,
limit int64,
cr chunkedreader.ChunkedReader,
closer io.Closer,
options ...fs.OpenOption,
) (rc io.ReadCloser, err error) {
var file io.Reader
if offset != 0 {
file, err = sgzip.NewReaderAt(cr, o.meta.CompressionMetadataGzip, offset)
} else {
file, err = sgzip.NewReader(cr)
}
if err != nil {
return nil, err
}
var fileReader io.Reader
if limit != -1 {
fileReader = io.LimitReader(file, limit)
} else {
fileReader = file
}
// Return a ReadCloser
return ReadCloserWrapper{Reader: fileReader, Closer: closer}, nil
}
// processFileNameGetFileExtension returns the file extension for the given compression mode
func (g *gzipModeHandler) processFileNameGetFileExtension(compressionMode int) string {
if compressionMode == Gzip {
return gzFileExt
}
return ""
}
// putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata
func (g *gzipModeHandler) putCompress(
ctx context.Context,
f *Fs,
in io.Reader,
src fs.ObjectInfo,
options []fs.OpenOption,
mimeType string,
) (fs.Object, *ObjectMetadata, error) {
// Unwrap reader accounting
in, wrap := accounting.UnWrap(in)
// Add the metadata hasher
metaHasher := md5.New()
in = io.TeeReader(in, metaHasher)
// Compress the file
pipeReader, pipeWriter := io.Pipe()
resultsGzip := make(chan compressionResult[sgzip.GzipMetadata])
go func() {
gz, err := sgzip.NewWriterLevel(pipeWriter, f.opt.CompressionLevel)
if err != nil {
resultsGzip <- compressionResult[sgzip.GzipMetadata]{err: err, meta: sgzip.GzipMetadata{}}
close(resultsGzip)
return
}
_, err = io.Copy(gz, in)
gzErr := gz.Close()
if gzErr != nil && err == nil {
err = gzErr
}
closeErr := pipeWriter.Close()
if closeErr != nil && err == nil {
err = closeErr
}
resultsGzip <- compressionResult[sgzip.GzipMetadata]{err: err, meta: gz.MetaData()}
close(resultsGzip)
}()
wrappedIn := wrap(bufio.NewReaderSize(pipeReader, bufferSize)) // Probably no longer needed as sgzip has its own buffering
// Find a hash the destination supports to compute a hash of
// the compressed data.
ht := f.Fs.Hashes().GetOne()
var hasher *hash.MultiHasher
var err error
if ht != hash.None {
// unwrap the accounting again
wrappedIn, wrap = accounting.UnWrap(wrappedIn)
hasher, err = hash.NewMultiHasherTypes(hash.NewHashSet(ht))
if err != nil {
return nil, nil, err
}
// add the hasher and re-wrap the accounting
wrappedIn = io.TeeReader(wrappedIn, hasher)
wrappedIn = wrap(wrappedIn)
}
// Transfer the data
o, err := f.rcat(ctx, makeDataName(src.Remote(), src.Size(), f.mode), io.NopCloser(wrappedIn), src.ModTime(ctx), options)
if err != nil {
if o != nil {
if removeErr := o.Remove(ctx); removeErr != nil {
fs.Errorf(o, "Failed to remove partially transferred object: %v", removeErr)
}
}
return nil, nil, err
}
// Check whether we got an error during compression
result := <-resultsGzip
if result.err != nil {
if o != nil {
if removeErr := o.Remove(ctx); removeErr != nil {
fs.Errorf(o, "Failed to remove partially compressed object: %v", removeErr)
}
}
return nil, nil, result.err
}
// Generate metadata
meta := g.newMetadata(result.meta.Size, f.mode, result.meta, hex.EncodeToString(metaHasher.Sum(nil)), mimeType)
// Check the hashes of the compressed data if we were comparing them
if ht != hash.None && hasher != nil {
err = f.verifyObjectHash(ctx, o, hasher, ht)
if err != nil {
return nil, nil, err
}
}
return o, meta, nil
}
// putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm
func (g *gzipModeHandler) putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error) {
return o, g.newMetadata(o.Size(), mode, sgzip.GzipMetadata{}, hex.EncodeToString(sum), mimeType), nil
}
// This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata.
// Warning: This function panics if cmeta is not of the expected type.
func (g *gzipModeHandler) newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata {
meta, ok := cmeta.(sgzip.GzipMetadata)
if !ok {
panic("invalid cmeta type: expected sgzip.GzipMetadata")
}
objMeta := new(ObjectMetadata)
objMeta.Size = size
objMeta.Mode = mode
objMeta.CompressionMetadataGzip = &meta
objMeta.CompressionMetadataZstd = nil
objMeta.MD5 = md5
objMeta.MimeType = mimeType
return objMeta
}


@@ -0,0 +1,327 @@
package compress
import (
"context"
"errors"
"io"
"runtime"
"sync"
szstd "github.com/a1ex3/zstd-seekable-format-go/pkg"
"github.com/klauspost/compress/zstd"
)
const szstdChunkSize int = 1 << 20 // 1 MiB chunk size
// SzstdMetadata holds metadata for szstd compressed files.
type SzstdMetadata struct {
BlockSize int // BlockSize is the size of the blocks in the zstd file
Size int64 // Size is the uncompressed size of the file
BlockData []uint32 // BlockData is the block data for the zstd file, used for seeking
}
// SzstdWriter is a writer that compresses data in szstd format.
type SzstdWriter struct {
enc *zstd.Encoder
w szstd.ConcurrentWriter
metadata SzstdMetadata
mu sync.Mutex
}
// NewWriterSzstd creates a new szstd writer with the specified options.
// It initializes the szstd writer with a zstd encoder and returns a pointer to the SzstdWriter.
// The writer can be used to write data in chunks, and it will automatically handle block sizes and metadata.
func NewWriterSzstd(w io.Writer, opts ...zstd.EOption) (*SzstdWriter, error) {
encoder, err := zstd.NewWriter(nil, opts...)
if err != nil {
return nil, err
}
sw, err := szstd.NewWriter(w, encoder)
if err != nil {
if err := encoder.Close(); err != nil {
return nil, err
}
return nil, err
}
return &SzstdWriter{
enc: encoder,
w: sw,
metadata: SzstdMetadata{
BlockSize: szstdChunkSize,
Size: 0,
},
}, nil
}
// Write writes data to the szstd writer in chunks of szstdChunkSize.
// It handles the block size and metadata updates automatically.
func (w *SzstdWriter) Write(p []byte) (int, error) {
if len(p) == 0 {
return 0, nil
}
if w.metadata.BlockData == nil {
numBlocks := (len(p) + w.metadata.BlockSize - 1) / w.metadata.BlockSize
w.metadata.BlockData = make([]uint32, 1, numBlocks+1)
w.metadata.BlockData[0] = 0
}
start := 0
total := len(p)
var writerFunc szstd.FrameSource = func() ([]byte, error) {
if start >= total {
return nil, nil
}
end := min(start+w.metadata.BlockSize, total)
chunk := p[start:end]
size := end - start
w.mu.Lock()
w.metadata.Size += int64(size)
w.mu.Unlock()
start = end
return chunk, nil
}
// write sizes of compressed blocks in the callback
err := w.w.WriteMany(context.Background(), writerFunc,
szstd.WithWriteCallback(func(size uint32) {
w.mu.Lock()
lastOffset := w.metadata.BlockData[len(w.metadata.BlockData)-1]
w.metadata.BlockData = append(w.metadata.BlockData, lastOffset+size)
w.mu.Unlock()
}),
)
if err != nil {
return 0, err
}
return total, nil
}
// Close closes the SzstdWriter and its underlying encoder.
func (w *SzstdWriter) Close() error {
if err := w.w.Close(); err != nil {
return err
}
if err := w.enc.Close(); err != nil {
return err
}
return nil
}
// GetMetadata returns the metadata of the szstd writer.
func (w *SzstdWriter) GetMetadata() SzstdMetadata {
return w.metadata
}
// SzstdReaderAt is a reader that allows random access in szstd compressed data.
type SzstdReaderAt struct {
r szstd.Reader
decoder *zstd.Decoder
metadata *SzstdMetadata
pos int64
mu sync.Mutex
}
// NewReaderAtSzstd creates a new SzstdReaderAt from the specified io.ReadSeeker, positioned at the given uncompressed offset.
func NewReaderAtSzstd(rs io.ReadSeeker, meta *SzstdMetadata, offset int64, opts ...zstd.DOption) (*SzstdReaderAt, error) {
decoder, err := zstd.NewReader(nil, opts...)
if err != nil {
return nil, err
}
r, err := szstd.NewReader(rs, decoder)
if err != nil {
decoder.Close()
return nil, err
}
sr := &SzstdReaderAt{
r: r,
decoder: decoder,
metadata: meta,
pos: 0,
}
// Set initial position to the provided offset
if _, err := sr.Seek(offset, io.SeekStart); err != nil {
if err := sr.Close(); err != nil {
return nil, err
}
return nil, err
}
return sr, nil
}
// Seek sets the offset for the next Read.
func (s *SzstdReaderAt) Seek(offset int64, whence int) (int64, error) {
s.mu.Lock()
defer s.mu.Unlock()
pos, err := s.r.Seek(offset, whence)
if err == nil {
s.pos = pos
}
return pos, err
}
func (s *SzstdReaderAt) Read(p []byte) (int, error) {
s.mu.Lock()
defer s.mu.Unlock()
n, err := s.r.Read(p)
if err == nil {
s.pos += int64(n)
}
return n, err
}
// ReadAt reads data at the specified offset.
func (s *SzstdReaderAt) ReadAt(p []byte, off int64) (int, error) {
if off < 0 {
return 0, errors.New("invalid offset")
}
if off >= s.metadata.Size {
return 0, io.EOF
}
endOff := min(off+int64(len(p)), s.metadata.Size)
// Find all blocks covered by the range
type blockInfo struct {
index int // Block index
offsetInBlock int64 // Offset within the block for starting reading
bytesToRead int64 // How many bytes to read from this block
}
var blocks []blockInfo
uncompressedOffset := int64(0)
currentOff := off
for i := 0; i < len(s.metadata.BlockData)-1; i++ {
blockUncompressedEnd := min(uncompressedOffset+int64(s.metadata.BlockSize), s.metadata.Size)
if currentOff < blockUncompressedEnd && endOff > uncompressedOffset {
offsetInBlock := max(0, currentOff-uncompressedOffset)
bytesToRead := min(blockUncompressedEnd-uncompressedOffset-offsetInBlock, endOff-currentOff)
blocks = append(blocks, blockInfo{
index: i,
offsetInBlock: offsetInBlock,
bytesToRead: bytesToRead,
})
currentOff += bytesToRead
if currentOff >= endOff {
break
}
}
uncompressedOffset = blockUncompressedEnd
}
if len(blocks) == 0 {
return 0, io.EOF
}
// Parallel block decoding
type decodeResult struct {
index int
data []byte
err error
}
resultCh := make(chan decodeResult, len(blocks))
var wg sync.WaitGroup
sem := make(chan struct{}, runtime.NumCPU())
for _, block := range blocks {
wg.Add(1)
go func(block blockInfo) {
defer wg.Done()
sem <- struct{}{}
defer func() { <-sem }()
startOffset := int64(s.metadata.BlockData[block.index])
endOffset := int64(s.metadata.BlockData[block.index+1])
compressedSize := endOffset - startOffset
compressed := make([]byte, compressedSize)
_, err := s.r.ReadAt(compressed, startOffset)
if err != nil && err != io.EOF {
resultCh <- decodeResult{index: block.index, err: err}
return
}
decoded, err := s.decoder.DecodeAll(compressed, nil)
if err != nil {
resultCh <- decodeResult{index: block.index, err: err}
return
}
resultCh <- decodeResult{index: block.index, data: decoded, err: nil}
}(block)
}
go func() {
wg.Wait()
close(resultCh)
}()
// Collect results in block index order
totalRead := 0
results := make(map[int]decodeResult)
expected := len(blocks)
minIndex := blocks[0].index
for res := range resultCh {
results[res.index] = res
for {
if result, ok := results[minIndex]; ok {
if result.err != nil {
return 0, result.err
}
// find the corresponding blockInfo
var blk blockInfo
for _, b := range blocks {
if b.index == result.index {
blk = b
break
}
}
start := blk.offsetInBlock
end := start + blk.bytesToRead
copy(p[totalRead:totalRead+int(blk.bytesToRead)], result.data[start:end])
totalRead += int(blk.bytesToRead)
minIndex++
if minIndex-blocks[0].index >= len(blocks) {
break
}
} else {
break
}
}
if len(results) == expected && minIndex-blocks[0].index >= len(blocks) {
break
}
}
return totalRead, nil
}
// Close closes the SzstdReaderAt and underlying decoder.
func (s *SzstdReaderAt) Close() error {
if err := s.r.Close(); err != nil {
return err
}
s.decoder.Close()
return nil
}
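To make the block index concrete: each block holds `BlockSize` uncompressed bytes, and `BlockData[i]` / `BlockData[i+1]` bound block i's compressed bytes. An editor's sketch (hypothetical helper, not in the commit) of the offset-to-block mapping that `ReadAt` performs:

```go
// blockForOffset returns the block containing the uncompressed offset off,
// plus the compressed byte range to read for it (assumes off < meta.Size).
func blockForOffset(meta *SzstdMetadata, off int64) (index int, compStart, compEnd int64) {
	index = int(off / int64(meta.BlockSize)) // fixed-size uncompressed blocks
	compStart = int64(meta.BlockData[index]) // cumulative compressed offsets
	compEnd = int64(meta.BlockData[index+1])
	return index, compStart, compEnd
}
```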


@@ -0,0 +1,65 @@
package compress
import (
"context"
"fmt"
"io"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/chunkedreader"
)
// uncompressedModeHandler implements compressionModeHandler for uncompressed files
type uncompressedModeHandler struct{}
// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds
// the configured threshold
func (u *uncompressedModeHandler) isCompressible(r io.Reader, compressionMode int) (bool, error) {
return false, nil
}
// newObjectGetOriginalSize returns the original file size from the metadata
func (u *uncompressedModeHandler) newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error) {
return 0, nil
}
// openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method
func (u *uncompressedModeHandler) openGetReadCloser(
ctx context.Context,
o *Object,
offset int64,
limit int64,
cr chunkedreader.ChunkedReader,
closer io.Closer,
options ...fs.OpenOption,
) (rc io.ReadCloser, err error) {
return o.Object.Open(ctx, options...)
}
// processFileNameGetFileExtension returns the file extension for the given compression mode
func (u *uncompressedModeHandler) processFileNameGetFileExtension(compressionMode int) string {
return ""
}
// putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata
func (u *uncompressedModeHandler) putCompress(
ctx context.Context,
f *Fs,
in io.Reader,
src fs.ObjectInfo,
options []fs.OpenOption,
mimeType string,
) (fs.Object, *ObjectMetadata, error) {
return nil, nil, fmt.Errorf("unsupported compression mode %d", f.mode)
}
// putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm
func (u *uncompressedModeHandler) putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error) {
return nil, nil, fmt.Errorf("unsupported compression mode %d", Uncompressed)
}
// This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata.
// Warning: This function panics if cmeta is not of the expected type.
func (u *uncompressedModeHandler) newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata {
return nil
}


@@ -0,0 +1,65 @@
package compress
import (
"context"
"fmt"
"io"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/chunkedreader"
)
// unknownModeHandler implements compressionModeHandler for unknown compression types
type unknownModeHandler struct{}
// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds
// the configured threshold
func (unk *unknownModeHandler) isCompressible(r io.Reader, compressionMode int) (bool, error) {
return false, fmt.Errorf("unknown compression mode %d", compressionMode)
}
// newObjectGetOriginalSize returns the original file size from the metadata
func (unk *unknownModeHandler) newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error) {
return 0, nil
}
// openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method
func (unk *unknownModeHandler) openGetReadCloser(
ctx context.Context,
o *Object,
offset int64,
limit int64,
cr chunkedreader.ChunkedReader,
closer io.Closer,
options ...fs.OpenOption,
) (rc io.ReadCloser, err error) {
return nil, fmt.Errorf("unknown compression mode %d", o.meta.Mode)
}
// processFileNameGetFileExtension returns the file extension for the given compression mode
func (unk *unknownModeHandler) processFileNameGetFileExtension(compressionMode int) string {
return ""
}
// putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata
func (unk *unknownModeHandler) putCompress(
ctx context.Context,
f *Fs,
in io.Reader,
src fs.ObjectInfo,
options []fs.OpenOption,
mimeType string,
) (fs.Object, *ObjectMetadata, error) {
return nil, nil, fmt.Errorf("unknown compression mode %d", f.mode)
}
// putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm
func (unk *unknownModeHandler) putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error) {
return nil, nil, fmt.Errorf("unknown compression mode")
}
// This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata.
// Warning: This function panics if cmeta is not of the expected type.
func (unk *unknownModeHandler) newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata {
return nil
}


@@ -0,0 +1,192 @@
package compress
import (
"bufio"
"bytes"
"context"
"crypto/md5"
"encoding/hex"
"errors"
"io"
"github.com/klauspost/compress/zstd"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/accounting"
"github.com/rclone/rclone/fs/chunkedreader"
"github.com/rclone/rclone/fs/hash"
)
// zstdModeHandler implements compressionModeHandler for zstd
type zstdModeHandler struct{}
// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds
// the configured threshold
func (z *zstdModeHandler) isCompressible(r io.Reader, compressionMode int) (bool, error) {
var b bytes.Buffer
var n int64
w, err := NewWriterSzstd(&b, zstd.WithEncoderLevel(zstd.SpeedDefault))
if err != nil {
return false, err
}
n, err = io.Copy(w, r)
if err != nil {
return false, err
}
err = w.Close()
if err != nil {
return false, err
}
ratio := float64(n) / float64(b.Len())
return ratio > minCompressionRatio, nil
}
// newObjectGetOriginalSize returns the original file size from the metadata
func (z *zstdModeHandler) newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error) {
if meta.CompressionMetadataZstd == nil {
return 0, errors.New("missing zstd metadata")
}
return meta.CompressionMetadataZstd.Size, nil
}
// openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method
func (z *zstdModeHandler) openGetReadCloser(
ctx context.Context,
o *Object,
offset int64,
limit int64,
cr chunkedreader.ChunkedReader,
closer io.Closer,
options ...fs.OpenOption,
) (rc io.ReadCloser, err error) {
var file io.Reader
if offset != 0 {
file, err = NewReaderAtSzstd(cr, o.meta.CompressionMetadataZstd, offset)
} else {
file, err = zstd.NewReader(cr)
}
if err != nil {
return nil, err
}
var fileReader io.Reader
if limit != -1 {
fileReader = io.LimitReader(file, limit)
} else {
fileReader = file
}
// Return a ReadCloser
return ReadCloserWrapper{Reader: fileReader, Closer: closer}, nil
}
// processFileNameGetFileExtension returns the file extension for the given compression mode
func (z *zstdModeHandler) processFileNameGetFileExtension(compressionMode int) string {
if compressionMode == Zstd {
return zstdFileExt
}
return ""
}
// putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata
func (z *zstdModeHandler) putCompress(
ctx context.Context,
f *Fs,
in io.Reader,
src fs.ObjectInfo,
options []fs.OpenOption,
mimeType string,
) (fs.Object, *ObjectMetadata, error) {
// Unwrap reader accounting
in, wrap := accounting.UnWrap(in)
// Add the metadata hasher
metaHasher := md5.New()
in = io.TeeReader(in, metaHasher)
// Compress the file
pipeReader, pipeWriter := io.Pipe()
resultsZstd := make(chan compressionResult[SzstdMetadata])
go func() {
writer, err := NewWriterSzstd(pipeWriter, zstd.WithEncoderLevel(zstd.EncoderLevel(f.opt.CompressionLevel)))
if err != nil {
resultsZstd <- compressionResult[SzstdMetadata]{err: err}
close(resultsZstd)
return
}
_, err = io.Copy(writer, in)
if wErr := writer.Close(); wErr != nil && err == nil {
err = wErr
}
if cErr := pipeWriter.Close(); cErr != nil && err == nil {
err = cErr
}
resultsZstd <- compressionResult[SzstdMetadata]{err: err, meta: writer.GetMetadata()}
close(resultsZstd)
}()
wrappedIn := wrap(bufio.NewReaderSize(pipeReader, bufferSize))
ht := f.Fs.Hashes().GetOne()
var hasher *hash.MultiHasher
var err error
if ht != hash.None {
wrappedIn, wrap = accounting.UnWrap(wrappedIn)
hasher, err = hash.NewMultiHasherTypes(hash.NewHashSet(ht))
if err != nil {
return nil, nil, err
}
wrappedIn = io.TeeReader(wrappedIn, hasher)
wrappedIn = wrap(wrappedIn)
}
o, err := f.rcat(ctx, makeDataName(src.Remote(), src.Size(), f.mode), io.NopCloser(wrappedIn), src.ModTime(ctx), options)
if err != nil {
return nil, nil, err
}
result := <-resultsZstd
if result.err != nil {
if o != nil {
_ = o.Remove(ctx)
}
return nil, nil, result.err
}
// Build metadata using uncompressed size for filename
meta := z.newMetadata(result.meta.Size, f.mode, result.meta, hex.EncodeToString(metaHasher.Sum(nil)), mimeType)
if ht != hash.None && hasher != nil {
err = f.verifyObjectHash(ctx, o, hasher, ht)
if err != nil {
return nil, nil, err
}
}
return o, meta, nil
}
// putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm
func (z *zstdModeHandler) putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error) {
return o, z.newMetadata(o.Size(), mode, SzstdMetadata{}, hex.EncodeToString(sum), mimeType), nil
}
// This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata.
// Warning: This function panics if cmeta is not of the expected type.
func (z *zstdModeHandler) newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata {
meta, ok := cmeta.(SzstdMetadata)
if !ok {
panic("invalid cmeta type: expected SzstdMetadata")
}
objMeta := new(ObjectMetadata)
objMeta.Size = size
objMeta.Mode = mode
objMeta.CompressionMetadataGzip = nil
objMeta.CompressionMetadataZstd = &meta
objMeta.MD5 = md5
objMeta.MimeType = mimeType
return objMeta
}
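For reference, `zstd.WithEncoderLevel(zstd.EncoderLevel(f.opt.CompressionLevel))` above passes the backend's numeric level straight through to klauspost's named encoder levels, which is why the documented range is 0 to 4. A sketch of the mapping (`zstdLevelNames` is a hypothetical name, not in the commit):

```go
package compress

import "github.com/klauspost/compress/zstd"

// Backend levels 1-4 map one-to-one onto klauspost's named encoder levels;
// level 0 means "no compression" and has no encoder-level counterpart.
var zstdLevelNames = map[int]zstd.EncoderLevel{
	1: zstd.SpeedFastest,           // lowest ratio, fastest
	2: zstd.SpeedDefault,           // backend default
	3: zstd.SpeedBetterCompression, // roughly 2-3x the CPU of the default
	4: zstd.SpeedBestCompression,   // highest ratio, highest CPU cost
}
```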


@@ -23,6 +23,7 @@ To use this remote, all you need to do is specify another remote and a
 compression mode to use:

 ```text
+$ rclone config
 Current remotes:

 Name                 Type
@@ -30,7 +31,6 @@ Name                 Type
 remote_to_press      sometype

 e) Edit existing remote
-$ rclone config
 n) New remote
 d) Delete remote
 r) Rename remote
@@ -39,45 +39,74 @@ s) Set configuration password
 q) Quit config
 e/n/d/r/c/s/q> n
 name> compress
+Option Storage.
+Type of storage to configure.
+Choose a number from below, or type in your own value.
 ...
- 8 / Compress a remote
-   \ "compress"
+12 / Compress a remote
+   \ (compress)
 ...
 Storage> compress
-** See help for compress backend at: https://rclone.org/compress/ **
+Option remote.
 Remote to compress.
-Enter a string value. Press Enter for the default ("").
+Enter a value.
 remote> remote_to_press:subdir
+Option mode.
 Compression mode.
-Enter a string value. Press Enter for the default ("gzip").
-Choose a number from below, or type in your own value
- 1 / Gzip compression balanced for speed and compression strength.
-   \ "gzip"
-compression_mode> gzip
-Edit advanced config? (y/n)
+Choose a number from below, or type in your own value of type string.
+Press Enter for the default (gzip).
+ 1 / Standard gzip compression with fastest parameters.
+   \ (gzip)
+ 2 / Zstandard compression — fast modern algorithm offering adjustable speed-to-compression tradeoffs.
+   \ (zstd)
+mode> gzip
+Option level.
+GZIP (levels -2 to 9):
+- -2 — Huffman encoding only. Only use if you know what you're doing.
+- -1 (default) — recommended; equivalent to level 5.
+- 0 — turns off compression.
+- 1-9 — increase compression at the cost of speed. Going past 6 generally offers very little return.
+ZSTD (levels 0 to 4):
+- 0 — turns off compression entirely.
+- 1 — fastest compression with the lowest ratio.
+- 2 (default) — good balance of speed and compression.
+- 3 — better compression, but uses about 2-3x more CPU than the default.
+- 4 — best possible compression ratio (highest CPU cost).
+Notes:
+- Choose GZIP for wide compatibility; ZSTD for better speed/ratio tradeoffs.
+- Negative gzip levels: -2 = Huffman-only, -1 = default (≈ level 5).
+Enter a value.
+level> -1
+Edit advanced config?
 y) Yes
 n) No (default)
 y/n> n
-Remote config
---------------------
-[compress]
-type = compress
-remote = remote_to_press:subdir
-compression_mode = gzip
---------------------
+Configuration complete.
+Options:
+- type: compress
+- remote: remote_to_press:subdir
+- mode: gzip
+- level: -1
+Keep this "compress" remote?
 y) Yes this is OK (default)
 e) Edit this remote
 d) Delete this remote
 y/e/d> y
 ```
-### Compression Modes
+### Compression Algorithms

-Currently only gzip compression is supported. It provides a decent balance
-between speed and size and is well supported by other applications. Compression
-strength can further be configured via an advanced setting where 0 is no
-compression and 9 is strongest compression.
+- **GZIP**: a well-established and widely adopted algorithm that strikes a solid balance between compression speed and ratio. It supports compression levels from -2 to 9, with the default -1 (roughly equivalent to level 5) offering an effective middle ground for most scenarios.
+
+- **Zstandard (zstd)**: a modern, high-performance algorithm that offers precise control over the trade-off between speed and compression efficiency. Compression levels range from 0 (no compression) to 4 (maximum compression).

 ### File types
@@ -124,29 +153,38 @@ Properties:
 - Examples:
     - "gzip"
         - Standard gzip compression with fastest parameters.
+    - "zstd"
+        - Zstandard compression — fast modern algorithm offering adjustable speed-to-compression tradeoffs.

-### Advanced options
-
-Here are the Advanced options specific to compress (Compress a remote).
-
 #### --compress-level

-GZIP compression level (-2 to 9).
-
-Generally -1 (default, equivalent to 5) is recommended.
-Levels 1 to 9 increase compression at the cost of speed. Going past 6
-generally offers very little return.
-
-Level -2 uses Huffman encoding only. Only use if you know what you
-are doing.
-
-Level 0 turns off compression.
+GZIP (levels -2 to 9):
+- -2 — Huffman encoding only. Only use if you know what you're doing.
+- -1 (default) — recommended; equivalent to level 5.
+- 0 — turns off compression.
+- 1-9 — increase compression at the cost of speed. Going past 6 generally offers very little return.
+ZSTD (levels 0 to 4):
+- 0 — turns off compression entirely.
+- 1 — fastest compression with the lowest ratio.
+- 2 (default) — good balance of speed and compression.
+- 3 — better compression, but uses about 2-3x more CPU than the default.
+- 4 — best possible compression ratio (highest CPU cost).
+Notes:
+- Choose GZIP for wide compatibility; ZSTD for better speed/ratio tradeoffs.
+- Negative gzip levels: -2 = Huffman-only, -1 = default (≈ level 5).

 Properties:

 - Config:      level
 - Env Var:     RCLONE_COMPRESS_LEVEL
-- Type:        int
-- Default:     -1
+- Type:        string
+- Required:    true
+
+### Advanced options
+
+Here are the Advanced options specific to compress (Compress a remote).

 #### --compress-ram-cache-limit
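As a quick, non-interactive alternative to the walkthrough above, the same remote can be created in one line. This is an editor's sketch assuming rclone's `config create` key=value syntax and a hypothetical remote name:

```text
rclone config create compressed_zstd compress remote=remote_to_press:subdir mode=zstd level=2
```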


@@ -133,6 +133,9 @@ backends:
 - backend: "compress"
   remote: "TestCompress:"
   fastlist: false
+- backend: "compress"
+  remote: "TestCompressZstd:"
+  fastlist: false
 # - backend: "compress"
 #   remote: "TestCompressSwift:"
 #   fastlist: false

go.mod

@@ -1,6 +1,6 @@
 module github.com/rclone/rclone

-go 1.24.0
+go 1.24.4

 require (
 	bazil.org/fuse v0.0.0-20230120002735-62a210ff1fd5
@@ -11,6 +11,7 @@ require (
 	github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358
 	github.com/Files-com/files-sdk-go/v3 v3.2.242
 	github.com/Max-Sum/base32768 v0.0.0-20230304063302-18e6ce5945fd
+	github.com/a1ex3/zstd-seekable-format-go/pkg v0.10.0
 	github.com/a8m/tree v0.0.0-20240104212747-2c8764a5f17e
 	github.com/aalpar/deheap v0.0.0-20210914013432-0cc84d79dec3
 	github.com/abbot/go-http-auth v0.4.0
@@ -173,6 +174,7 @@ require (
 	github.com/gofrs/flock v0.12.1 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang-jwt/jwt/v5 v5.3.0 // indirect
+	github.com/google/btree v1.1.3 // indirect
 	github.com/google/s2a-go v0.1.9 // indirect
 	github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect
 	github.com/googleapis/gax-go/v2 v2.15.0 // indirect

go.sum

@@ -90,6 +90,8 @@ github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiU
 github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y=
 github.com/STARRY-S/zip v0.2.3 h1:luE4dMvRPDOWQdeDdUxUoZkzUIpTccdKdhHHsQJ1fm4=
 github.com/STARRY-S/zip v0.2.3/go.mod h1:lqJ9JdeRipyOQJrYSOtpNAiaesFO6zVDsE8GIGFaoSk=
+github.com/a1ex3/zstd-seekable-format-go/pkg v0.10.0 h1:iLDOF0rdGTrol/q8OfPIIs5kLD8XvA2q75o6Uq/tgak=
+github.com/a1ex3/zstd-seekable-format-go/pkg v0.10.0/go.mod h1:DrEWcQJjz7t5iF2duaiyhg4jyoF0kxOD6LtECNGkZ/Q=
 github.com/a8m/tree v0.0.0-20240104212747-2c8764a5f17e h1:KMVieI1/Ub++GYfnhyFPoGE3g5TUiG4srE3TMGr5nM4=
 github.com/a8m/tree v0.0.0-20240104212747-2c8764a5f17e/go.mod h1:j5astEcUkZQX8lK+KKlQ3NRQ50f4EE8ZjyZpCz3mrH4=
 github.com/aalpar/deheap v0.0.0-20210914013432-0cc84d79dec3 h1:hhdWprfSpFbN7lz3W1gM40vOgvSh1WCSMxYD6gGB4Hs=
@@ -344,6 +346,8 @@ github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek
 github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
 github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
+github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
+github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
 github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=