1
0
mirror of https://github.com/rclone/rclone.git synced 2025-12-06 00:03:32 +00:00

backend/compress: add zstd compression

Added support for reading and writing zstd-compressed archives in seekable format
using "github.com/klauspost/compress/zstd" and
"github.com/SaveTheRbtz/zstd-seekable-format-go/pkg".

Bumped Go version from 1.24.0 to 1.24.4 due to requirements of
"github.com/SaveTheRbtz/zstd-seekable-format-go/pkg".
This commit is contained in:
Alex
2025-11-04 21:50:56 +07:00
committed by GitHub
parent ee92673e1b
commit 8f74e7d331
11 changed files with 1093 additions and 228 deletions

View File

@@ -2,10 +2,8 @@
package compress
import (
"bufio"
"bytes"
"context"
"crypto/md5"
"encoding/base64"
"encoding/binary"
"encoding/hex"
@@ -46,6 +44,7 @@ const (
minCompressionRatio = 1.1
gzFileExt = ".gz"
zstdFileExt = ".zst"
metaFileExt = ".json"
uncompressedFileExt = ".bin"
)
@@ -54,6 +53,7 @@ const (
const (
Uncompressed = 0
Gzip = 2
Zstd = 4
)
var nameRegexp = regexp.MustCompile(`^(.+?)\.([A-Za-z0-9-_]{11})$`)
@@ -66,6 +66,10 @@ func init() {
Value: "gzip",
Help: "Standard gzip compression with fastest parameters.",
},
{
Value: "zstd",
Help: "Zstandard compression — fast modern algorithm offering adjustable speed-to-compression tradeoffs.",
},
}
// Register our remote
@@ -87,17 +91,23 @@ func init() {
Examples: compressionModeOptions,
}, {
Name: "level",
Help: `GZIP compression level (-2 to 9).
Generally -1 (default, equivalent to 5) is recommended.
Levels 1 to 9 increase compression at the cost of speed. Going past 6
generally offers very little return.
Level -2 uses Huffman encoding only. Only use if you know what you
are doing.
Level 0 turns off compression.`,
Default: sgzip.DefaultCompression,
Advanced: true,
Help: `GZIP (levels -2 to 9):
- -2 — Huffman encoding only. Only use if you know what you're doing.
- -1 (default) — recommended; equivalent to level 5.
- 0 — turns off compression.
- 19 — increase compression at the cost of speed. Going past 6 generally offers very little return.
ZSTD (levels 0 to 4):
- 0 — turns off compression entirely.
- 1 — fastest compression with the lowest ratio.
- 2 (default) — good balance of speed and compression.
- 3 — better compression, but uses about 23x more CPU than the default.
- 4 — best possible compression ratio (highest CPU cost).
Notes:
- Choose GZIP for wide compatibility; ZSTD for better speed/ratio tradeoffs.
- Negative gzip levels: -2 = Huffman-only, -1 = default (≈ level 5).`,
Required: true,
}, {
Name: "ram_cache_limit",
Help: `Some remotes don't allow the upload of files with unknown size.
@@ -112,6 +122,47 @@ this limit will be cached on disk.`,
})
}
// compressionModeHandler defines the interface for handling different compression modes
type compressionModeHandler interface {
// processFileNameGetFileExtension returns the file extension for the given compression mode
processFileNameGetFileExtension(compressionMode int) string
// newObjectGetOriginalSize returns the original file size from the metadata
newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error)
// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds
// the configured threshold
isCompressible(r io.Reader, compressionMode int) (bool, error)
// putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata
putCompress(
ctx context.Context,
f *Fs,
in io.Reader,
src fs.ObjectInfo,
options []fs.OpenOption,
mimeType string,
) (fs.Object, *ObjectMetadata, error)
// openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method
openGetReadCloser(
ctx context.Context,
o *Object,
offset int64,
limit int64,
cr chunkedreader.ChunkedReader,
closer io.Closer,
options ...fs.OpenOption,
) (rc io.ReadCloser, err error)
// putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm
putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error)
// This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata.
// Warning: This function panics if cmeta is not of the expected type.
newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata
}
// Options defines the configuration for this backend
type Options struct {
Remote string `config:"remote"`
@@ -125,12 +176,13 @@ type Options struct {
// Fs represents a wrapped fs.Fs
type Fs struct {
fs.Fs
wrapper fs.Fs
name string
root string
opt Options
mode int // compression mode id
features *fs.Features // optional features
wrapper fs.Fs
name string
root string
opt Options
mode int // compression mode id
features *fs.Features // optional features
modeHandler compressionModeHandler // compression mode handler
}
// NewFs constructs an Fs from the path, container:path
@@ -167,13 +219,28 @@ func NewFs(ctx context.Context, name, rpath string, m configmap.Mapper) (fs.Fs,
return nil, fmt.Errorf("failed to make remote %s:%q to wrap: %w", wName, remotePath, err)
}
compressionMode := compressionModeFromName(opt.CompressionMode)
var modeHandler compressionModeHandler
switch compressionMode {
case Gzip:
modeHandler = &gzipModeHandler{}
case Zstd:
modeHandler = &zstdModeHandler{}
case Uncompressed:
modeHandler = &uncompressedModeHandler{}
default:
modeHandler = &unknownModeHandler{}
}
// Create the wrapping fs
f := &Fs{
Fs: wrappedFs,
name: name,
root: rpath,
opt: *opt,
mode: compressionModeFromName(opt.CompressionMode),
Fs: wrappedFs,
name: name,
root: rpath,
opt: *opt,
mode: compressionMode,
modeHandler: modeHandler,
}
// Correct root if definitely pointing to a file
if err == fs.ErrorIsFile {
@@ -215,10 +282,13 @@ func NewFs(ctx context.Context, name, rpath string, m configmap.Mapper) (fs.Fs,
return f, err
}
// compressionModeFromName converts a compression mode name to its int representation.
func compressionModeFromName(name string) int {
switch name {
case "gzip":
return Gzip
case "zstd":
return Zstd
default:
return Uncompressed
}
@@ -242,7 +312,7 @@ func base64ToInt64(str string) (int64, error) {
// Processes a file name for a compressed file. Returns the original file name, the extension, and the size of the original file.
// Returns -2 for the original size if the file is uncompressed.
func processFileName(compressedFileName string) (origFileName string, extension string, origSize int64, err error) {
func processFileName(compressedFileName string, modeHandler compressionModeHandler) (origFileName string, extension string, origSize int64, err error) {
// Separate the filename and size from the extension
extensionPos := strings.LastIndex(compressedFileName, ".")
if extensionPos == -1 {
@@ -261,7 +331,8 @@ func processFileName(compressedFileName string) (origFileName string, extension
if err != nil {
return "", "", 0, errors.New("could not decode size")
}
return match[1], gzFileExt, size, nil
ext := modeHandler.processFileNameGetFileExtension(compressionModeFromName(compressedFileName[extensionPos+1:]))
return match[1], ext, size, nil
}
// Generates the file name for a metadata file
@@ -286,11 +357,15 @@ func unwrapMetadataFile(filename string) (string, bool) {
// makeDataName generates the file name for a data file with specified compression mode
func makeDataName(remote string, size int64, mode int) (newRemote string) {
if mode != Uncompressed {
switch mode {
case Gzip:
newRemote = remote + "." + int64ToBase64(size) + gzFileExt
} else {
case Zstd:
newRemote = remote + "." + int64ToBase64(size) + zstdFileExt
default:
newRemote = remote + uncompressedFileExt
}
return newRemote
}
@@ -304,7 +379,7 @@ func (f *Fs) dataName(remote string, size int64, compressed bool) (name string)
// addData parses an object and adds it to the DirEntries
func (f *Fs) addData(entries *fs.DirEntries, o fs.Object) {
origFileName, _, size, err := processFileName(o.Remote())
origFileName, _, size, err := processFileName(o.Remote(), f.modeHandler)
if err != nil {
fs.Errorf(o, "Error on parsing file name: %v", err)
return
@@ -427,8 +502,12 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
if err != nil {
return nil, fmt.Errorf("error decoding metadata: %w", err)
}
size, err := f.modeHandler.newObjectGetOriginalSize(meta)
if err != nil {
return nil, fmt.Errorf("error reading metadata: %w", err)
}
// Create our Object
o, err := f.Fs.NewObject(ctx, makeDataName(remote, meta.CompressionMetadata.Size, meta.Mode))
o, err := f.Fs.NewObject(ctx, makeDataName(remote, size, meta.Mode))
if err != nil {
return nil, err
}
@@ -437,7 +516,7 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
// checkCompressAndType checks if an object is compressible and determines it's mime type
// returns a multireader with the bytes that were read to determine mime type
func checkCompressAndType(in io.Reader) (newReader io.Reader, compressible bool, mimeType string, err error) {
func checkCompressAndType(in io.Reader, compressionMode int, modeHandler compressionModeHandler) (newReader io.Reader, compressible bool, mimeType string, err error) {
in, wrap := accounting.UnWrap(in)
buf := make([]byte, heuristicBytes)
n, err := in.Read(buf)
@@ -446,7 +525,7 @@ func checkCompressAndType(in io.Reader) (newReader io.Reader, compressible bool,
return nil, false, "", err
}
mime := mimetype.Detect(buf)
compressible, err = isCompressible(bytes.NewReader(buf))
compressible, err = modeHandler.isCompressible(bytes.NewReader(buf), compressionMode)
if err != nil {
return nil, false, "", err
}
@@ -454,26 +533,6 @@ func checkCompressAndType(in io.Reader) (newReader io.Reader, compressible bool,
return wrap(in), compressible, mime.String(), nil
}
// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds
// the configured threshold
func isCompressible(r io.Reader) (bool, error) {
var b bytes.Buffer
w, err := sgzip.NewWriterLevel(&b, sgzip.DefaultCompression)
if err != nil {
return false, err
}
n, err := io.Copy(w, r)
if err != nil {
return false, err
}
err = w.Close()
if err != nil {
return false, err
}
ratio := float64(n) / float64(b.Len())
return ratio > minCompressionRatio, nil
}
// verifyObjectHash verifies the Objects hash
func (f *Fs) verifyObjectHash(ctx context.Context, o fs.Object, hasher *hash.MultiHasher, ht hash.Type) error {
srcHash := hasher.Sums()[ht]
@@ -494,9 +553,9 @@ func (f *Fs) verifyObjectHash(ctx context.Context, o fs.Object, hasher *hash.Mul
type putFn func(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error)
type compressionResult struct {
type compressionResult[T sgzip.GzipMetadata | SzstdMetadata] struct {
err error
meta sgzip.GzipMetadata
meta T
}
// replicating some of operations.Rcat functionality because we want to support remotes without streaming
@@ -537,106 +596,18 @@ func (f *Fs) rcat(ctx context.Context, dstFileName string, in io.ReadCloser, mod
return nil, fmt.Errorf("failed to write temporary local file: %w", err)
}
if _, err = tempFile.Seek(0, 0); err != nil {
return nil, err
return nil, fmt.Errorf("failed to seek temporary local file: %w", err)
}
finfo, err := tempFile.Stat()
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to stat temporary local file: %w", err)
}
return f.Fs.Put(ctx, tempFile, object.NewStaticObjectInfo(dstFileName, modTime, finfo.Size(), false, nil, f.Fs))
}
// Put a compressed version of a file. Returns a wrappable object and metadata.
func (f *Fs) putCompress(ctx context.Context, in io.Reader, src fs.ObjectInfo, options []fs.OpenOption, mimeType string) (fs.Object, *ObjectMetadata, error) {
// Unwrap reader accounting
in, wrap := accounting.UnWrap(in)
// Add the metadata hasher
metaHasher := md5.New()
in = io.TeeReader(in, metaHasher)
// Compress the file
pipeReader, pipeWriter := io.Pipe()
results := make(chan compressionResult)
go func() {
gz, err := sgzip.NewWriterLevel(pipeWriter, f.opt.CompressionLevel)
if err != nil {
results <- compressionResult{err: err, meta: sgzip.GzipMetadata{}}
return
}
_, err = io.Copy(gz, in)
gzErr := gz.Close()
if gzErr != nil {
fs.Errorf(nil, "Failed to close compress: %v", gzErr)
if err == nil {
err = gzErr
}
}
closeErr := pipeWriter.Close()
if closeErr != nil {
fs.Errorf(nil, "Failed to close pipe: %v", closeErr)
if err == nil {
err = closeErr
}
}
results <- compressionResult{err: err, meta: gz.MetaData()}
}()
wrappedIn := wrap(bufio.NewReaderSize(pipeReader, bufferSize)) // Probably no longer needed as sgzip has it's own buffering
// Find a hash the destination supports to compute a hash of
// the compressed data.
ht := f.Fs.Hashes().GetOne()
var hasher *hash.MultiHasher
var err error
if ht != hash.None {
// unwrap the accounting again
wrappedIn, wrap = accounting.UnWrap(wrappedIn)
hasher, err = hash.NewMultiHasherTypes(hash.NewHashSet(ht))
if err != nil {
return nil, nil, err
}
// add the hasher and re-wrap the accounting
wrappedIn = io.TeeReader(wrappedIn, hasher)
wrappedIn = wrap(wrappedIn)
}
// Transfer the data
o, err := f.rcat(ctx, makeDataName(src.Remote(), src.Size(), f.mode), io.NopCloser(wrappedIn), src.ModTime(ctx), options)
//o, err := operations.Rcat(ctx, f.Fs, makeDataName(src.Remote(), src.Size(), f.mode), io.NopCloser(wrappedIn), src.ModTime(ctx))
if err != nil {
if o != nil {
removeErr := o.Remove(ctx)
if removeErr != nil {
fs.Errorf(o, "Failed to remove partially transferred object: %v", err)
}
}
return nil, nil, err
}
// Check whether we got an error during compression
result := <-results
err = result.err
if err != nil {
if o != nil {
removeErr := o.Remove(ctx)
if removeErr != nil {
fs.Errorf(o, "Failed to remove partially compressed object: %v", err)
}
}
return nil, nil, err
}
// Generate metadata
meta := newMetadata(result.meta.Size, f.mode, result.meta, hex.EncodeToString(metaHasher.Sum(nil)), mimeType)
// Check the hashes of the compressed data if we were comparing them
if ht != hash.None && hasher != nil {
err = f.verifyObjectHash(ctx, o, hasher, ht)
if err != nil {
return nil, nil, err
}
}
return o, meta, nil
return f.modeHandler.putCompress(ctx, f, in, src, options, mimeType)
}
// Put an uncompressed version of a file. Returns a wrappable object and metadata.
@@ -680,7 +651,8 @@ func (f *Fs) putUncompress(ctx context.Context, in io.Reader, src fs.ObjectInfo,
if err != nil {
return nil, nil, err
}
return o, newMetadata(o.Size(), Uncompressed, sgzip.GzipMetadata{}, hex.EncodeToString(sum), mimeType), nil
return f.modeHandler.putUncompressGetNewMetadata(o, Uncompressed, hex.EncodeToString(sum), mimeType, sum)
}
// This function will write a metadata struct to a metadata Object for an src. Returns a wrappable metadata object.
@@ -751,7 +723,7 @@ func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options .
o, err := f.NewObject(ctx, src.Remote())
if err == fs.ErrorObjectNotFound {
// Get our file compressibility
in, compressible, mimeType, err := checkCompressAndType(in)
in, compressible, mimeType, err := checkCompressAndType(in, f.mode, f.modeHandler)
if err != nil {
return nil, err
}
@@ -771,7 +743,7 @@ func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, opt
}
found := err == nil
in, compressible, mimeType, err := checkCompressAndType(in)
in, compressible, mimeType, err := checkCompressAndType(in, f.mode, f.modeHandler)
if err != nil {
return nil, err
}
@@ -1090,11 +1062,12 @@ func (f *Fs) PublicLink(ctx context.Context, remote string, duration fs.Duration
// ObjectMetadata describes the metadata for an Object.
type ObjectMetadata struct {
Mode int // Compression mode of the file.
Size int64 // Size of the object.
MD5 string // MD5 hash of the file.
MimeType string // Mime type of the file
CompressionMetadata sgzip.GzipMetadata
Mode int // Compression mode of the file.
Size int64 // Size of the object.
MD5 string // MD5 hash of the file.
MimeType string // Mime type of the file
CompressionMetadataGzip *sgzip.GzipMetadata // Metadata for Gzip compression
CompressionMetadataZstd *SzstdMetadata // Metadata for Zstd compression
}
// Object with external metadata
@@ -1107,17 +1080,6 @@ type Object struct {
meta *ObjectMetadata // Metadata struct for this object (nil if not loaded)
}
// This function generates a metadata object
func newMetadata(size int64, mode int, cmeta sgzip.GzipMetadata, md5 string, mimeType string) *ObjectMetadata {
meta := new(ObjectMetadata)
meta.Size = size
meta.Mode = mode
meta.CompressionMetadata = cmeta
meta.MD5 = md5
meta.MimeType = mimeType
return meta
}
// This function will read the metadata from a metadata object.
func readMetadata(ctx context.Context, mo fs.Object) (meta *ObjectMetadata, err error) {
// Open our meradata object
@@ -1165,7 +1127,7 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
return o.mo, o.mo.Update(ctx, in, src, options...)
}
in, compressible, mimeType, err := checkCompressAndType(in)
in, compressible, mimeType, err := checkCompressAndType(in, o.meta.Mode, o.f.modeHandler)
if err != nil {
return err
}
@@ -1278,7 +1240,7 @@ func (o *Object) String() string {
// Remote returns the remote path
func (o *Object) Remote() string {
origFileName, _, _, err := processFileName(o.Object.Remote())
origFileName, _, _, err := processFileName(o.Object.Remote(), o.f.modeHandler)
if err != nil {
fs.Errorf(o.f, "Could not get remote path for: %s", o.Object.Remote())
return o.Object.Remote()
@@ -1381,7 +1343,6 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.Read
return o.Object.Open(ctx, options...)
}
// Get offset and limit from OpenOptions, pass the rest to the underlying remote
var openOptions = []fs.OpenOption{&fs.SeekOption{Offset: 0}}
var offset, limit int64 = 0, -1
for _, option := range options {
switch x := option.(type) {
@@ -1389,31 +1350,12 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.Read
offset = x.Offset
case *fs.RangeOption:
offset, limit = x.Decode(o.Size())
default:
openOptions = append(openOptions, option)
}
}
// Get a chunkedreader for the wrapped object
chunkedReader := chunkedreader.New(ctx, o.Object, initialChunkSize, maxChunkSize, chunkStreams)
// Get file handle
var file io.Reader
if offset != 0 {
file, err = sgzip.NewReaderAt(chunkedReader, &o.meta.CompressionMetadata, offset)
} else {
file, err = sgzip.NewReader(chunkedReader)
}
if err != nil {
return nil, err
}
var fileReader io.Reader
if limit != -1 {
fileReader = io.LimitReader(file, limit)
} else {
fileReader = file
}
// Return a ReadCloser
return ReadCloserWrapper{Reader: fileReader, Closer: chunkedReader}, nil
var retCloser io.Closer = chunkedReader
return o.f.modeHandler.openGetReadCloser(ctx, o, offset, limit, chunkedReader, retCloser, options...)
}
// ObjectInfo describes a wrapped fs.ObjectInfo for being the source