backend/compress: add zstd compression
Added support for reading and writing zstd-compressed files in seekable format using "github.com/klauspost/compress/zstd" and "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg".

Bumped the Go version from 1.24.0 to 1.24.4, as required by "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg".
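For context: the seekable format splits the input into independently compressed zstd frames and appends a seek table, so a reader can jump to any uncompressed offset without decompressing the whole prefix. A minimal round-trip sketch, using only calls that appear in the new code below; the import path follows the fork the commit actually vendors, the exact signatures are inferred from that code rather than upstream docs, and error handling is elided:

    package main

    import (
        "bytes"
        "context"
        "fmt"
        "io"

        szstd "github.com/a1ex3/zstd-seekable-format-go/pkg"
        "github.com/klauspost/compress/zstd"
    )

    func main() {
        var buf bytes.Buffer

        enc, _ := zstd.NewWriter(nil)
        w, _ := szstd.NewWriter(&buf, enc)

        // Each frame returned by the FrameSource becomes an independently
        // compressed block; returning (nil, nil) ends the input.
        frames := [][]byte{[]byte("hello "), []byte("seekable zstd")}
        i := 0
        var src szstd.FrameSource = func() ([]byte, error) {
            if i == len(frames) {
                return nil, nil
            }
            f := frames[i]
            i++
            return f, nil
        }
        _ = w.WriteMany(context.Background(), src)
        _ = w.Close()
        _ = enc.Close()

        // The seek table written at the end of the stream maps uncompressed
        // offsets to frames, so Seek does not decompress the prefix.
        dec, _ := zstd.NewReader(nil)
        r, _ := szstd.NewReader(bytes.NewReader(buf.Bytes()), dec)
        _, _ = r.Seek(6, io.SeekStart)
        out := make([]byte, 8)
        n, _ := r.Read(out)
        fmt.Println(string(out[:n])) // "seekable"
        _ = r.Close()
        dec.Close()
    }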
backend/compress/compress.go (modified)
@@ -2,10 +2,8 @@
 package compress
 
 import (
-    "bufio"
     "bytes"
     "context"
-    "crypto/md5"
     "encoding/base64"
     "encoding/binary"
     "encoding/hex"
@@ -46,6 +44,7 @@ const (
     minCompressionRatio = 1.1
 
     gzFileExt           = ".gz"
+    zstdFileExt         = ".zst"
     metaFileExt         = ".json"
     uncompressedFileExt = ".bin"
 )
@@ -54,6 +53,7 @@ const (
 const (
     Uncompressed = 0
     Gzip         = 2
+    Zstd         = 4
 )
 
 var nameRegexp = regexp.MustCompile(`^(.+?)\.([A-Za-z0-9-_]{11})$`)
@@ -66,6 +66,10 @@ func init() {
             Value: "gzip",
             Help:  "Standard gzip compression with fastest parameters.",
         },
+        {
+            Value: "zstd",
+            Help:  "Zstandard compression — fast modern algorithm offering adjustable speed-to-compression tradeoffs.",
+        },
     }
 
     // Register our remote
@@ -87,17 +91,23 @@ func init() {
             Examples: compressionModeOptions,
         }, {
             Name: "level",
-            Help: `GZIP compression level (-2 to 9).
-
-Generally -1 (default, equivalent to 5) is recommended.
-Levels 1 to 9 increase compression at the cost of speed. Going past 6
-generally offers very little return.
-
-Level -2 uses Huffman encoding only. Only use if you know what you
-are doing.
-Level 0 turns off compression.`,
-            Default:  sgzip.DefaultCompression,
-            Advanced: true,
+            Help: `GZIP (levels -2 to 9):
+- -2 — Huffman encoding only. Only use if you know what you're doing.
+- -1 (default) — recommended; equivalent to level 5.
+- 0 — turns off compression.
+- 1–9 — increase compression at the cost of speed. Going past 6 generally offers very little return.
+
+ZSTD (levels 0 to 4):
+- 0 — turns off compression entirely.
+- 1 — fastest compression with the lowest ratio.
+- 2 (default) — good balance of speed and compression.
+- 3 — better compression, but uses about 2–3x more CPU than the default.
+- 4 — best possible compression ratio (highest CPU cost).
+
+Notes:
+- Choose GZIP for wide compatibility; ZSTD for better speed/ratio tradeoffs.
+- Negative gzip levels: -2 = Huffman-only, -1 = default (≈ level 5).`,
+            Required: true,
         }, {
             Name: "ram_cache_limit",
             Help: `Some remotes don't allow the upload of files with unknown size.
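Once this lands, a compress remote using the new mode could be configured like so (illustrative snippet; the `mode` and `level` keys match the test config later in this diff, while the section and remote names are placeholders):

    [zcompress]
    type = compress
    remote = mydrive:compressed
    mode = zstd
    level = 2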
@@ -112,6 +122,47 @@ this limit will be cached on disk.`,
     })
 }
 
+// compressionModeHandler defines the interface for handling different compression modes
+type compressionModeHandler interface {
+    // processFileNameGetFileExtension returns the file extension for the given compression mode
+    processFileNameGetFileExtension(compressionMode int) string
+
+    // newObjectGetOriginalSize returns the original file size from the metadata
+    newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error)
+
+    // isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds
+    // the configured threshold
+    isCompressible(r io.Reader, compressionMode int) (bool, error)
+
+    // putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata
+    putCompress(
+        ctx context.Context,
+        f *Fs,
+        in io.Reader,
+        src fs.ObjectInfo,
+        options []fs.OpenOption,
+        mimeType string,
+    ) (fs.Object, *ObjectMetadata, error)
+
+    // openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method
+    openGetReadCloser(
+        ctx context.Context,
+        o *Object,
+        offset int64,
+        limit int64,
+        cr chunkedreader.ChunkedReader,
+        closer io.Closer,
+        options ...fs.OpenOption,
+    ) (rc io.ReadCloser, err error)
+
+    // putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm
+    putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error)
+
+    // This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata.
+    // Warning: This function panics if cmeta is not of the expected type.
+    newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata
+}
+
 // Options defines the configuration for this backend
 type Options struct {
     Remote string `config:"remote"`
@@ -125,12 +176,13 @@ type Options struct {
 // Fs represents a wrapped fs.Fs
 type Fs struct {
     fs.Fs
     wrapper  fs.Fs
     name     string
     root     string
     opt      Options
     mode     int          // compression mode id
     features *fs.Features // optional features
+    modeHandler compressionModeHandler // compression mode handler
 }
 
 // NewFs constructs an Fs from the path, container:path
@@ -167,13 +219,28 @@ func NewFs(ctx context.Context, name, rpath string, m configmap.Mapper) (fs.Fs,
         return nil, fmt.Errorf("failed to make remote %s:%q to wrap: %w", wName, remotePath, err)
     }
 
+    compressionMode := compressionModeFromName(opt.CompressionMode)
+    var modeHandler compressionModeHandler
+
+    switch compressionMode {
+    case Gzip:
+        modeHandler = &gzipModeHandler{}
+    case Zstd:
+        modeHandler = &zstdModeHandler{}
+    case Uncompressed:
+        modeHandler = &uncompressedModeHandler{}
+    default:
+        modeHandler = &unknownModeHandler{}
+    }
+
     // Create the wrapping fs
     f := &Fs{
         Fs:   wrappedFs,
         name: name,
         root: rpath,
         opt:  *opt,
-        mode: compressionModeFromName(opt.CompressionMode),
+        mode:        compressionMode,
+        modeHandler: modeHandler,
     }
     // Correct root if definitely pointing to a file
     if err == fs.ErrorIsFile {
@@ -215,10 +282,13 @@ func NewFs(ctx context.Context, name, rpath string, m configmap.Mapper) (fs.Fs,
     return f, err
 }
 
+// compressionModeFromName converts a compression mode name to its int representation.
 func compressionModeFromName(name string) int {
     switch name {
     case "gzip":
         return Gzip
+    case "zstd":
+        return Zstd
     default:
         return Uncompressed
     }
@@ -242,7 +312,7 @@ func base64ToInt64(str string) (int64, error) {
 
 // Processes a file name for a compressed file. Returns the original file name, the extension, and the size of the original file.
 // Returns -2 for the original size if the file is uncompressed.
-func processFileName(compressedFileName string) (origFileName string, extension string, origSize int64, err error) {
+func processFileName(compressedFileName string, modeHandler compressionModeHandler) (origFileName string, extension string, origSize int64, err error) {
     // Separate the filename and size from the extension
     extensionPos := strings.LastIndex(compressedFileName, ".")
     if extensionPos == -1 {
@@ -261,7 +331,8 @@ func processFileName(compressedFileName string) (origFileName string, extension
     if err != nil {
         return "", "", 0, errors.New("could not decode size")
     }
-    return match[1], gzFileExt, size, nil
+    ext := modeHandler.processFileNameGetFileExtension(compressionModeFromName(compressedFileName[extensionPos+1:]))
+    return match[1], ext, size, nil
 }
 
 // Generates the file name for a metadata file
@@ -286,11 +357,15 @@ func unwrapMetadataFile(filename string) (string, bool) {
 
 // makeDataName generates the file name for a data file with specified compression mode
 func makeDataName(remote string, size int64, mode int) (newRemote string) {
-    if mode != Uncompressed {
+    switch mode {
+    case Gzip:
         newRemote = remote + "." + int64ToBase64(size) + gzFileExt
-    } else {
+    case Zstd:
+        newRemote = remote + "." + int64ToBase64(size) + zstdFileExt
+    default:
         newRemote = remote + uncompressedFileExt
     }
 
     return newRemote
 }
 
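To illustrate the naming scheme makeDataName implements: the original size is base64-encoded into the 11-character tag that nameRegexp expects, and the extension selects the decoder on the way back. The size tag below is left as a placeholder rather than a real encoding:

    notes.txt.<11-char base64 size>.gz    gzip-compressed data file
    notes.txt.<11-char base64 size>.zst   zstd-compressed data file
    notes.txt.bin                         stored uncompressed (no size tag)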
@@ -304,7 +379,7 @@ func (f *Fs) dataName(remote string, size int64, compressed bool) (name string)
 
 // addData parses an object and adds it to the DirEntries
 func (f *Fs) addData(entries *fs.DirEntries, o fs.Object) {
-    origFileName, _, size, err := processFileName(o.Remote())
+    origFileName, _, size, err := processFileName(o.Remote(), f.modeHandler)
     if err != nil {
         fs.Errorf(o, "Error on parsing file name: %v", err)
         return
@@ -427,8 +502,12 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
     if err != nil {
         return nil, fmt.Errorf("error decoding metadata: %w", err)
     }
+    size, err := f.modeHandler.newObjectGetOriginalSize(meta)
+    if err != nil {
+        return nil, fmt.Errorf("error reading metadata: %w", err)
+    }
     // Create our Object
-    o, err := f.Fs.NewObject(ctx, makeDataName(remote, meta.CompressionMetadata.Size, meta.Mode))
+    o, err := f.Fs.NewObject(ctx, makeDataName(remote, size, meta.Mode))
     if err != nil {
         return nil, err
     }
@@ -437,7 +516,7 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
 
 // checkCompressAndType checks if an object is compressible and determines it's mime type
 // returns a multireader with the bytes that were read to determine mime type
-func checkCompressAndType(in io.Reader) (newReader io.Reader, compressible bool, mimeType string, err error) {
+func checkCompressAndType(in io.Reader, compressionMode int, modeHandler compressionModeHandler) (newReader io.Reader, compressible bool, mimeType string, err error) {
     in, wrap := accounting.UnWrap(in)
     buf := make([]byte, heuristicBytes)
     n, err := in.Read(buf)
@@ -446,7 +525,7 @@ func checkCompressAndType(in io.Reader) (newReader io.Reader, compressible bool,
         return nil, false, "", err
     }
     mime := mimetype.Detect(buf)
-    compressible, err = isCompressible(bytes.NewReader(buf))
+    compressible, err = modeHandler.isCompressible(bytes.NewReader(buf), compressionMode)
     if err != nil {
         return nil, false, "", err
     }
@@ -454,26 +533,6 @@ func checkCompressAndType(in io.Reader) (newReader io.Reader, compressible bool,
     return wrap(in), compressible, mime.String(), nil
 }
 
-// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds
-// the configured threshold
-func isCompressible(r io.Reader) (bool, error) {
-    var b bytes.Buffer
-    w, err := sgzip.NewWriterLevel(&b, sgzip.DefaultCompression)
-    if err != nil {
-        return false, err
-    }
-    n, err := io.Copy(w, r)
-    if err != nil {
-        return false, err
-    }
-    err = w.Close()
-    if err != nil {
-        return false, err
-    }
-    ratio := float64(n) / float64(b.Len())
-    return ratio > minCompressionRatio, nil
-}
-
 // verifyObjectHash verifies the Objects hash
 func (f *Fs) verifyObjectHash(ctx context.Context, o fs.Object, hasher *hash.MultiHasher, ht hash.Type) error {
     srcHash := hasher.Sums()[ht]
@@ -494,9 +553,9 @@ func (f *Fs) verifyObjectHash(ctx context.Context, o fs.Object, hasher *hash.Mul
 
 type putFn func(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error)
 
-type compressionResult struct {
+type compressionResult[T sgzip.GzipMetadata | SzstdMetadata] struct {
     err  error
-    meta sgzip.GzipMetadata
+    meta T
 }
 
 // replicating some of operations.Rcat functionality because we want to support remotes without streaming
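The type parameter is a Go 1.18+ union constraint: compressionResult can only be instantiated with one of the two concrete metadata types, which keeps each handler's channel plumbing type-safe without an interface. The new handler files below use it exactly this way:

    resultsGzip := make(chan compressionResult[sgzip.GzipMetadata])
    resultsZstd := make(chan compressionResult[SzstdMetadata])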
@@ -537,106 +596,18 @@ func (f *Fs) rcat(ctx context.Context, dstFileName string, in io.ReadCloser, mod
         return nil, fmt.Errorf("failed to write temporary local file: %w", err)
     }
     if _, err = tempFile.Seek(0, 0); err != nil {
-        return nil, err
+        return nil, fmt.Errorf("failed to seek temporary local file: %w", err)
     }
     finfo, err := tempFile.Stat()
     if err != nil {
-        return nil, err
+        return nil, fmt.Errorf("failed to stat temporary local file: %w", err)
     }
     return f.Fs.Put(ctx, tempFile, object.NewStaticObjectInfo(dstFileName, modTime, finfo.Size(), false, nil, f.Fs))
 }
 
 // Put a compressed version of a file. Returns a wrappable object and metadata.
 func (f *Fs) putCompress(ctx context.Context, in io.Reader, src fs.ObjectInfo, options []fs.OpenOption, mimeType string) (fs.Object, *ObjectMetadata, error) {
-    // Unwrap reader accounting
-    in, wrap := accounting.UnWrap(in)
-
-    // Add the metadata hasher
-    metaHasher := md5.New()
-    in = io.TeeReader(in, metaHasher)
-
-    // Compress the file
-    pipeReader, pipeWriter := io.Pipe()
-    results := make(chan compressionResult)
-    go func() {
-        gz, err := sgzip.NewWriterLevel(pipeWriter, f.opt.CompressionLevel)
-        if err != nil {
-            results <- compressionResult{err: err, meta: sgzip.GzipMetadata{}}
-            return
-        }
-        _, err = io.Copy(gz, in)
-        gzErr := gz.Close()
-        if gzErr != nil {
-            fs.Errorf(nil, "Failed to close compress: %v", gzErr)
-            if err == nil {
-                err = gzErr
-            }
-        }
-        closeErr := pipeWriter.Close()
-        if closeErr != nil {
-            fs.Errorf(nil, "Failed to close pipe: %v", closeErr)
-            if err == nil {
-                err = closeErr
-            }
-        }
-        results <- compressionResult{err: err, meta: gz.MetaData()}
-    }()
-    wrappedIn := wrap(bufio.NewReaderSize(pipeReader, bufferSize)) // Probably no longer needed as sgzip has it's own buffering
-
-    // Find a hash the destination supports to compute a hash of
-    // the compressed data.
-    ht := f.Fs.Hashes().GetOne()
-    var hasher *hash.MultiHasher
-    var err error
-    if ht != hash.None {
-        // unwrap the accounting again
-        wrappedIn, wrap = accounting.UnWrap(wrappedIn)
-        hasher, err = hash.NewMultiHasherTypes(hash.NewHashSet(ht))
-        if err != nil {
-            return nil, nil, err
-        }
-        // add the hasher and re-wrap the accounting
-        wrappedIn = io.TeeReader(wrappedIn, hasher)
-        wrappedIn = wrap(wrappedIn)
-    }
-
-    // Transfer the data
-    o, err := f.rcat(ctx, makeDataName(src.Remote(), src.Size(), f.mode), io.NopCloser(wrappedIn), src.ModTime(ctx), options)
-    //o, err := operations.Rcat(ctx, f.Fs, makeDataName(src.Remote(), src.Size(), f.mode), io.NopCloser(wrappedIn), src.ModTime(ctx))
-    if err != nil {
-        if o != nil {
-            removeErr := o.Remove(ctx)
-            if removeErr != nil {
-                fs.Errorf(o, "Failed to remove partially transferred object: %v", err)
-            }
-        }
-        return nil, nil, err
-    }
-    // Check whether we got an error during compression
-    result := <-results
-    err = result.err
-    if err != nil {
-        if o != nil {
-            removeErr := o.Remove(ctx)
-            if removeErr != nil {
-                fs.Errorf(o, "Failed to remove partially compressed object: %v", err)
-            }
-        }
-        return nil, nil, err
-    }
-
-    // Generate metadata
-    meta := newMetadata(result.meta.Size, f.mode, result.meta, hex.EncodeToString(metaHasher.Sum(nil)), mimeType)
-
-    // Check the hashes of the compressed data if we were comparing them
-    if ht != hash.None && hasher != nil {
-        err = f.verifyObjectHash(ctx, o, hasher, ht)
-        if err != nil {
-            return nil, nil, err
-        }
-    }
-
-    return o, meta, nil
+    return f.modeHandler.putCompress(ctx, f, in, src, options, mimeType)
 }
 
 // Put an uncompressed version of a file. Returns a wrappable object and metadata.
@@ -680,7 +651,8 @@ func (f *Fs) putUncompress(ctx context.Context, in io.Reader, src fs.ObjectInfo,
     if err != nil {
         return nil, nil, err
     }
-    return o, newMetadata(o.Size(), Uncompressed, sgzip.GzipMetadata{}, hex.EncodeToString(sum), mimeType), nil
+
+    return f.modeHandler.putUncompressGetNewMetadata(o, Uncompressed, hex.EncodeToString(sum), mimeType, sum)
 }
 
 // This function will write a metadata struct to a metadata Object for an src. Returns a wrappable metadata object.
@@ -751,7 +723,7 @@ func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options .
     o, err := f.NewObject(ctx, src.Remote())
     if err == fs.ErrorObjectNotFound {
         // Get our file compressibility
-        in, compressible, mimeType, err := checkCompressAndType(in)
+        in, compressible, mimeType, err := checkCompressAndType(in, f.mode, f.modeHandler)
         if err != nil {
             return nil, err
         }
@@ -771,7 +743,7 @@ func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, opt
     }
     found := err == nil
 
-    in, compressible, mimeType, err := checkCompressAndType(in)
+    in, compressible, mimeType, err := checkCompressAndType(in, f.mode, f.modeHandler)
     if err != nil {
         return nil, err
     }
@@ -1090,11 +1062,12 @@ func (f *Fs) PublicLink(ctx context.Context, remote string, duration fs.Duration
 
 // ObjectMetadata describes the metadata for an Object.
 type ObjectMetadata struct {
     Mode     int    // Compression mode of the file.
     Size     int64  // Size of the object.
     MD5      string // MD5 hash of the file.
     MimeType string // Mime type of the file
-    CompressionMetadata sgzip.GzipMetadata
+    CompressionMetadataGzip *sgzip.GzipMetadata // Metadata for Gzip compression
+    CompressionMetadataZstd *SzstdMetadata      // Metadata for Zstd compression
 }
 
 // Object with external metadata
@@ -1107,17 +1080,6 @@ type Object struct {
     meta *ObjectMetadata // Metadata struct for this object (nil if not loaded)
 }
 
-// This function generates a metadata object
-func newMetadata(size int64, mode int, cmeta sgzip.GzipMetadata, md5 string, mimeType string) *ObjectMetadata {
-    meta := new(ObjectMetadata)
-    meta.Size = size
-    meta.Mode = mode
-    meta.CompressionMetadata = cmeta
-    meta.MD5 = md5
-    meta.MimeType = mimeType
-    return meta
-}
-
 // This function will read the metadata from a metadata object.
 func readMetadata(ctx context.Context, mo fs.Object) (meta *ObjectMetadata, err error) {
     // Open our meradata object
@@ -1165,7 +1127,7 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
         return o.mo, o.mo.Update(ctx, in, src, options...)
     }
 
-    in, compressible, mimeType, err := checkCompressAndType(in)
+    in, compressible, mimeType, err := checkCompressAndType(in, o.meta.Mode, o.f.modeHandler)
     if err != nil {
         return err
     }
@@ -1278,7 +1240,7 @@ func (o *Object) String() string {
 
 // Remote returns the remote path
 func (o *Object) Remote() string {
-    origFileName, _, _, err := processFileName(o.Object.Remote())
+    origFileName, _, _, err := processFileName(o.Object.Remote(), o.f.modeHandler)
     if err != nil {
         fs.Errorf(o.f, "Could not get remote path for: %s", o.Object.Remote())
         return o.Object.Remote()
@@ -1381,7 +1343,6 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.Read
         return o.Object.Open(ctx, options...)
     }
     // Get offset and limit from OpenOptions, pass the rest to the underlying remote
-    var openOptions = []fs.OpenOption{&fs.SeekOption{Offset: 0}}
     var offset, limit int64 = 0, -1
     for _, option := range options {
         switch x := option.(type) {
@@ -1389,31 +1350,12 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.Read
             offset = x.Offset
         case *fs.RangeOption:
             offset, limit = x.Decode(o.Size())
-        default:
-            openOptions = append(openOptions, option)
         }
     }
     // Get a chunkedreader for the wrapped object
     chunkedReader := chunkedreader.New(ctx, o.Object, initialChunkSize, maxChunkSize, chunkStreams)
-    // Get file handle
-    var file io.Reader
-    if offset != 0 {
-        file, err = sgzip.NewReaderAt(chunkedReader, &o.meta.CompressionMetadata, offset)
-    } else {
-        file, err = sgzip.NewReader(chunkedReader)
-    }
-    if err != nil {
-        return nil, err
-    }
-
-    var fileReader io.Reader
-    if limit != -1 {
-        fileReader = io.LimitReader(file, limit)
-    } else {
-        fileReader = file
-    }
-    // Return a ReadCloser
-    return ReadCloserWrapper{Reader: fileReader, Closer: chunkedReader}, nil
+    var retCloser io.Closer = chunkedReader
+    return o.f.modeHandler.openGetReadCloser(ctx, o, offset, limit, chunkedReader, retCloser, options...)
 }
 
 // ObjectInfo describes a wrapped fs.ObjectInfo for being the source
backend/compress/compress_test.go (modified)
@@ -48,7 +48,27 @@ func TestRemoteGzip(t *testing.T) {
     opt.ExtraConfig = []fstests.ExtraConfigItem{
         {Name: name, Key: "type", Value: "compress"},
         {Name: name, Key: "remote", Value: tempdir},
-        {Name: name, Key: "compression_mode", Value: "gzip"},
+        {Name: name, Key: "mode", Value: "gzip"},
+        {Name: name, Key: "level", Value: "-1"},
+    }
+    opt.QuickTestOK = true
+    fstests.Run(t, &opt)
+}
+
+// TestRemoteZstd tests ZSTD compression
+func TestRemoteZstd(t *testing.T) {
+    if *fstest.RemoteName != "" {
+        t.Skip("Skipping as -remote set")
+    }
+    tempdir := filepath.Join(os.TempDir(), "rclone-compress-test-zstd")
+    name := "TestCompressZstd"
+    opt := defaultOpt
+    opt.RemoteName = name + ":"
+    opt.ExtraConfig = []fstests.ExtraConfigItem{
+        {Name: name, Key: "type", Value: "compress"},
+        {Name: name, Key: "remote", Value: tempdir},
+        {Name: name, Key: "mode", Value: "zstd"},
+        {Name: name, Key: "level", Value: "2"},
     }
     opt.QuickTestOK = true
     fstests.Run(t, &opt)
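Assuming a standard rclone checkout, both local test remotes can be exercised with the usual Go tooling (they skip themselves when -remote is set):

    go test ./backend/compress -v -run 'TestRemoteGzip|TestRemoteZstd'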
backend/compress/gzip_handler.go (new file, 207 lines)
@@ -0,0 +1,207 @@
+package compress
+
+import (
+    "bufio"
+    "bytes"
+    "context"
+    "crypto/md5"
+    "encoding/hex"
+    "errors"
+    "io"
+
+    "github.com/buengese/sgzip"
+
+    "github.com/rclone/rclone/fs"
+    "github.com/rclone/rclone/fs/accounting"
+    "github.com/rclone/rclone/fs/chunkedreader"
+    "github.com/rclone/rclone/fs/hash"
+)
+
+// gzipModeHandler implements compressionModeHandler for gzip
+type gzipModeHandler struct{}
+
+// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds
+// the configured threshold
+func (g *gzipModeHandler) isCompressible(r io.Reader, compressionMode int) (bool, error) {
+    var b bytes.Buffer
+    var n int64
+    w, err := sgzip.NewWriterLevel(&b, sgzip.DefaultCompression)
+    if err != nil {
+        return false, err
+    }
+    n, err = io.Copy(w, r)
+    if err != nil {
+        return false, err
+    }
+    err = w.Close()
+    if err != nil {
+        return false, err
+    }
+    ratio := float64(n) / float64(b.Len())
+    return ratio > minCompressionRatio, nil
+}
+
+// newObjectGetOriginalSize returns the original file size from the metadata
+func (g *gzipModeHandler) newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error) {
+    if meta.CompressionMetadataGzip == nil {
+        return 0, errors.New("missing gzip metadata")
+    }
+    return meta.CompressionMetadataGzip.Size, nil
+}
+
+// openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method
+func (g *gzipModeHandler) openGetReadCloser(
+    ctx context.Context,
+    o *Object,
+    offset int64,
+    limit int64,
+    cr chunkedreader.ChunkedReader,
+    closer io.Closer,
+    options ...fs.OpenOption,
+) (rc io.ReadCloser, err error) {
+    var file io.Reader
+
+    if offset != 0 {
+        file, err = sgzip.NewReaderAt(cr, o.meta.CompressionMetadataGzip, offset)
+    } else {
+        file, err = sgzip.NewReader(cr)
+    }
+    if err != nil {
+        return nil, err
+    }
+
+    var fileReader io.Reader
+    if limit != -1 {
+        fileReader = io.LimitReader(file, limit)
+    } else {
+        fileReader = file
+    }
+    // Return a ReadCloser
+    return ReadCloserWrapper{Reader: fileReader, Closer: closer}, nil
+}
+
+// processFileNameGetFileExtension returns the file extension for the given compression mode
+func (g *gzipModeHandler) processFileNameGetFileExtension(compressionMode int) string {
+    if compressionMode == Gzip {
+        return gzFileExt
+    }
+
+    return ""
+}
+
+// putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata
+func (g *gzipModeHandler) putCompress(
+    ctx context.Context,
+    f *Fs,
+    in io.Reader,
+    src fs.ObjectInfo,
+    options []fs.OpenOption,
+    mimeType string,
+) (fs.Object, *ObjectMetadata, error) {
+    // Unwrap reader accounting
+    in, wrap := accounting.UnWrap(in)
+
+    // Add the metadata hasher
+    metaHasher := md5.New()
+    in = io.TeeReader(in, metaHasher)
+
+    // Compress the file
+    pipeReader, pipeWriter := io.Pipe()
+
+    resultsGzip := make(chan compressionResult[sgzip.GzipMetadata])
+    go func() {
+        gz, err := sgzip.NewWriterLevel(pipeWriter, f.opt.CompressionLevel)
+        if err != nil {
+            resultsGzip <- compressionResult[sgzip.GzipMetadata]{err: err, meta: sgzip.GzipMetadata{}}
+            close(resultsGzip)
+            return
+        }
+        _, err = io.Copy(gz, in)
+        gzErr := gz.Close()
+        if gzErr != nil && err == nil {
+            err = gzErr
+        }
+        closeErr := pipeWriter.Close()
+        if closeErr != nil && err == nil {
+            err = closeErr
+        }
+        resultsGzip <- compressionResult[sgzip.GzipMetadata]{err: err, meta: gz.MetaData()}
+        close(resultsGzip)
+    }()
+
+    wrappedIn := wrap(bufio.NewReaderSize(pipeReader, bufferSize)) // Probably no longer needed as sgzip has it's own buffering
+
+    // Find a hash the destination supports to compute a hash of
+    // the compressed data.
+    ht := f.Fs.Hashes().GetOne()
+    var hasher *hash.MultiHasher
+    var err error
+    if ht != hash.None {
+        // unwrap the accounting again
+        wrappedIn, wrap = accounting.UnWrap(wrappedIn)
+        hasher, err = hash.NewMultiHasherTypes(hash.NewHashSet(ht))
+        if err != nil {
+            return nil, nil, err
+        }
+        // add the hasher and re-wrap the accounting
+        wrappedIn = io.TeeReader(wrappedIn, hasher)
+        wrappedIn = wrap(wrappedIn)
+    }
+
+    // Transfer the data
+    o, err := f.rcat(ctx, makeDataName(src.Remote(), src.Size(), f.mode), io.NopCloser(wrappedIn), src.ModTime(ctx), options)
+    if err != nil {
+        if o != nil {
+            if removeErr := o.Remove(ctx); removeErr != nil {
+                fs.Errorf(o, "Failed to remove partially transferred object: %v", removeErr)
+            }
+        }
+        return nil, nil, err
+    }
+    // Check whether we got an error during compression
+    result := <-resultsGzip
+    if result.err != nil {
+        if o != nil {
+            if removeErr := o.Remove(ctx); removeErr != nil {
+                fs.Errorf(o, "Failed to remove partially compressed object: %v", removeErr)
+            }
+        }
+        return nil, nil, result.err
+    }
+
+    // Generate metadata
+    meta := g.newMetadata(result.meta.Size, f.mode, result.meta, hex.EncodeToString(metaHasher.Sum(nil)), mimeType)
+
+    // Check the hashes of the compressed data if we were comparing them
+    if ht != hash.None && hasher != nil {
+        err = f.verifyObjectHash(ctx, o, hasher, ht)
+        if err != nil {
+            return nil, nil, err
+        }
+    }
+    return o, meta, nil
+}
+
+// putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm
+func (g *gzipModeHandler) putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error) {
+    return o, g.newMetadata(o.Size(), mode, sgzip.GzipMetadata{}, hex.EncodeToString(sum), mimeType), nil
+}
+
+// This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata.
+// Warning: This function panics if cmeta is not of the expected type.
+func (g *gzipModeHandler) newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata {
+    meta, ok := cmeta.(sgzip.GzipMetadata)
+    if !ok {
+        panic("invalid cmeta type: expected sgzip.GzipMetadata")
+    }
+
+    objMeta := new(ObjectMetadata)
+    objMeta.Size = size
+    objMeta.Mode = mode
+    objMeta.CompressionMetadataGzip = &meta
+    objMeta.CompressionMetadataZstd = nil
+    objMeta.MD5 = md5
+    objMeta.MimeType = mimeType
+
+    return objMeta
+}
backend/compress/szstd_helper.go (new file, 327 lines)
@@ -0,0 +1,327 @@
+package compress
+
+import (
+    "context"
+    "errors"
+    "io"
+    "runtime"
+    "sync"
+
+    szstd "github.com/a1ex3/zstd-seekable-format-go/pkg"
+    "github.com/klauspost/compress/zstd"
+)
+
+const szstdChunkSize int = 1 << 20 // 1 MiB chunk size
+
+// SzstdMetadata holds metadata for szstd compressed files.
+type SzstdMetadata struct {
+    BlockSize int      // BlockSize is the size of the blocks in the zstd file
+    Size      int64    // Size is the uncompressed size of the file
+    BlockData []uint32 // BlockData is the block data for the zstd file, used for seeking
+}
+
+// SzstdWriter is a writer that compresses data in szstd format.
+type SzstdWriter struct {
+    enc      *zstd.Encoder
+    w        szstd.ConcurrentWriter
+    metadata SzstdMetadata
+    mu       sync.Mutex
+}
+
+// NewWriterSzstd creates a new szstd writer with the specified options.
+// It initializes the szstd writer with a zstd encoder and returns a pointer to the SzstdWriter.
+// The writer can be used to write data in chunks, and it will automatically handle block sizes and metadata.
+func NewWriterSzstd(w io.Writer, opts ...zstd.EOption) (*SzstdWriter, error) {
+    encoder, err := zstd.NewWriter(nil, opts...)
+    if err != nil {
+        return nil, err
+    }
+
+    sw, err := szstd.NewWriter(w, encoder)
+    if err != nil {
+        if err := encoder.Close(); err != nil {
+            return nil, err
+        }
+        return nil, err
+    }
+
+    return &SzstdWriter{
+        enc: encoder,
+        w:   sw,
+        metadata: SzstdMetadata{
+            BlockSize: szstdChunkSize,
+            Size:      0,
+        },
+    }, nil
+}
+
+// Write writes data to the szstd writer in chunks of szstdChunkSize.
+// It handles the block size and metadata updates automatically.
+func (w *SzstdWriter) Write(p []byte) (int, error) {
+    if len(p) == 0 {
+        return 0, nil
+    }
+
+    if w.metadata.BlockData == nil {
+        numBlocks := (len(p) + w.metadata.BlockSize - 1) / w.metadata.BlockSize
+        w.metadata.BlockData = make([]uint32, 1, numBlocks+1)
+        w.metadata.BlockData[0] = 0
+    }
+
+    start := 0
+    total := len(p)
+
+    var writerFunc szstd.FrameSource = func() ([]byte, error) {
+        if start >= total {
+            return nil, nil
+        }
+
+        end := min(start+w.metadata.BlockSize, total)
+        chunk := p[start:end]
+        size := end - start
+
+        w.mu.Lock()
+        w.metadata.Size += int64(size)
+        w.mu.Unlock()
+
+        start = end
+        return chunk, nil
+    }
+
+    // write sizes of compressed blocks in the callback
+    err := w.w.WriteMany(context.Background(), writerFunc,
+        szstd.WithWriteCallback(func(size uint32) {
+            w.mu.Lock()
+            lastOffset := w.metadata.BlockData[len(w.metadata.BlockData)-1]
+            w.metadata.BlockData = append(w.metadata.BlockData, lastOffset+size)
+            w.mu.Unlock()
+        }),
+    )
+    if err != nil {
+        return 0, err
+    }
+
+    return total, nil
+}
+
+// Close closes the SzstdWriter and its underlying encoder.
+func (w *SzstdWriter) Close() error {
+    if err := w.w.Close(); err != nil {
+        return err
+    }
+    if err := w.enc.Close(); err != nil {
+        return err
+    }
+
+    return nil
+}
+
+// GetMetadata returns the metadata of the szstd writer.
+func (w *SzstdWriter) GetMetadata() SzstdMetadata {
+    return w.metadata
+}
+
+// SzstdReaderAt is a reader that allows random access in szstd compressed data.
+type SzstdReaderAt struct {
+    r        szstd.Reader
+    decoder  *zstd.Decoder
+    metadata *SzstdMetadata
+    pos      int64
+    mu       sync.Mutex
+}
+
+// NewReaderAtSzstd creates a new SzstdReaderAt at the specified io.ReadSeeker.
+func NewReaderAtSzstd(rs io.ReadSeeker, meta *SzstdMetadata, offset int64, opts ...zstd.DOption) (*SzstdReaderAt, error) {
+    decoder, err := zstd.NewReader(nil, opts...)
+    if err != nil {
+        return nil, err
+    }
+
+    r, err := szstd.NewReader(rs, decoder)
+    if err != nil {
+        decoder.Close()
+        return nil, err
+    }
+
+    sr := &SzstdReaderAt{
+        r:        r,
+        decoder:  decoder,
+        metadata: meta,
+        pos:      0,
+    }
+
+    // Set initial position to the provided offset
+    if _, err := sr.Seek(offset, io.SeekStart); err != nil {
+        if err := sr.Close(); err != nil {
+            return nil, err
+        }
+        return nil, err
+    }
+
+    return sr, nil
+}
+
+// Seek sets the offset for the next Read.
+func (s *SzstdReaderAt) Seek(offset int64, whence int) (int64, error) {
+    s.mu.Lock()
+    defer s.mu.Unlock()
+
+    pos, err := s.r.Seek(offset, whence)
+    if err == nil {
+        s.pos = pos
+    }
+    return pos, err
+}
+
+func (s *SzstdReaderAt) Read(p []byte) (int, error) {
+    s.mu.Lock()
+    defer s.mu.Unlock()
+
+    n, err := s.r.Read(p)
+    if err == nil {
+        s.pos += int64(n)
+    }
+    return n, err
+}
+
+// ReadAt reads data at the specified offset.
+func (s *SzstdReaderAt) ReadAt(p []byte, off int64) (int, error) {
+    if off < 0 {
+        return 0, errors.New("invalid offset")
+    }
+    if off >= s.metadata.Size {
+        return 0, io.EOF
+    }
+
+    endOff := min(off+int64(len(p)), s.metadata.Size)
+
+    // Find all blocks covered by the range
+    type blockInfo struct {
+        index         int   // Block index
+        offsetInBlock int64 // Offset within the block for starting reading
+        bytesToRead   int64 // How many bytes to read from this block
+    }
+
+    var blocks []blockInfo
+    uncompressedOffset := int64(0)
+    currentOff := off
+
+    for i := 0; i < len(s.metadata.BlockData)-1; i++ {
+        blockUncompressedEnd := min(uncompressedOffset+int64(s.metadata.BlockSize), s.metadata.Size)
+
+        if currentOff < blockUncompressedEnd && endOff > uncompressedOffset {
+            offsetInBlock := max(0, currentOff-uncompressedOffset)
+            bytesToRead := min(blockUncompressedEnd-uncompressedOffset-offsetInBlock, endOff-currentOff)
+
+            blocks = append(blocks, blockInfo{
+                index:         i,
+                offsetInBlock: offsetInBlock,
+                bytesToRead:   bytesToRead,
+            })
+
+            currentOff += bytesToRead
+            if currentOff >= endOff {
+                break
+            }
+        }
+        uncompressedOffset = blockUncompressedEnd
+    }
+
+    if len(blocks) == 0 {
+        return 0, io.EOF
+    }
+
+    // Parallel block decoding
+    type decodeResult struct {
+        index int
+        data  []byte
+        err   error
+    }
+
+    resultCh := make(chan decodeResult, len(blocks))
+    var wg sync.WaitGroup
+    sem := make(chan struct{}, runtime.NumCPU())
+
+    for _, block := range blocks {
+        wg.Add(1)
+        go func(block blockInfo) {
+            defer wg.Done()
+            sem <- struct{}{}
+            defer func() { <-sem }()
+
+            startOffset := int64(s.metadata.BlockData[block.index])
+            endOffset := int64(s.metadata.BlockData[block.index+1])
+            compressedSize := endOffset - startOffset
+
+            compressed := make([]byte, compressedSize)
+            _, err := s.r.ReadAt(compressed, startOffset)
+            if err != nil && err != io.EOF {
+                resultCh <- decodeResult{index: block.index, err: err}
+                return
+            }
+
+            decoded, err := s.decoder.DecodeAll(compressed, nil)
+            if err != nil {
+                resultCh <- decodeResult{index: block.index, err: err}
+                return
+            }
+
+            resultCh <- decodeResult{index: block.index, data: decoded, err: nil}
+        }(block)
+    }
+
+    go func() {
+        wg.Wait()
+        close(resultCh)
+    }()
+
+    // Collect results in block index order
+    totalRead := 0
+    results := make(map[int]decodeResult)
+    expected := len(blocks)
+    minIndex := blocks[0].index
+
+    for res := range resultCh {
+        results[res.index] = res
+        for {
+            if result, ok := results[minIndex]; ok {
+                if result.err != nil {
+                    return 0, result.err
+                }
+                // find the corresponding blockInfo
+                var blk blockInfo
+                for _, b := range blocks {
+                    if b.index == result.index {
+                        blk = b
+                        break
+                    }
+                }
+
+                start := blk.offsetInBlock
+                end := start + blk.bytesToRead
+                copy(p[totalRead:totalRead+int(blk.bytesToRead)], result.data[start:end])
+                totalRead += int(blk.bytesToRead)
+                minIndex++
+                if minIndex-blocks[0].index >= len(blocks) {
+                    break
+                }
+            } else {
+                break
+            }
+        }
+        if len(results) == expected && minIndex-blocks[0].index >= len(blocks) {
+            break
+        }
+    }
+
+    return totalRead, nil
+}
+
+// Close closes the SzstdReaderAt and underlying decoder.
+func (s *SzstdReaderAt) Close() error {
+    if err := s.r.Close(); err != nil {
+        return err
+    }
+    s.decoder.Close()
+    return nil
+}
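A sketch of how these helpers fit together (hypothetical usage, error handling elided). Data written through SzstdWriter is cut into 1 MiB frames and BlockData records cumulative compressed frame sizes, so for three frames of compressed sizes c0, c1, c2 it ends up as [0, c0, c0+c1, c0+c1+c2]; ReadAt then maps an uncompressed offset to a frame index by dividing by BlockSize and only decodes the frames it needs:

    var buf bytes.Buffer
    w, _ := NewWriterSzstd(&buf, zstd.WithEncoderLevel(zstd.SpeedDefault))
    _, _ = w.Write(data) // data: any []byte spanning several 1 MiB frames
    _ = w.Close()
    meta := w.GetMetadata() // BlockSize, uncompressed Size, BlockData offsets

    r, _ := NewReaderAtSzstd(bytes.NewReader(buf.Bytes()), &meta, 0)
    part := make([]byte, 64)
    _, _ = r.ReadAt(part, int64(meta.BlockSize)) // starts in the second frame
    _ = r.Close()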
backend/compress/uncompressed_handler.go (new file, 65 lines)
@@ -0,0 +1,65 @@
+package compress
+
+import (
+    "context"
+    "fmt"
+    "io"
+
+    "github.com/rclone/rclone/fs"
+    "github.com/rclone/rclone/fs/chunkedreader"
+)
+
+// uncompressedModeHandler implements compressionModeHandler for uncompressed files
+type uncompressedModeHandler struct{}
+
+// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds
+// the configured threshold
+func (u *uncompressedModeHandler) isCompressible(r io.Reader, compressionMode int) (bool, error) {
+    return false, nil
+}
+
+// newObjectGetOriginalSize returns the original file size from the metadata
+func (u *uncompressedModeHandler) newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error) {
+    return 0, nil
+}
+
+// openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method
+func (u *uncompressedModeHandler) openGetReadCloser(
+    ctx context.Context,
+    o *Object,
+    offset int64,
+    limit int64,
+    cr chunkedreader.ChunkedReader,
+    closer io.Closer,
+    options ...fs.OpenOption,
+) (rc io.ReadCloser, err error) {
+    return o.Object.Open(ctx, options...)
+}
+
+// processFileNameGetFileExtension returns the file extension for the given compression mode
+func (u *uncompressedModeHandler) processFileNameGetFileExtension(compressionMode int) string {
+    return ""
+}
+
+// putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata
+func (u *uncompressedModeHandler) putCompress(
+    ctx context.Context,
+    f *Fs,
+    in io.Reader,
+    src fs.ObjectInfo,
+    options []fs.OpenOption,
+    mimeType string,
+) (fs.Object, *ObjectMetadata, error) {
+    return nil, nil, fmt.Errorf("unsupported compression mode %d", f.mode)
+}
+
+// putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm
+func (u *uncompressedModeHandler) putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error) {
+    return nil, nil, fmt.Errorf("unsupported compression mode %d", Uncompressed)
+}
+
+// This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata.
+// Warning: This function panics if cmeta is not of the expected type.
+func (u *uncompressedModeHandler) newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata {
+    return nil
+}
backend/compress/unknown_handler.go (new file, 65 lines)
@@ -0,0 +1,65 @@
+package compress
+
+import (
+    "context"
+    "fmt"
+    "io"
+
+    "github.com/rclone/rclone/fs"
+    "github.com/rclone/rclone/fs/chunkedreader"
+)
+
+// unknownModeHandler implements compressionModeHandler for unknown compression types
+type unknownModeHandler struct{}
+
+// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds
+// the configured threshold
+func (unk *unknownModeHandler) isCompressible(r io.Reader, compressionMode int) (bool, error) {
+    return false, fmt.Errorf("unknown compression mode %d", compressionMode)
+}
+
+// newObjectGetOriginalSize returns the original file size from the metadata
+func (unk *unknownModeHandler) newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error) {
+    return 0, nil
+}
+
+// openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method
+func (unk *unknownModeHandler) openGetReadCloser(
+    ctx context.Context,
+    o *Object,
+    offset int64,
+    limit int64,
+    cr chunkedreader.ChunkedReader,
+    closer io.Closer,
+    options ...fs.OpenOption,
+) (rc io.ReadCloser, err error) {
+    return nil, fmt.Errorf("unknown compression mode %d", o.meta.Mode)
+}
+
+// processFileNameGetFileExtension returns the file extension for the given compression mode
+func (unk *unknownModeHandler) processFileNameGetFileExtension(compressionMode int) string {
+    return ""
+}
+
+// putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata
+func (unk *unknownModeHandler) putCompress(
+    ctx context.Context,
+    f *Fs,
+    in io.Reader,
+    src fs.ObjectInfo,
+    options []fs.OpenOption,
+    mimeType string,
+) (fs.Object, *ObjectMetadata, error) {
+    return nil, nil, fmt.Errorf("unknown compression mode %d", f.mode)
+}
+
+// putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm
+func (unk *unknownModeHandler) putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error) {
+    return nil, nil, fmt.Errorf("unknown compression mode")
+}
+
+// This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata.
+// Warning: This function panics if cmeta is not of the expected type.
+func (unk *unknownModeHandler) newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata {
+    return nil
+}
192
backend/compress/zstd_handler.go
Normal file
192
backend/compress/zstd_handler.go
Normal file
@@ -0,0 +1,192 @@
|
|||||||
package compress

import (
	"bufio"
	"bytes"
	"context"
	"crypto/md5"
	"encoding/hex"
	"errors"
	"io"

	"github.com/klauspost/compress/zstd"

	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/accounting"
	"github.com/rclone/rclone/fs/chunkedreader"
	"github.com/rclone/rclone/fs/hash"
)

// zstdModeHandler implements compressionModeHandler for zstd
type zstdModeHandler struct{}

// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds
// the configured threshold
func (z *zstdModeHandler) isCompressible(r io.Reader, compressionMode int) (bool, error) {
	var b bytes.Buffer
	var n int64
	w, err := NewWriterSzstd(&b, zstd.WithEncoderLevel(zstd.SpeedDefault))
	if err != nil {
		return false, err
	}
	n, err = io.Copy(w, r)
	if err != nil {
		return false, err
	}
	err = w.Close()
	if err != nil {
		return false, err
	}
	ratio := float64(n) / float64(b.Len())
	return ratio > minCompressionRatio, nil
}

// newObjectGetOriginalSize returns the original file size from the metadata
func (z *zstdModeHandler) newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error) {
	if meta.CompressionMetadataZstd == nil {
		return 0, errors.New("missing zstd metadata")
	}
	return meta.CompressionMetadataZstd.Size, nil
}

// openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method
func (z *zstdModeHandler) openGetReadCloser(
	ctx context.Context,
	o *Object,
	offset int64,
	limit int64,
	cr chunkedreader.ChunkedReader,
	closer io.Closer,
	options ...fs.OpenOption,
) (rc io.ReadCloser, err error) {
	var file io.Reader

	if offset != 0 {
		file, err = NewReaderAtSzstd(cr, o.meta.CompressionMetadataZstd, offset)
	} else {
		file, err = zstd.NewReader(cr)
	}
	if err != nil {
		return nil, err
	}

	var fileReader io.Reader
	if limit != -1 {
		fileReader = io.LimitReader(file, limit)
	} else {
		fileReader = file
	}
	// Return a ReadCloser
	return ReadCloserWrapper{Reader: fileReader, Closer: closer}, nil
}

// processFileNameGetFileExtension returns the file extension for the given compression mode
func (z *zstdModeHandler) processFileNameGetFileExtension(compressionMode int) string {
	if compressionMode == Zstd {
		return zstdFileExt
	}

	return ""
}

// putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata
func (z *zstdModeHandler) putCompress(
	ctx context.Context,
	f *Fs,
	in io.Reader,
	src fs.ObjectInfo,
	options []fs.OpenOption,
	mimeType string,
) (fs.Object, *ObjectMetadata, error) {
	// Unwrap reader accounting
	in, wrap := accounting.UnWrap(in)

	// Add the metadata hasher
	metaHasher := md5.New()
	in = io.TeeReader(in, metaHasher)

	// Compress the file
	pipeReader, pipeWriter := io.Pipe()

	resultsZstd := make(chan compressionResult[SzstdMetadata])
	go func() {
		writer, err := NewWriterSzstd(pipeWriter, zstd.WithEncoderLevel(zstd.EncoderLevel(f.opt.CompressionLevel)))
		if err != nil {
			// Close the pipe with the error so the uploading side is not
			// left blocked waiting for data that will never arrive.
			_ = pipeWriter.CloseWithError(err)
			resultsZstd <- compressionResult[SzstdMetadata]{err: err}
			close(resultsZstd)
			return
		}
		_, err = io.Copy(writer, in)
		if wErr := writer.Close(); wErr != nil && err == nil {
			err = wErr
		}
		if cErr := pipeWriter.Close(); cErr != nil && err == nil {
			err = cErr
		}

		resultsZstd <- compressionResult[SzstdMetadata]{err: err, meta: writer.GetMetadata()}
		close(resultsZstd)
	}()

	wrappedIn := wrap(bufio.NewReaderSize(pipeReader, bufferSize))

	ht := f.Fs.Hashes().GetOne()
	var hasher *hash.MultiHasher
	var err error
	if ht != hash.None {
		wrappedIn, wrap = accounting.UnWrap(wrappedIn)
		hasher, err = hash.NewMultiHasherTypes(hash.NewHashSet(ht))
		if err != nil {
			return nil, nil, err
		}
		wrappedIn = io.TeeReader(wrappedIn, hasher)
		wrappedIn = wrap(wrappedIn)
	}

	o, err := f.rcat(ctx, makeDataName(src.Remote(), src.Size(), f.mode), io.NopCloser(wrappedIn), src.ModTime(ctx), options)
	if err != nil {
		return nil, nil, err
	}

	result := <-resultsZstd
	if result.err != nil {
		if o != nil {
			_ = o.Remove(ctx)
		}
		return nil, nil, result.err
	}

	// Build the metadata, using the uncompressed size recorded by the seekable writer
	meta := z.newMetadata(result.meta.Size, f.mode, result.meta, hex.EncodeToString(metaHasher.Sum(nil)), mimeType)
	if ht != hash.None && hasher != nil {
		err = f.verifyObjectHash(ctx, o, hasher, ht)
		if err != nil {
			return nil, nil, err
		}
	}
	return o, meta, nil
}

// putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm
func (z *zstdModeHandler) putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error) {
	return o, z.newMetadata(o.Size(), mode, SzstdMetadata{}, hex.EncodeToString(sum), mimeType), nil
}

// newMetadata generates an ObjectMetadata from SzstdMetadata.
// Warning: this function panics if cmeta is not of the expected type.
func (z *zstdModeHandler) newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata {
	meta, ok := cmeta.(SzstdMetadata)
	if !ok {
		panic("invalid cmeta type: expected SzstdMetadata")
	}

	objMeta := new(ObjectMetadata)
	objMeta.Size = size
	objMeta.Mode = mode
	objMeta.CompressionMetadataGzip = nil
	objMeta.CompressionMetadataZstd = &meta
	objMeta.MD5 = md5
	objMeta.MimeType = mimeType

	return objMeta
}
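A detail worth pausing on in putCompress above: compression and upload run concurrently through an io.Pipe, with the compressor goroutine reporting its error and metadata back over a channel once the upload side has drained the pipe. Below is a minimal, self-contained sketch of that pattern using plain klauspost zstd; compressThroughPipe is a name invented for this example, and the seekable wrapper plus rclone accounting and hashing plumbing are deliberately left out:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"

	"github.com/klauspost/compress/zstd"
)

// compressThroughPipe streams src through a zstd encoder running in its
// own goroutine, mimicking putCompress: the consumer (here a buffer, in
// the backend the upload) reads one end of an io.Pipe while the encoder
// feeds the other.
func compressThroughPipe(src io.Reader) ([]byte, error) {
	pr, pw := io.Pipe()
	errc := make(chan error, 1) // buffered so the goroutine never blocks

	go func() {
		enc, err := zstd.NewWriter(pw, zstd.WithEncoderLevel(zstd.SpeedDefault))
		if err == nil {
			_, err = io.Copy(enc, src)
			if cErr := enc.Close(); err == nil {
				err = cErr
			}
		}
		// Close the pipe with the first error (nil on success) so the
		// reading side unblocks either way.
		pw.CloseWithError(err)
		errc <- err
	}()

	var buf bytes.Buffer
	if _, err := io.Copy(&buf, pr); err != nil {
		return nil, err
	}
	if err := <-errc; err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}

func main() {
	data, err := compressThroughPipe(strings.NewReader("hello, seekable world"))
	if err != nil {
		panic(err)
	}
	fmt.Printf("compressed to %d bytes\n", len(data))
}
```

The buffered error channel plays the role of resultsZstd above: it guarantees the goroutine can always hand off its result, just as the backend's post-upload receive does.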
@@ -23,6 +23,7 @@ To use this remote, all you need to do is specify another remote and a
 compression mode to use:
 
 ```text
+$ rclone config
 Current remotes:
 
 Name                 Type
@@ -30,7 +31,6 @@ Name                 Type
 remote_to_press      sometype
 
 e) Edit existing remote
-$ rclone config
 n) New remote
 d) Delete remote
 r) Rename remote
@@ -39,45 +39,74 @@ s) Set configuration password
 q) Quit config
 e/n/d/r/c/s/q> n
 name> compress
+
+Option Storage.
+Type of storage to configure.
+Choose a number from below, or type in your own value.
 ...
-8 / Compress a remote
-  \ "compress"
+12 / Compress a remote
+   \ (compress)
 ...
 Storage> compress
-** See help for compress backend at: https://rclone.org/compress/ **
+
+Option remote.
 Remote to compress.
-Enter a string value. Press Enter for the default ("").
+Enter a value.
 remote> remote_to_press:subdir
+
+Option mode.
 Compression mode.
-Enter a string value. Press Enter for the default ("gzip").
-Choose a number from below, or type in your own value
-1 / Gzip compression balanced for speed and compression strength.
-  \ "gzip"
-compression_mode> gzip
-Edit advanced config? (y/n)
+Choose a number from below, or type in your own value of type string.
+Press Enter for the default (gzip).
+ 1 / Standard gzip compression with fastest parameters.
+   \ (gzip)
+ 2 / Zstandard compression — fast modern algorithm offering adjustable speed-to-compression tradeoffs.
+   \ (zstd)
+mode> gzip
+
+Option level.
+GZIP (levels -2 to 9):
+- -2 — Huffman encoding only. Only use if you know what you're doing.
+- -1 (default) — recommended; equivalent to level 5.
+- 0 — turns off compression.
+- 1–9 — increase compression at the cost of speed. Going past 6 generally offers very little return.
+
+ZSTD (levels 0 to 4):
+- 0 — turns off compression entirely.
+- 1 — fastest compression with the lowest ratio.
+- 2 (default) — good balance of speed and compression.
+- 3 — better compression, but uses about 2–3x more CPU than the default.
+- 4 — best possible compression ratio (highest CPU cost).
+
+Notes:
+- Choose GZIP for wide compatibility; ZSTD for better speed/ratio tradeoffs.
+- Negative gzip levels: -2 = Huffman-only, -1 = default (≈ level 5).
+Enter a value.
+level> -1
+
+Edit advanced config?
 y) Yes
 n) No (default)
 y/n> n
-Remote config
---------------------
-[compress]
-type = compress
-remote = remote_to_press:subdir
-compression_mode = gzip
---------------------
+Configuration complete.
+Options:
+- type: compress
+- remote: remote_to_press:subdir
+- mode: gzip
+- level: -1
+Keep this "compress" remote?
 y) Yes this is OK (default)
 e) Edit this remote
 d) Delete this remote
 y/e/d> y
 ```
 
-### Compression Modes
+### Compression Algorithms
 
-Currently only gzip compression is supported. It provides a decent balance
-between speed and size and is well supported by other applications. Compression
-strength can further be configured via an advanced setting where 0 is no
-compression and 9 is strongest compression.
+- **GZIP** – a well-established and widely adopted algorithm that strikes a solid balance between compression speed and ratio. It supports compression levels from -2 to 9, with the default -1 (roughly equivalent to level 5) offering an effective middle ground for most scenarios.
+
+- **Zstandard (zstd)** – a modern, high-performance algorithm that offers precise control over the trade-off between speed and compression efficiency. Compression levels range from 0 (no compression) to 4 (maximum compression).
 
 ### File types
 
@@ -124,29 +153,38 @@ Properties:
 - Examples:
     - "gzip"
         - Standard gzip compression with fastest parameters.
+    - "zstd"
+        - Zstandard compression — fast modern algorithm offering adjustable speed-to-compression tradeoffs.
 
-### Advanced options
-
-Here are the Advanced options specific to compress (Compress a remote).
 
 #### --compress-level
 
-GZIP compression level (-2 to 9).
-
-Generally -1 (default, equivalent to 5) is recommended.
-Levels 1 to 9 increase compression at the cost of speed. Going past 6
-generally offers very little return.
-
-Level -2 uses Huffman encoding only. Only use if you know what you
-are doing.
-Level 0 turns off compression.
+GZIP (levels -2 to 9):
+- -2 — Huffman encoding only. Only use if you know what you're doing.
+- -1 (default) — recommended; equivalent to level 5.
+- 0 — turns off compression.
+- 1–9 — increase compression at the cost of speed. Going past 6 generally offers very little return.
+
+ZSTD (levels 0 to 4):
+- 0 — turns off compression entirely.
+- 1 — fastest compression with the lowest ratio.
+- 2 (default) — good balance of speed and compression.
+- 3 — better compression, but uses about 2–3x more CPU than the default.
+- 4 — best possible compression ratio (highest CPU cost).
+
+Notes:
+- Choose GZIP for wide compatibility; ZSTD for better speed/ratio tradeoffs.
+- Negative gzip levels: -2 = Huffman-only, -1 = default (≈ level 5).
 
 Properties:
 
 - Config:      level
 - Env Var:     RCLONE_COMPRESS_LEVEL
-- Type:        int
-- Default:     -1
+- Type:        string
+- Required:    true
+
+### Advanced options
+
+Here are the Advanced options specific to compress (Compress a remote).
 
 #### --compress-ram-cache-limit
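For orientation on the level numbers in the help text above: zstd levels 1 through 4 correspond to the named encoder levels in github.com/klauspost/compress/zstd, which the handler selects by casting the configured level to zstd.EncoderLevel. The table below is an illustration of that correspondence, not code from the backend:

```go
package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

// Illustrative mapping of the documented levels 1-4 onto klauspost
// encoder levels. Level 0 disables compression in the backend instead
// of selecting an encoder level, so it has no entry here.
var zstdLevels = map[int]zstd.EncoderLevel{
	1: zstd.SpeedFastest,           // fastest compression, lowest ratio
	2: zstd.SpeedDefault,           // default balance of speed and ratio
	3: zstd.SpeedBetterCompression, // roughly 2-3x more CPU than default
	4: zstd.SpeedBestCompression,   // best ratio, highest CPU cost
}

func main() {
	for lvl := 1; lvl <= 4; lvl++ {
		fmt.Printf("level %d -> %v\n", lvl, zstdLevels[lvl])
	}
}
```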
@@ -133,6 +133,9 @@ backends:
 - backend: "compress"
   remote: "TestCompress:"
   fastlist: false
+- backend: "compress"
+  remote: "TestCompressZstd:"
+  fastlist: false
 # - backend: "compress"
 #   remote: "TestCompressSwift:"
 #   fastlist: false
go.mod (4 lines changed)
@@ -1,6 +1,6 @@
 module github.com/rclone/rclone
 
-go 1.24.0
+go 1.24.4
 
 require (
 	bazil.org/fuse v0.0.0-20230120002735-62a210ff1fd5
@@ -11,6 +11,7 @@ require (
 	github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358
 	github.com/Files-com/files-sdk-go/v3 v3.2.242
 	github.com/Max-Sum/base32768 v0.0.0-20230304063302-18e6ce5945fd
+	github.com/a1ex3/zstd-seekable-format-go/pkg v0.10.0
 	github.com/a8m/tree v0.0.0-20240104212747-2c8764a5f17e
 	github.com/aalpar/deheap v0.0.0-20210914013432-0cc84d79dec3
 	github.com/abbot/go-http-auth v0.4.0
@@ -173,6 +174,7 @@ require (
 	github.com/gofrs/flock v0.12.1 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang-jwt/jwt/v5 v5.3.0 // indirect
+	github.com/google/btree v1.1.3 // indirect
 	github.com/google/s2a-go v0.1.9 // indirect
 	github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect
 	github.com/googleapis/gax-go/v2 v2.15.0 // indirect
go.sum (4 lines changed)
@@ -90,6 +90,8 @@ github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiU
 github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y=
 github.com/STARRY-S/zip v0.2.3 h1:luE4dMvRPDOWQdeDdUxUoZkzUIpTccdKdhHHsQJ1fm4=
 github.com/STARRY-S/zip v0.2.3/go.mod h1:lqJ9JdeRipyOQJrYSOtpNAiaesFO6zVDsE8GIGFaoSk=
+github.com/a1ex3/zstd-seekable-format-go/pkg v0.10.0 h1:iLDOF0rdGTrol/q8OfPIIs5kLD8XvA2q75o6Uq/tgak=
+github.com/a1ex3/zstd-seekable-format-go/pkg v0.10.0/go.mod h1:DrEWcQJjz7t5iF2duaiyhg4jyoF0kxOD6LtECNGkZ/Q=
 github.com/a8m/tree v0.0.0-20240104212747-2c8764a5f17e h1:KMVieI1/Ub++GYfnhyFPoGE3g5TUiG4srE3TMGr5nM4=
 github.com/a8m/tree v0.0.0-20240104212747-2c8764a5f17e/go.mod h1:j5astEcUkZQX8lK+KKlQ3NRQ50f4EE8ZjyZpCz3mrH4=
 github.com/aalpar/deheap v0.0.0-20210914013432-0cc84d79dec3 h1:hhdWprfSpFbN7lz3W1gM40vOgvSh1WCSMxYD6gGB4Hs=
@@ -344,6 +346,8 @@ github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek
 github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
 github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
+github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
+github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
 github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=