1
0
mirror of https://github.com/gilbertchen/duplicacy synced 2025-12-06 00:03:38 +00:00

Implement zstd compression

Zstd compression can be enabled by providing `-zstd` or `-zstd-level <level>`
to `init`, `add`, or `backup`. With `-zstd` the compression level will be
`default`, and with `-zstd-level` the level can be any of `fastest`, `default`,
`better`, or `best`.
This commit is contained in:
Gilbert Chen
2023-03-26 21:31:51 -04:00
parent 9f276047db
commit 53b0f3f7b6
6 changed files with 138 additions and 5 deletions

View File

@@ -371,6 +371,8 @@ func configRepository(context *cli.Context, init bool) {
} else if existingConfig.CompressionLevel != 100 {
duplicacy.LOG_ERROR("STORAGE_COMPRESSION", "This storage is configured with an invalid compression level %d", existingConfig.CompressionLevel)
return
} else if existingConfig.CompressionLevel != duplicacy.DEFAULT_COMPRESSION_LEVEL {
duplicacy.LOG_INFO("STORAGE_COMPRESSION", "Compression level: %d", existingConfig.CompressionLevel)
}
// Don't print config in the background mode
@@ -378,8 +380,6 @@ func configRepository(context *cli.Context, init bool) {
existingConfig.Print()
}
} else {
compressionLevel := 100
averageChunkSize := duplicacy.AtoSize(context.String("chunk-size"))
if averageChunkSize == 0 {
fmt.Fprintf(context.App.Writer, "Invalid average chunk size: %s.\n\n", context.String("chunk-size"))
@@ -487,6 +487,18 @@ func configRepository(context *cli.Context, init bool) {
}
}
compressionLevel := 100
zstdLevel := context.String("zstd-level")
if zstdLevel != "" {
if level, found := duplicacy.ZSTD_COMPRESSION_LEVELS[zstdLevel]; found {
compressionLevel = level
} else {
duplicacy.LOG_ERROR("STORAGE_COMPRESSION", "Invalid zstd compression level: %s", zstdLevel)
}
} else if context.Bool("zstd") {
compressionLevel = duplicacy.ZSTD_COMPRESSION_LEVEL_DEFAULT
}
duplicacy.ConfigStorage(storage, iterations, compressionLevel, averageChunkSize, maximumChunkSize,
minimumChunkSize, storagePassword, otherConfig, bitCopy, context.String("key"), dataShards, parityShards)
}
@@ -786,6 +798,17 @@ func backupRepository(context *cli.Context) {
backupManager.SetupSnapshotCache(preference.Name)
backupManager.SetDryRun(dryRun)
zstdLevel := context.String("zstd-level")
if zstdLevel != "" {
if level, found := duplicacy.ZSTD_COMPRESSION_LEVELS[zstdLevel]; found {
backupManager.SetCompressionLevel(level)
} else {
duplicacy.LOG_ERROR("STORAGE_COMPRESSION", "Invalid zstd compression level: %s", zstdLevel)
}
} else if context.Bool("zstd") {
backupManager.SetCompressionLevel(duplicacy.ZSTD_COMPRESSION_LEVEL_DEFAULT)
}
metadataChunkSize := context.Int("metadata-chunk-size")
maximumInMemoryEntries := context.Int("max-in-memory-entries")
backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS, vssTimeout, enumOnly, metadataChunkSize, maximumInMemoryEntries)
@@ -1428,6 +1451,15 @@ func main() {
Usage: "the minimum size of chunks (defaults to chunk-size/4)",
Argument: "<size>",
},
cli.StringFlag{
Name: "zstd-level",
Usage: "set zstd compression level (fast, default, better, or best)",
Argument: "<level>",
},
cli.BoolFlag{
Name: "zstd",
Usage: "short for -zstd default",
},
cli.IntFlag{
Name: "iterations",
Usage: "the number of iterations used in storage key derivation (default is 16384)",
@@ -1495,6 +1527,15 @@ func main() {
Name: "dry-run",
Usage: "dry run for testing, don't backup anything. Use with -stats and -d",
},
cli.StringFlag{
Name: "zstd-level",
Usage: "set zstd compression level (fast, default, better, or best)",
Argument: "<level>",
},
cli.BoolFlag{
Name: "zstd",
Usage: "short for -zstd default",
},
cli.BoolFlag{
Name: "vss",
Usage: "enable the Volume Shadow Copy service (Windows and macOS using APFS only)",
@@ -1938,6 +1979,15 @@ func main() {
Usage: "the minimum size of chunks (default is chunk-size/4)",
Argument: "<size>",
},
cli.StringFlag{
Name: "zstd-level",
Usage: "set zstd compression level (fast, default, better, or best)",
Argument: "<level>",
},
cli.BoolFlag{
Name: "zstd",
Usage: "short for -zstd default",
},
cli.IntFlag{
Name: "iterations",
Usage: "the number of iterations used in storage key derivation (default is 16384)",

1
go.mod
View File

@@ -41,6 +41,7 @@ require (
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
github.com/googleapis/gax-go/v2 v2.0.5 // indirect
github.com/jmespath/go-jmespath v0.3.0 // indirect
github.com/klauspost/compress v1.16.3 // indirect
github.com/klauspost/cpuid v1.3.1 // indirect
github.com/kr/fs v0.1.0 // indirect
github.com/kr/text v0.2.0 // indirect

2
go.sum
View File

@@ -143,6 +143,8 @@ github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCV
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/klauspost/cpuid v1.2.4/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid v1.3.1 h1:5JNjFYYQrZeKRJ0734q51WCEEn2huer72Dc7K+R/b6s=
github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd42rAQw4=

View File

@@ -47,6 +47,10 @@ func (manager *BackupManager) SetDryRun(dryRun bool) {
manager.config.dryRun = dryRun
}
func (manager *BackupManager) SetCompressionLevel(level int) {
manager.config.CompressionLevel = level
}
// CreateBackupManager creates a backup manager using the specified 'storage'. 'snapshotID' is a unique id to
// identify snapshots created for this repository. 'top' is the top directory of the repository. 'password' is the
// master key which can be nil if encryption is not enabled.
@@ -138,6 +142,8 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta
LOG_DEBUG("BACKUP_PARAMETERS", "top: %s, quick: %t, tag: %s", top, quickMode, tag)
manager.config.PrintCompressionLevel()
if manager.config.DataShards != 0 && manager.config.ParityShards != 0 {
LOG_INFO("BACKUP_ERASURECODING", "Erasure coding is enabled with %d data shards and %d parity shards",
manager.config.DataShards, manager.config.ParityShards)

View File

@@ -24,6 +24,7 @@ import (
"github.com/bkaradzic/go-lz4"
"github.com/minio/highwayhash"
"github.com/klauspost/reedsolomon"
"github.com/klauspost/compress/zstd"
// This is a fork of github.com/minio/highwayhash at 1.0.1 that computes incorrect hash on
// arm64 machines. We need this fork to be able to read the chunks created by Duplicacy
@@ -267,6 +268,38 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isMetada
deflater, _ := zlib.NewWriterLevel(encryptedBuffer, chunk.config.CompressionLevel)
deflater.Write(chunk.buffer.Bytes())
deflater.Close()
} else if chunk.config.CompressionLevel >= ZSTD_COMPRESSION_LEVEL_FASTEST && chunk.config.CompressionLevel <= ZSTD_COMPRESSION_LEVEL_BEST {
encryptedBuffer.Write([]byte("ZSTD"))
compressionLevel := zstd.SpeedDefault
if chunk.config.CompressionLevel == ZSTD_COMPRESSION_LEVEL_FASTEST {
compressionLevel = zstd.SpeedFastest
} else if chunk.config.CompressionLevel == ZSTD_COMPRESSION_LEVEL_BETTER {
compressionLevel = zstd.SpeedBetterCompression
} else if chunk.config.CompressionLevel == ZSTD_COMPRESSION_LEVEL_BEST {
compressionLevel = zstd.SpeedBestCompression
}
deflater, err := zstd.NewWriter(encryptedBuffer, zstd.WithEncoderLevel(compressionLevel))
if err != nil {
return err
}
// Make sure we have enough space in encryptedBuffer
availableLength := encryptedBuffer.Cap() - len(encryptedBuffer.Bytes())
maximumLength := deflater.MaxEncodedSize(chunk.buffer.Len())
if availableLength < maximumLength {
encryptedBuffer.Grow(maximumLength - availableLength)
}
_, err = deflater.Write(chunk.buffer.Bytes())
if err != nil {
return fmt.Errorf("ZSTD compression error: %v", err)
}
err = deflater.Close()
if err != nil {
return fmt.Errorf("ZSTD compression error: %v", err)
}
} else if chunk.config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL {
encryptedBuffer.Write([]byte("LZ4 "))
// Make sure we have enough space in encryptedBuffer
@@ -361,7 +394,6 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isMetada
chunk.buffer.Write(header)
return nil
}
// This is to ensure compatibility with Vertical Backup, which still uses HMAC-SHA256 (instead of HMAC-BLAKE2) to
@@ -633,6 +665,24 @@ func (chunk *Chunk) Decrypt(encryptionKey []byte, derivationKey string) (err err
chunk.hash = nil
return nil, rewriteNeeded
}
if len(compressed) > 4 && string(compressed[:4]) == "ZSTD" {
chunk.buffer.Reset()
chunk.hasher = chunk.config.NewKeyedHasher(chunk.config.HashKey)
chunk.hash = nil
encryptedBuffer.Read(encryptedBuffer.Bytes()[:4])
inflater, err := zstd.NewReader(encryptedBuffer)
if err != nil {
return err, false
}
defer inflater.Close()
if _, err = io.Copy(chunk, inflater); err != nil {
return err, false
}
return nil, rewriteNeeded
}
inflater, err := zlib.NewReader(encryptedBuffer)
if err != nil {
return err, false

View File

@@ -35,6 +35,19 @@ var DEFAULT_KEY = []byte("duplicacy")
// standard zlib levels of -1 to 9.
var DEFAULT_COMPRESSION_LEVEL = 100
// zstd compression levels starting from 200
var ZSTD_COMPRESSION_LEVEL_FASTEST = 200
var ZSTD_COMPRESSION_LEVEL_DEFAULT = 201
var ZSTD_COMPRESSION_LEVEL_BETTER = 202
var ZSTD_COMPRESSION_LEVEL_BEST = 203
var ZSTD_COMPRESSION_LEVELS = map[string]int {
"fastest": ZSTD_COMPRESSION_LEVEL_FASTEST,
"default": ZSTD_COMPRESSION_LEVEL_DEFAULT,
"better": ZSTD_COMPRESSION_LEVEL_BETTER,
"best": ZSTD_COMPRESSION_LEVEL_BEST,
}
// The new banner of the config file (to differentiate from the old format where the salt and iterations are fixed)
var CONFIG_BANNER = "duplicacy\001"
@@ -202,6 +215,14 @@ func (config *Config) Print() {
}
func (config *Config) PrintCompressionLevel() {
for name, level := range ZSTD_COMPRESSION_LEVELS {
if level == config.CompressionLevel {
LOG_INFO("COMPRESSION_LEVEL", "Zstd compression is enabled (level: %s)", name)
}
}
}
func CreateConfigFromParameters(compressionLevel int, averageChunkSize int, maximumChunkSize int, mininumChunkSize int,
isEncrypted bool, copyFrom *Config, bitCopy bool) (config *Config) {
@@ -294,7 +315,10 @@ func (config *Config) PutChunk(chunk *Chunk) {
}
func (config *Config) NewKeyedHasher(key []byte) hash.Hash {
if config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL {
// Early versions of Duplicacy used SHA256 as the hash function for chunk IDs at the time when
// only zlib compression was supported. Later SHA256 was replaced by Blake2b and LZ4 was used
// for compression (with compression level set to 100).
if config.CompressionLevel >= DEFAULT_COMPRESSION_LEVEL {
hasher, err := blake2.New(&blake2.Config{Size: 32, Key: key})
if err != nil {
LOG_ERROR("HASH_KEY", "Invalid hash key: %x", key)
@@ -339,7 +363,7 @@ func (hasher *DummyHasher) BlockSize() int {
func (config *Config) NewFileHasher() hash.Hash {
if SkipFileHash {
return &DummyHasher{}
} else if config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL {
} else if config.CompressionLevel >= DEFAULT_COMPRESSION_LEVEL {
hasher, _ := blake2.New(&blake2.Config{Size: 32})
return hasher
} else {