Mirror of https://github.com/gilbertchen/duplicacy
duplicacy/src/duplicacy_chunkmaker.go
Gilbert Chen d9f6545d63 Rewrite the backup procedure to reduce memory usage
Main changes:

* Change the listing order of files/directories so that the local and remote
  snapshots can be compared on-the-fly.

* Introduce a new struct called EntryList that maintains a list of
  files/directories, which are kept in memory when their number is small and
  serialized into a file when there are too many (a sketch of this idea
  follows the commit notes below).

* EntryList can also be turned into an on-disk incomplete snapshot quickly,
  to support fast-resume on next run.

* ChunkOperator can now download and upload chunks, thus replacing the
  original ChunkDownloader and ChunkUploader.  The new ChunkDownloader is
  only used to prefetch chunks during the restore operation.
2021-10-24 23:34:49 -04:00
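
Below is a minimal sketch of the EntryList spill-to-disk idea described in the
commit notes. It is illustrative only: the Entry fields, the in-memory
threshold, and the Add/Flush methods are assumptions made for this sketch, not
duplicacy's actual EntryList API.

// Sketch only: field names, threshold, and methods are assumed for
// illustration and do not reflect duplicacy's actual EntryList.
package sketch

import (
    "bufio"
    "encoding/json"
    "os"
)

// Entry is a stand-in for a file/directory record.
type Entry struct {
    Path string `json:"path"`
    Size int64  `json:"size"`
}

// EntryList keeps entries in memory until maxInMemory is exceeded, then
// streams all entries, old and new, to a temporary file.
type EntryList struct {
    maxInMemory int
    inMemory    []Entry
    file        *os.File
    writer      *bufio.Writer
    encoder     *json.Encoder
}

// Add stores one entry, spilling to disk once the in-memory limit is hit.
func (l *EntryList) Add(entry Entry) error {
    if l.file == nil {
        if len(l.inMemory) < l.maxInMemory {
            l.inMemory = append(l.inMemory, entry)
            return nil
        }
        // Threshold crossed: move existing entries to a temporary file.
        file, err := os.CreateTemp("", "entrylist")
        if err != nil {
            return err
        }
        l.file = file
        l.writer = bufio.NewWriter(file)
        l.encoder = json.NewEncoder(l.writer)
        for _, e := range l.inMemory {
            if err := l.encoder.Encode(e); err != nil {
                return err
            }
        }
        l.inMemory = nil
    }
    return l.encoder.Encode(entry)
}

// Flush pushes buffered output to the spill file, if one exists.
func (l *EntryList) Flush() error {
    if l.writer != nil {
        return l.writer.Flush()
    }
    return nil
}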


// Copyright (c) Acrosync LLC. All rights reserved.
// Free for personal use and commercial trial
// Commercial use requires per-user licenses available from https://duplicacy.com

package duplicacy

import (
    "crypto/sha256"
    "encoding/binary"
    "encoding/hex"
    "io"
)

// ChunkMaker breaks data into chunks using buzhash. To save memory, the chunk maker only uses a circular buffer
// whose size is double the minimum chunk size.
type ChunkMaker struct {
    maximumChunkSize int
    minimumChunkSize int
    bufferCapacity   int

    hashMask    uint64
    randomTable [256]uint64

    buffer      []byte
    bufferSize  int
    bufferStart int

    minimumReached bool
    hashSum        uint64
    chunk          *Chunk

    config *Config

    hashOnly      bool
    hashOnlyChunk *Chunk
}
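
// Buffer layout: 'buffer' is a ring of bufferCapacity (= 2 * minimumChunkSize)
// bytes; the valid bytes are the bufferSize bytes starting at bufferStart,
// wrapping around the end of the slice. Twice the minimum chunk size suffices
// because the rolling hash window is exactly minimumChunkSize bytes wide.
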
// CreateFileChunkMaker creates a chunk maker. 'config.ChunkSeed' is used to generate the character-to-integer
// table needed by buzhash.
func CreateFileChunkMaker(config *Config, hashOnly bool) *ChunkMaker {

    size := 1
    for size*2 <= config.AverageChunkSize {
        size *= 2
    }

    if size != config.AverageChunkSize {
        LOG_FATAL("CHUNK_SIZE", "Invalid average chunk size: %d is not a power of 2", config.AverageChunkSize)
        return nil
    }

    maker := &ChunkMaker{
        hashMask:         uint64(config.AverageChunkSize - 1),
        maximumChunkSize: config.MaximumChunkSize,
        minimumChunkSize: config.MinimumChunkSize,
        bufferCapacity:   2 * config.MinimumChunkSize,
        config:           config,
        hashOnly:         hashOnly,
    }

    if hashOnly {
        maker.hashOnlyChunk = CreateChunk(config, false)
    }

    // Derive the 256-entry random table from the chunk seed, 4 entries per SHA-256 digest.
    randomData := sha256.Sum256(config.ChunkSeed)
    for i := 0; i < 64; i++ {
        for j := 0; j < 4; j++ {
            maker.randomTable[4*i+j] = binary.LittleEndian.Uint64(randomData[8*j : 8*j+8])
        }
        randomData = sha256.Sum256(randomData[:])
    }

    maker.buffer = make([]byte, 2*config.MinimumChunkSize)
    maker.bufferStart = 0
    maker.bufferSize = 0

    maker.startNewChunk()
    return maker
}
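
// Worked example of the boundary condition enforced above: with an average
// chunk size of 4 MiB (4194304 = 1<<22), hashMask is 0x3FFFFF, so a chunk
// boundary is declared whenever the low 22 bits of the rolling hash are all
// zero, which on random input happens about once every 4 MiB.
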
// CreateMetaDataChunkMaker creates a chunk maker that always uses the variable-sized chunking algorithm.
func CreateMetaDataChunkMaker(config *Config, chunkSize int) *ChunkMaker {

    size := 1
    for size*2 <= chunkSize {
        size *= 2
    }

    if size != chunkSize {
        LOG_FATAL("CHUNK_SIZE", "Invalid metadata chunk size: %d is not a power of 2", chunkSize)
        return nil
    }

    maker := CreateFileChunkMaker(config, false)
    maker.hashMask = uint64(chunkSize - 1)
    maker.maximumChunkSize = chunkSize * 4
    maker.minimumChunkSize = chunkSize / 4
    maker.bufferCapacity = 2 * maker.minimumChunkSize
    maker.buffer = make([]byte, maker.bufferCapacity)

    return maker
}
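
// Note the derived bounds above: the minimum is a quarter of the requested
// metadata chunk size and the maximum is four times it, so a 1 MiB metadata
// chunk size, for example, yields a 256 KiB minimum and a 4 MiB maximum.
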
// rotateLeft rotates 'value' left by 'bits' (taken modulo 64).
func rotateLeft(value uint64, bits uint) uint64 {
    return (value << (bits & 0x3f)) | (value >> (64 - (bits & 0x3f)))
}

// rotateLeftByOne is the one-bit special case applied for every input byte.
func rotateLeftByOne(value uint64) uint64 {
    return (value << 1) | (value >> 63)
}

// buzhashSum computes the buzhash of 'data', starting from 'sum'.
func (maker *ChunkMaker) buzhashSum(sum uint64, data []byte) uint64 {
    for i := 0; i < len(data); i++ {
        sum = rotateLeftByOne(sum) ^ maker.randomTable[data[i]]
    }
    return sum
}
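
// A note on the sliding-window update below: for a window of n bytes b1..bn,
// the buzhash is rol(T[b1], n-1) ^ rol(T[b2], n-2) ^ ... ^ T[bn], where T is
// randomTable and rol is a left rotation. Rotating the whole sum left by one
// and XORing rol(T[out], n) therefore cancels the byte leaving the window,
// while XORing T[in] adds the byte entering it.
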
// buzhashUpdate slides the window one byte forward: 'out' leaves, 'in' enters.
func (maker *ChunkMaker) buzhashUpdate(sum uint64, out byte, in byte, length int) uint64 {
    return rotateLeftByOne(sum) ^ rotateLeft(maker.randomTable[out], uint(length)) ^ maker.randomTable[in]
}

// startNewChunk resets the rolling hash state and acquires a fresh chunk.
func (maker *ChunkMaker) startNewChunk() (chunk *Chunk) {
    maker.hashSum = 0
    maker.minimumReached = false
    if maker.hashOnly {
        // In hash-only mode the same preallocated chunk is reused to avoid allocations.
        maker.chunk = maker.hashOnlyChunk
        maker.chunk.Reset(true)
    } else {
        maker.chunk = maker.config.GetChunk()
        maker.chunk.Reset(true)
    }
    return
}
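
// AddData splits the bytes read from 'reader' into chunks, calling
// 'sendChunk' for each completed chunk, and returns the number of bytes read
// along with the hex-encoded file hash. A nil 'reader' flushes data buffered
// by previous calls; a chunk may span consecutive files.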
func (maker *ChunkMaker) AddData(reader io.Reader, sendChunk func(*Chunk)) (int64, string) {

    isEOF := false
    fileSize := int64(0)
    fileHasher := maker.config.NewFileHasher()

    // Move 'count' bytes from the circular buffer to the current chunk.
    fill := func(count int) {
        if maker.bufferStart+count < maker.bufferCapacity {
            maker.chunk.Write(maker.buffer[maker.bufferStart : maker.bufferStart+count])
            maker.bufferStart += count
            maker.bufferSize -= count
        } else {
            // The data wraps around the end of the buffer; copy it in two parts.
            maker.chunk.Write(maker.buffer[maker.bufferStart:])
            maker.chunk.Write(maker.buffer[:count-(maker.bufferCapacity-maker.bufferStart)])
            maker.bufferStart = count - (maker.bufferCapacity - maker.bufferStart)
            maker.bufferSize -= count
        }
    }

    var err error

    // If the minimum and maximum chunk sizes are identical, use fixed-size
    // chunking: read exactly minimumChunkSize bytes per chunk and skip the
    // rolling hash entirely.
    if maker.minimumChunkSize == maker.maximumChunkSize {

        if reader == nil {
            return 0, ""
        }

        for {
            maker.startNewChunk()
            maker.bufferStart = 0
            for maker.bufferStart < maker.minimumChunkSize && !isEOF {
                count, err := reader.Read(maker.buffer[maker.bufferStart:maker.minimumChunkSize])
                if err != nil {
                    if err != io.EOF {
                        LOG_ERROR("CHUNK_MAKER", "Failed to read %d bytes: %s", count, err.Error())
                        return 0, ""
                    } else {
                        isEOF = true
                    }
                }
                maker.bufferStart += count
            }

            if maker.bufferStart > 0 {
                fileHasher.Write(maker.buffer[:maker.bufferStart])
                fileSize += int64(maker.bufferStart)
                maker.chunk.Write(maker.buffer[:maker.bufferStart])
                sendChunk(maker.chunk)
            }

            if isEOF {
                return fileSize, hex.EncodeToString(fileHasher.Sum(nil))
            }
        }
    }

    for {

        // If the buffer still has some space left and EOF has not been seen, read more data.
        for maker.bufferSize < maker.bufferCapacity && !isEOF && reader != nil {

            start := maker.bufferStart + maker.bufferSize
            count := maker.bufferCapacity - start
            if start >= maker.bufferCapacity {
                // The free region wraps around; write into the gap before bufferStart.
                start -= maker.bufferCapacity
                count = maker.bufferStart - start
            }

            count, err = reader.Read(maker.buffer[start : start+count])
            if err != nil && err != io.EOF {
                LOG_ERROR("CHUNK_MAKER", "Failed to read %d bytes: %s", count, err.Error())
                return 0, ""
            }

            maker.bufferSize += count
            fileHasher.Write(maker.buffer[start : start+count])
            fileSize += int64(count)

            // If EOF is seen, try to switch to the next file and continue.
            if err == io.EOF {
                isEOF = true
                break
            }
        }

        // Not enough data to meet the minimum chunk size requirement.
        if maker.bufferSize < maker.minimumChunkSize {
            if reader == nil {
                // Final flush: emit whatever remains as the last chunk.
                fill(maker.bufferSize)
                if maker.chunk.GetLength() > 0 {
                    sendChunk(maker.chunk)
                }
                return 0, ""
            } else if isEOF {
                // The current file has ended; keep the data buffered for the next file.
                return fileSize, hex.EncodeToString(fileHasher.Sum(nil))
            } else {
                continue
            }
        }

        // Minimum chunk size has been reached. Calculate the buzhash for the minimum size chunk.
        if !maker.minimumReached {

            bytes := maker.minimumChunkSize
            if maker.bufferStart+bytes < maker.bufferCapacity {
                maker.hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:maker.bufferStart+bytes])
            } else {
                // The window wraps around the end of the buffer; hash it in two parts.
                maker.hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:])
                maker.hashSum = maker.buzhashSum(maker.hashSum,
                    maker.buffer[:bytes-(maker.bufferCapacity-maker.bufferStart)])
            }

            if (maker.hashSum & maker.hashMask) == 0 {
                // This is a minimum size chunk.
                fill(bytes)
                sendChunk(maker.chunk)
                maker.startNewChunk()
                continue
            }

            maker.minimumReached = true
        }

        // Now check the buzhash of the data in the buffer, shifting the window one byte at a time.
        bytes := maker.bufferSize - maker.minimumChunkSize
        isEOC := false // set to true when a chunk boundary is found
        maxSize := maker.maximumChunkSize - maker.chunk.GetLength()
        for i := 0; i < bytes; i++ {
            out := maker.bufferStart + i
            if out >= maker.bufferCapacity {
                out -= maker.bufferCapacity
            }
            in := maker.bufferStart + i + maker.minimumChunkSize
            if in >= maker.bufferCapacity {
                in -= maker.bufferCapacity
            }
            maker.hashSum = maker.buzhashUpdate(maker.hashSum, maker.buffer[out], maker.buffer[in], maker.minimumChunkSize)
            if (maker.hashSum&maker.hashMask) == 0 || i == maxSize-maker.minimumChunkSize-1 {
                // A chunk is completed, either at a hash boundary or at the maximum chunk size.
                bytes = i + 1 + maker.minimumChunkSize
                isEOC = true
                break
            }
        }

        fill(bytes)

        if isEOC {
            sendChunk(maker.chunk)
            maker.startNewChunk()
        } else if reader == nil {
            // No boundary found and no more data is coming: emit the rest as the final chunk.
            fill(maker.minimumChunkSize)
            sendChunk(maker.chunk)
            maker.startNewChunk()
            return 0, ""
        }

        if isEOF {
            return fileSize, hex.EncodeToString(fileHasher.Sum(nil))
        }
    }
}