1
0
mirror of https://github.com/gilbertchen/duplicacy synced 2025-12-06 00:03:38 +00:00

Merge pull request #625 from gilbertchen/memory_optimization

Rewrite the backup procedure to reduce memory usage
This commit is contained in:
gilbertchen
2022-04-07 23:26:46 -04:00
committed by GitHub
20 changed files with 2744 additions and 1746 deletions

View File

@@ -785,7 +785,10 @@ func backupRepository(context *cli.Context) {
backupManager.SetupSnapshotCache(preference.Name)
backupManager.SetDryRun(dryRun)
backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS, vssTimeout, enumOnly)
metadataChunkSize := context.Int("metadata-chunk-size")
maximumInMemoryEntries := context.Int("max-in-memory-entries")
backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS, vssTimeout, enumOnly, metadataChunkSize, maximumInMemoryEntries)
runScript(context, preference.Name, "post")
}
@@ -1510,6 +1513,19 @@ func main() {
Name: "enum-only",
Usage: "enumerate the repository recursively and then exit",
},
cli.IntFlag{
Name: "metadata-chunk-size",
Value: 1024 * 1024,
Usage: "the average size of metadata chunks (defaults to 1M)",
Argument: "<size>",
},
cli.IntFlag{
Name: "max-in-memory-entries",
Value: 1024 * 1024,
Usage: "the maximum number of entries kept in memory (defaults to 1M)",
Argument: "<number>",
},
},
Usage: "Save a snapshot of the repository to the storage",
ArgsUsage: " ",

File diff suppressed because it is too large Load Diff

View File

@@ -257,7 +257,7 @@ func TestBackupManager(t *testing.T) {
backupManager.SetupSnapshotCache("default")
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false)
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false, 1024, 1024)
time.Sleep(time.Duration(delay) * time.Second)
SetDuplicacyPreferencePath(testDir + "/repository2/.duplicacy")
failedFiles := backupManager.Restore(testDir+"/repository2", threads /*inPlace=*/, false /*quickMode=*/, false, threads /*overwrite=*/, true,
@@ -282,7 +282,7 @@ func TestBackupManager(t *testing.T) {
modifyFile(testDir+"/repository1/dir1/file3", 0.3)
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "second", false, false, 0, false)
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "second", false, false, 0, false, 1024, 1024)
time.Sleep(time.Duration(delay) * time.Second)
SetDuplicacyPreferencePath(testDir + "/repository2/.duplicacy")
failedFiles = backupManager.Restore(testDir+"/repository2", 2 /*inPlace=*/, true /*quickMode=*/, true, threads /*overwrite=*/, true,
@@ -303,7 +303,7 @@ func TestBackupManager(t *testing.T) {
os.Mkdir(testDir+"/repository1/dir2/dir3", 0700)
os.Mkdir(testDir+"/repository1/dir4", 0700)
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "third", false, false, 0, false)
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "third", false, false, 0, false, 1024, 1024)
time.Sleep(time.Duration(delay) * time.Second)
// Create some directories and files under repository2 that will be deleted during restore
@@ -368,7 +368,7 @@ func TestBackupManager(t *testing.T) {
}
backupManager.SnapshotManager.CheckSnapshots( /*snapshotID*/ "host1" /*revisions*/, []int{2, 3} /*tag*/, "",
/*showStatistics*/ false /*showTabular*/, false /*checkFiles*/, false /*checkChunks*/, false /*searchFossils*/, false /*resurrect*/, false, 1 /*allowFailures*/, false)
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "fourth", false, false, 0, false)
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "fourth", false, false, 0, false, 1024, 1024)
backupManager.SnapshotManager.PruneSnapshots("host1", "host1" /*revisions*/, nil /*tags*/, nil /*retentions*/, nil,
/*exhaustive*/ false /*exclusive=*/, true /*ignoredIDs*/, nil /*dryRun*/, false /*deleteOnly*/, false /*collectOnly*/, false, 1)
numberOfSnapshots = backupManager.SnapshotManager.ListSnapshots( /*snapshotID*/ "host1" /*revisionsToList*/, nil /*tag*/, "" /*showFiles*/, false /*showChunks*/, false)
@@ -533,7 +533,7 @@ func TestPersistRestore(t *testing.T) {
unencBackupManager.SetupSnapshotCache("default")
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
unencBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false)
unencBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false, 1024, 1024)
time.Sleep(time.Duration(delay) * time.Second)
@@ -543,7 +543,7 @@ func TestPersistRestore(t *testing.T) {
encBackupManager.SetupSnapshotCache("default")
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
encBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false)
encBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false, 1024, 1024)
time.Sleep(time.Duration(delay) * time.Second)

View File

@@ -29,29 +29,29 @@ func benchmarkSplit(reader *bytes.Reader, fileSize int64, chunkSize int, compres
config.HashKey = DEFAULT_KEY
config.IDKey = DEFAULT_KEY
maker := CreateChunkMaker(config, false)
maker := CreateFileChunkMaker(config, false)
startTime := float64(time.Now().UnixNano()) / 1e9
numberOfChunks := 0
reader.Seek(0, os.SEEK_SET)
maker.ForEachChunk(reader,
func(chunk *Chunk, final bool) {
if compression {
key := ""
if encryption {
key = "0123456789abcdef0123456789abcdef"
}
err := chunk.Encrypt([]byte(key), "", false)
if err != nil {
LOG_ERROR("BENCHMARK_ENCRYPT", "Failed to encrypt the chunk: %v", err)
}
chunkFunc := func(chunk *Chunk) {
if compression {
key := ""
if encryption {
key = "0123456789abcdef0123456789abcdef"
}
config.PutChunk(chunk)
numberOfChunks++
},
func(size int64, hash string) (io.Reader, bool) {
return nil, false
})
err := chunk.Encrypt([]byte(key), "", false)
if err != nil {
LOG_ERROR("BENCHMARK_ENCRYPT", "Failed to encrypt the chunk: %v", err)
}
}
config.PutChunk(chunk)
numberOfChunks++
}
maker.AddData(reader, chunkFunc)
maker.AddData(nil, chunkFunc)
runningTime := float64(time.Now().UnixNano())/1e9 - startTime
speed := int64(float64(fileSize) / runningTime)

View File

@@ -65,8 +65,8 @@ type Chunk struct {
config *Config // Every chunk is associated with a Config object. Which hashing algorithm to use is determined
// by the config
isSnapshot bool // Indicates if the chunk is a snapshot chunk (instead of a file chunk). This is only used by RSA
// encryption, where a snapshot chunk is not encrypted by RSA
isMetadata bool // Indicates if the chunk is a metadata chunk (instead of a file chunk). This is primarily used by RSA
// encryption, where a metadata chunk is not encrypted by RSA
isBroken bool // Indicates the chunk did not download correctly. This is only used for -persist (allowFailures) mode
}
@@ -127,7 +127,7 @@ func (chunk *Chunk) Reset(hashNeeded bool) {
chunk.hash = nil
chunk.id = ""
chunk.size = 0
chunk.isSnapshot = false
chunk.isMetadata = false
chunk.isBroken = false
}
@@ -186,7 +186,7 @@ func (chunk *Chunk) VerifyID() {
// Encrypt encrypts the plain data stored in the chunk buffer. If derivationKey is not nil, the actual
// encryption key will be HMAC-SHA256(encryptionKey, derivationKey).
func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isSnapshot bool) (err error) {
func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isMetadata bool) (err error) {
var aesBlock cipher.Block
var gcm cipher.AEAD
@@ -203,8 +203,8 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isSnapsh
key := encryptionKey
usingRSA := false
// Enable RSA encryption only when the chunk is not a snapshot chunk
if chunk.config.rsaPublicKey != nil && !isSnapshot && !chunk.isSnapshot {
// Enable RSA encryption only when the chunk is not a metadata chunk
if chunk.config.rsaPublicKey != nil && !isMetadata && !chunk.isMetadata {
randomKey := make([]byte, 32)
_, err := rand.Read(randomKey)
if err != nil {

View File

@@ -5,7 +5,6 @@
package duplicacy
import (
"io"
"sync/atomic"
"time"
)
@@ -20,78 +19,47 @@ type ChunkDownloadTask struct {
isDownloading bool // 'true' means the chunk has been downloaded or is being downloaded
}
// ChunkDownloadCompletion represents the nofication when a chunk has been downloaded.
type ChunkDownloadCompletion struct {
chunkIndex int // The index of this chunk in the chunk list
chunk *Chunk // The chunk that has been downloaded
chunk *Chunk
chunkIndex int
}
// ChunkDownloader is capable of performing multi-threaded downloading. Chunks to be downloaded are first organized
// ChunkDownloader is a wrapper of ChunkOperator and is only used by the restore procedure.capable of performing multi-threaded downloading. Chunks to be downloaded are first organized
// as a list of ChunkDownloadTasks, with only the chunkHash field initialized. When a chunk is needed, the
// corresponding ChunkDownloadTask is sent to the dowloading goroutine. Once a chunk is downloaded, it will be
// inserted in the completed task list.
type ChunkDownloader struct {
operator *ChunkOperator
totalChunkSize int64 // Total chunk size
downloadedChunkSize int64 // Downloaded chunk size
config *Config // Associated config
storage Storage // Download from this storage
snapshotCache *FileStorage // Used as cache if not nil; usually for downloading snapshot chunks
showStatistics bool // Show a stats log for each chunk if true
threads int // Number of threads
allowFailures bool // Whether to failfast on download error, or continue
taskList []ChunkDownloadTask // The list of chunks to be downloaded
completedTasks map[int]bool // Store downloaded chunks
lastChunkIndex int // a monotonically increasing number indicating the last chunk to be downloaded
taskQueue chan ChunkDownloadTask // Downloading goroutines are waiting on this channel for input
stopChannel chan bool // Used to stop the dowloading goroutines
completionChannel chan ChunkDownloadCompletion // A downloading goroutine sends back the chunk via this channel after downloading
startTime int64 // The time it starts downloading
numberOfDownloadedChunks int // The number of chunks that have been downloaded
numberOfDownloadingChunks int // The number of chunks still being downloaded
numberOfActiveChunks int // The number of chunks that is being downloaded or has been downloaded but not reclaimed
NumberOfFailedChunks int // The number of chunks that can't be downloaded
}
func CreateChunkDownloader(config *Config, storage Storage, snapshotCache *FileStorage, showStatistics bool, threads int, allowFailures bool) *ChunkDownloader {
func CreateChunkDownloader(operator *ChunkOperator) *ChunkDownloader {
downloader := &ChunkDownloader{
config: config,
storage: storage,
snapshotCache: snapshotCache,
showStatistics: showStatistics,
threads: threads,
allowFailures: allowFailures,
operator: operator,
taskList: nil,
completedTasks: make(map[int]bool),
lastChunkIndex: 0,
taskQueue: make(chan ChunkDownloadTask, threads),
stopChannel: make(chan bool),
completionChannel: make(chan ChunkDownloadCompletion),
startTime: time.Now().Unix(),
}
// Start the downloading goroutines
for i := 0; i < downloader.threads; i++ {
go func(threadIndex int) {
defer CatchLogException()
for {
select {
case task := <-downloader.taskQueue:
downloader.Download(threadIndex, task)
case <-downloader.stopChannel:
return
}
}
}(i)
}
return downloader
}
@@ -127,26 +95,7 @@ func (downloader *ChunkDownloader) AddFiles(snapshot *Snapshot, files []*Entry)
maximumChunks = file.EndChunk - file.StartChunk
}
}
}
// AddChunk adds a single chunk the download list.
func (downloader *ChunkDownloader) AddChunk(chunkHash string) int {
task := ChunkDownloadTask{
chunkIndex: len(downloader.taskList),
chunkHash: chunkHash,
chunkLength: 0,
needed: true,
isDownloading: false,
}
downloader.taskList = append(downloader.taskList, task)
if downloader.numberOfActiveChunks < downloader.threads {
downloader.taskQueue <- task
downloader.numberOfDownloadingChunks++
downloader.numberOfActiveChunks++
downloader.taskList[len(downloader.taskList)-1].isDownloading = true
}
return len(downloader.taskList) - 1
downloader.operator.totalChunkSize = downloader.totalChunkSize
}
// Prefetch adds up to 'threads' chunks needed by a file to the download list
@@ -159,20 +108,22 @@ func (downloader *ChunkDownloader) Prefetch(file *Entry) {
task := &downloader.taskList[i]
if task.needed {
if !task.isDownloading {
if downloader.numberOfActiveChunks >= downloader.threads {
if downloader.numberOfActiveChunks >= downloader.operator.threads {
return
}
LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching %s chunk %s", file.Path,
downloader.config.GetChunkIDFromHash(task.chunkHash))
downloader.taskQueue <- *task
downloader.operator.config.GetChunkIDFromHash(task.chunkHash))
downloader.operator.DownloadAsync(task.chunkHash, i, false, func (chunk *Chunk, chunkIndex int) {
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
})
task.isDownloading = true
downloader.numberOfDownloadingChunks++
downloader.numberOfActiveChunks++
}
} else {
LOG_DEBUG("DOWNLOAD_PREFETCH", "%s chunk %s is not needed", file.Path,
downloader.config.GetChunkIDFromHash(task.chunkHash))
downloader.operator.config.GetChunkIDFromHash(task.chunkHash))
}
}
}
@@ -186,7 +137,7 @@ func (downloader *ChunkDownloader) Reclaim(chunkIndex int) {
for i := range downloader.completedTasks {
if i < chunkIndex && downloader.taskList[i].chunk != nil {
downloader.config.PutChunk(downloader.taskList[i].chunk)
downloader.operator.config.PutChunk(downloader.taskList[i].chunk)
downloader.taskList[i].chunk = nil
delete(downloader.completedTasks, i)
downloader.numberOfActiveChunks--
@@ -222,8 +173,10 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
// If we haven't started download the specified chunk, download it now
if !downloader.taskList[chunkIndex].isDownloading {
LOG_DEBUG("DOWNLOAD_FETCH", "Fetching chunk %s",
downloader.config.GetChunkIDFromHash(downloader.taskList[chunkIndex].chunkHash))
downloader.taskQueue <- downloader.taskList[chunkIndex]
downloader.operator.config.GetChunkIDFromHash(downloader.taskList[chunkIndex].chunkHash))
downloader.operator.DownloadAsync(downloader.taskList[chunkIndex].chunkHash, chunkIndex, false, func (chunk *Chunk, chunkIndex int) {
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
})
downloader.taskList[chunkIndex].isDownloading = true
downloader.numberOfDownloadingChunks++
downloader.numberOfActiveChunks++
@@ -231,7 +184,7 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
// We also need to look ahead and prefetch other chunks as many as permitted by the number of threads
for i := chunkIndex + 1; i < len(downloader.taskList); i++ {
if downloader.numberOfActiveChunks >= downloader.threads {
if downloader.numberOfActiveChunks >= downloader.operator.threads {
break
}
task := &downloader.taskList[i]
@@ -240,8 +193,10 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
}
if !task.isDownloading {
LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching chunk %s", downloader.config.GetChunkIDFromHash(task.chunkHash))
downloader.taskQueue <- *task
LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching chunk %s", downloader.operator.config.GetChunkIDFromHash(task.chunkHash))
downloader.operator.DownloadAsync(task.chunkHash, task.chunkIndex, false, func (chunk *Chunk, chunkIndex int) {
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
})
task.isDownloading = true
downloader.numberOfDownloadingChunks++
downloader.numberOfActiveChunks++
@@ -255,9 +210,6 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
downloader.taskList[completion.chunkIndex].chunk = completion.chunk
downloader.numberOfDownloadedChunks++
downloader.numberOfDownloadingChunks--
if completion.chunk.isBroken {
downloader.NumberOfFailedChunks++
}
}
return downloader.taskList[chunkIndex].chunk
}
@@ -281,13 +233,10 @@ func (downloader *ChunkDownloader) WaitForCompletion() {
// Wait for a completion event first
if downloader.numberOfActiveChunks > 0 {
completion := <-downloader.completionChannel
downloader.config.PutChunk(completion.chunk)
downloader.operator.config.PutChunk(completion.chunk)
downloader.numberOfActiveChunks--
downloader.numberOfDownloadedChunks++
downloader.numberOfDownloadingChunks--
if completion.chunk.isBroken {
downloader.NumberOfFailedChunks++
}
}
// Pass the tasks one by one to the download queue
@@ -297,215 +246,13 @@ func (downloader *ChunkDownloader) WaitForCompletion() {
downloader.lastChunkIndex++
continue
}
downloader.taskQueue <- *task
downloader.operator.DownloadAsync(task.chunkHash, task.chunkIndex, false, func (chunk *Chunk, chunkIndex int) {
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
})
task.isDownloading = true
downloader.numberOfDownloadingChunks++
downloader.numberOfActiveChunks++
downloader.lastChunkIndex++
}
}
}
// Stop terminates all downloading goroutines
func (downloader *ChunkDownloader) Stop() {
for downloader.numberOfDownloadingChunks > 0 {
completion := <-downloader.completionChannel
downloader.completedTasks[completion.chunkIndex] = true
downloader.taskList[completion.chunkIndex].chunk = completion.chunk
downloader.numberOfDownloadedChunks++
downloader.numberOfDownloadingChunks--
if completion.chunk.isBroken {
downloader.NumberOfFailedChunks++
}
}
for i := range downloader.completedTasks {
downloader.config.PutChunk(downloader.taskList[i].chunk)
downloader.taskList[i].chunk = nil
downloader.numberOfActiveChunks--
}
for i := 0; i < downloader.threads; i++ {
downloader.stopChannel <- true
}
}
// Download downloads a chunk from the storage.
func (downloader *ChunkDownloader) Download(threadIndex int, task ChunkDownloadTask) bool {
cachedPath := ""
chunk := downloader.config.GetChunk()
chunkID := downloader.config.GetChunkIDFromHash(task.chunkHash)
if downloader.snapshotCache != nil && downloader.storage.IsCacheNeeded() {
var exist bool
var err error
// Reset the chunk with a hasher -- we're reading from the cache where chunk are not encrypted or compressed
chunk.Reset(true)
cachedPath, exist, _, err = downloader.snapshotCache.FindChunk(threadIndex, chunkID, false)
if err != nil {
LOG_WARN("DOWNLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
} else if exist {
err = downloader.snapshotCache.DownloadFile(0, cachedPath, chunk)
if err != nil {
LOG_WARN("DOWNLOAD_CACHE", "Failed to load the chunk %s from the snapshot cache: %v", chunkID, err)
} else {
actualChunkID := chunk.GetID()
if actualChunkID != chunkID {
LOG_WARN("DOWNLOAD_CACHE_CORRUPTED",
"The chunk %s load from the snapshot cache has a hash id of %s", chunkID, actualChunkID)
} else {
LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been loaded from the snapshot cache", chunkID)
downloader.completionChannel <- ChunkDownloadCompletion{chunk: chunk, chunkIndex: task.chunkIndex}
return false
}
}
}
}
// Reset the chunk without a hasher -- the downloaded content will be encrypted and/or compressed and the hasher
// will be set up before the encryption
chunk.Reset(false)
// If failures are allowed, complete the task properly
completeFailedChunk := func(chunk *Chunk) {
if downloader.allowFailures {
chunk.isBroken = true
downloader.completionChannel <- ChunkDownloadCompletion{chunk: chunk, chunkIndex: task.chunkIndex}
}
}
const MaxDownloadAttempts = 3
for downloadAttempt := 0; ; downloadAttempt++ {
// Find the chunk by ID first.
chunkPath, exist, _, err := downloader.storage.FindChunk(threadIndex, chunkID, false)
if err != nil {
completeFailedChunk(chunk)
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
return false
}
if !exist {
// No chunk is found. Have to find it in the fossil pool again.
fossilPath, exist, _, err := downloader.storage.FindChunk(threadIndex, chunkID, true)
if err != nil {
completeFailedChunk(chunk)
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
return false
}
if !exist {
retry := false
// Retry for Hubic or WebDAV as it may return 404 even when the chunk exists
if _, ok := downloader.storage.(*HubicStorage); ok {
retry = true
}
if _, ok := downloader.storage.(*WebDAVStorage); ok {
retry = true
}
if retry && downloadAttempt < MaxDownloadAttempts {
LOG_WARN("DOWNLOAD_RETRY", "Failed to find the chunk %s; retrying", chunkID)
continue
}
completeFailedChunk(chunk)
// A chunk is not found. This is a serious error and hopefully it will never happen.
if err != nil {
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found: %v", chunkID, err)
} else {
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found", chunkID)
}
return false
}
// Don't try to resurrect the fossil as we did before. This is to avoid the potential read-after-rename
// consistency issue. Instead, download the fossil directly; resurrection should be taken care of later.
chunkPath = fossilPath
LOG_WARN("DOWNLOAD_FOSSIL", "Chunk %s is a fossil", chunkID)
}
err = downloader.storage.DownloadFile(threadIndex, chunkPath, chunk)
if err != nil {
_, isHubic := downloader.storage.(*HubicStorage)
// Retry on EOF or if it is a Hubic backend as it may return 404 even when the chunk exists
if (err == io.ErrUnexpectedEOF || isHubic) && downloadAttempt < MaxDownloadAttempts {
LOG_WARN("DOWNLOAD_RETRY", "Failed to download the chunk %s: %v; retrying", chunkID, err)
chunk.Reset(false)
continue
} else {
completeFailedChunk(chunk)
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to download the chunk %s: %v", chunkID, err)
return false
}
}
err = chunk.Decrypt(downloader.config.ChunkKey, task.chunkHash)
if err != nil {
if downloadAttempt < MaxDownloadAttempts {
LOG_WARN("DOWNLOAD_RETRY", "Failed to decrypt the chunk %s: %v; retrying", chunkID, err)
chunk.Reset(false)
continue
} else {
completeFailedChunk(chunk)
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_DECRYPT", "Failed to decrypt the chunk %s: %v", chunkID, err)
return false
}
}
actualChunkID := chunk.GetID()
if actualChunkID != chunkID {
if downloadAttempt < MaxDownloadAttempts {
LOG_WARN("DOWNLOAD_RETRY", "The chunk %s has a hash id of %s; retrying", chunkID, actualChunkID)
chunk.Reset(false)
continue
} else {
completeFailedChunk(chunk)
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CORRUPTED", "The chunk %s has a hash id of %s", chunkID, actualChunkID)
return false
}
}
break
}
if len(cachedPath) > 0 {
// Save a copy to the local snapshot cache
err := downloader.snapshotCache.UploadFile(threadIndex, cachedPath, chunk.GetBytes())
if err != nil {
LOG_WARN("DOWNLOAD_CACHE", "Failed to add the chunk %s to the snapshot cache: %v", chunkID, err)
}
}
downloadedChunkSize := atomic.AddInt64(&downloader.downloadedChunkSize, int64(chunk.GetLength()))
if (downloader.showStatistics || IsTracing()) && downloader.totalChunkSize > 0 {
now := time.Now().Unix()
if now <= downloader.startTime {
now = downloader.startTime + 1
}
speed := downloadedChunkSize / (now - downloader.startTime)
remainingTime := int64(0)
if speed > 0 {
remainingTime = (downloader.totalChunkSize-downloadedChunkSize)/speed + 1
}
percentage := float32(downloadedChunkSize * 1000 / downloader.totalChunkSize)
LOG_INFO("DOWNLOAD_PROGRESS", "Downloaded chunk %d size %d, %sB/s %s %.1f%%",
task.chunkIndex+1, chunk.GetLength(),
PrettySize(speed), PrettyTime(remainingTime), percentage/10)
} else {
LOG_DEBUG("CHUNK_DOWNLOAD", "Chunk %s has been downloaded", chunkID)
}
downloader.completionChannel <- ChunkDownloadCompletion{chunk: chunk, chunkIndex: task.chunkIndex}
return true
}
}

View File

@@ -25,15 +25,20 @@ type ChunkMaker struct {
bufferSize int
bufferStart int
minimumReached bool
hashSum uint64
chunk *Chunk
config *Config
hashOnly bool
hashOnlyChunk *Chunk
}
// CreateChunkMaker creates a chunk maker. 'randomSeed' is used to generate the character-to-integer table needed by
// buzhash.
func CreateChunkMaker(config *Config, hashOnly bool) *ChunkMaker {
func CreateFileChunkMaker(config *Config, hashOnly bool) *ChunkMaker {
size := 1
for size*2 <= config.AverageChunkSize {
size *= 2
@@ -67,6 +72,33 @@ func CreateChunkMaker(config *Config, hashOnly bool) *ChunkMaker {
}
maker.buffer = make([]byte, 2*config.MinimumChunkSize)
maker.bufferStart = 0
maker.bufferSize = 0
maker.startNewChunk()
return maker
}
// CreateMetaDataChunkMaker creates a chunk maker that always uses the variable-sized chunking algorithm
func CreateMetaDataChunkMaker(config *Config, chunkSize int) *ChunkMaker {
size := 1
for size*2 <= chunkSize {
size *= 2
}
if size != chunkSize {
LOG_FATAL("CHUNK_SIZE", "Invalid metadata chunk size: %d is not a power of 2", chunkSize)
return nil
}
maker := CreateFileChunkMaker(config, false)
maker.hashMask = uint64(chunkSize - 1)
maker.maximumChunkSize = chunkSize * 4
maker.minimumChunkSize = chunkSize / 4
maker.bufferCapacity = 2 * maker.minimumChunkSize
maker.buffer = make([]byte, maker.bufferCapacity)
return maker
}
@@ -90,62 +122,50 @@ func (maker *ChunkMaker) buzhashUpdate(sum uint64, out byte, in byte, length int
return rotateLeftByOne(sum) ^ rotateLeft(maker.randomTable[out], uint(length)) ^ maker.randomTable[in]
}
// ForEachChunk reads data from 'reader'. If EOF is encountered, it will call 'nextReader' to ask for next file. If
// 'nextReader' returns false, it will process remaining data in the buffer and then quit. When a chunk is identified,
// it will call 'endOfChunk' to return the chunk size and a boolean flag indicating if it is the last chunk.
func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *Chunk, final bool),
nextReader func(size int64, hash string) (io.Reader, bool)) {
func (maker *ChunkMaker) startNewChunk() (chunk *Chunk) {
maker.hashSum = 0
maker.minimumReached = false
if maker.hashOnly {
maker.chunk = maker.hashOnlyChunk
maker.chunk.Reset(true)
} else {
maker.chunk = maker.config.GetChunk()
maker.chunk.Reset(true)
}
return
}
maker.bufferStart = 0
maker.bufferSize = 0
var minimumReached bool
var hashSum uint64
var chunk *Chunk
func (maker *ChunkMaker) AddData(reader io.Reader, sendChunk func(*Chunk)) (int64, string) {
isEOF := false
fileSize := int64(0)
fileHasher := maker.config.NewFileHasher()
// Start a new chunk.
startNewChunk := func() {
hashSum = 0
minimumReached = false
if maker.hashOnly {
chunk = maker.hashOnlyChunk
chunk.Reset(true)
} else {
chunk = maker.config.GetChunk()
chunk.Reset(true)
}
}
// Move data from the buffer to the chunk.
fill := func(count int) {
if maker.bufferStart+count < maker.bufferCapacity {
chunk.Write(maker.buffer[maker.bufferStart : maker.bufferStart+count])
maker.chunk.Write(maker.buffer[maker.bufferStart : maker.bufferStart+count])
maker.bufferStart += count
maker.bufferSize -= count
} else {
chunk.Write(maker.buffer[maker.bufferStart:])
chunk.Write(maker.buffer[:count-(maker.bufferCapacity-maker.bufferStart)])
maker.chunk.Write(maker.buffer[maker.bufferStart:])
maker.chunk.Write(maker.buffer[:count-(maker.bufferCapacity-maker.bufferStart)])
maker.bufferStart = count - (maker.bufferCapacity - maker.bufferStart)
maker.bufferSize -= count
}
}
startNewChunk()
var err error
isEOF := false
if maker.minimumChunkSize == maker.maximumChunkSize {
if maker.bufferCapacity < maker.minimumChunkSize {
maker.buffer = make([]byte, maker.minimumChunkSize)
if reader == nil {
return 0, ""
}
for {
maker.startNewChunk()
maker.bufferStart = 0
for maker.bufferStart < maker.minimumChunkSize && !isEOF {
count, err := reader.Read(maker.buffer[maker.bufferStart:maker.minimumChunkSize])
@@ -153,7 +173,7 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
if err != nil {
if err != io.EOF {
LOG_ERROR("CHUNK_MAKER", "Failed to read %d bytes: %s", count, err.Error())
return
return 0, ""
} else {
isEOF = true
}
@@ -161,26 +181,15 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
maker.bufferStart += count
}
fileHasher.Write(maker.buffer[:maker.bufferStart])
fileSize += int64(maker.bufferStart)
chunk.Write(maker.buffer[:maker.bufferStart])
if maker.bufferStart > 0 {
fileHasher.Write(maker.buffer[:maker.bufferStart])
fileSize += int64(maker.bufferStart)
maker.chunk.Write(maker.buffer[:maker.bufferStart])
sendChunk(maker.chunk)
}
if isEOF {
var ok bool
reader, ok = nextReader(fileSize, hex.EncodeToString(fileHasher.Sum(nil)))
if !ok {
endOfChunk(chunk, true)
return
} else {
endOfChunk(chunk, false)
startNewChunk()
fileSize = 0
fileHasher = maker.config.NewFileHasher()
isEOF = false
}
} else {
endOfChunk(chunk, false)
startNewChunk()
return fileSize, hex.EncodeToString(fileHasher.Sum(nil))
}
}
@@ -189,7 +198,7 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
for {
// If the buffer still has some space left and EOF is not seen, read more data.
for maker.bufferSize < maker.bufferCapacity && !isEOF {
for maker.bufferSize < maker.bufferCapacity && !isEOF && reader != nil {
start := maker.bufferStart + maker.bufferSize
count := maker.bufferCapacity - start
if start >= maker.bufferCapacity {
@@ -201,7 +210,7 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
if err != nil && err != io.EOF {
LOG_ERROR("CHUNK_MAKER", "Failed to read %d bytes: %s", count, err.Error())
return
return 0, ""
}
maker.bufferSize += count
@@ -210,54 +219,55 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
// if EOF is seen, try to switch to next file and continue
if err == io.EOF {
var ok bool
reader, ok = nextReader(fileSize, hex.EncodeToString(fileHasher.Sum(nil)))
if !ok {
isEOF = true
} else {
fileSize = 0
fileHasher = maker.config.NewFileHasher()
isEOF = false
}
isEOF = true
break
}
}
// No eough data to meet the minimum chunk size requirement, so just return as a chunk.
if maker.bufferSize < maker.minimumChunkSize {
fill(maker.bufferSize)
endOfChunk(chunk, true)
return
if reader == nil {
fill(maker.bufferSize)
if maker.chunk.GetLength() > 0 {
sendChunk(maker.chunk)
}
return 0, ""
} else if isEOF {
return fileSize, hex.EncodeToString(fileHasher.Sum(nil))
} else {
continue
}
}
// Minimum chunk size has been reached. Calculate the buzhash for the minimum size chunk.
if !minimumReached {
if !maker.minimumReached {
bytes := maker.minimumChunkSize
if maker.bufferStart+bytes < maker.bufferCapacity {
hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:maker.bufferStart+bytes])
maker.hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:maker.bufferStart+bytes])
} else {
hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:])
hashSum = maker.buzhashSum(hashSum,
maker.hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:])
maker.hashSum = maker.buzhashSum(maker.hashSum,
maker.buffer[:bytes-(maker.bufferCapacity-maker.bufferStart)])
}
if (hashSum & maker.hashMask) == 0 {
if (maker.hashSum & maker.hashMask) == 0 {
// This is a minimum size chunk
fill(bytes)
endOfChunk(chunk, false)
startNewChunk()
sendChunk(maker.chunk)
maker.startNewChunk()
continue
}
minimumReached = true
maker.minimumReached = true
}
// Now check the buzhash of the data in the buffer, shifting one byte at a time.
bytes := maker.bufferSize - maker.minimumChunkSize
isEOC := false
maxSize := maker.maximumChunkSize - chunk.GetLength()
for i := 0; i < maker.bufferSize-maker.minimumChunkSize; i++ {
isEOC := false // chunk boundary found
maxSize := maker.maximumChunkSize - maker.chunk.GetLength()
for i := 0; i < bytes; i++ {
out := maker.bufferStart + i
if out >= maker.bufferCapacity {
out -= maker.bufferCapacity
@@ -267,8 +277,8 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
in -= maker.bufferCapacity
}
hashSum = maker.buzhashUpdate(hashSum, maker.buffer[out], maker.buffer[in], maker.minimumChunkSize)
if (hashSum&maker.hashMask) == 0 || i == maxSize-maker.minimumChunkSize-1 {
maker.hashSum = maker.buzhashUpdate(maker.hashSum, maker.buffer[out], maker.buffer[in], maker.minimumChunkSize)
if (maker.hashSum&maker.hashMask) == 0 || i == maxSize-maker.minimumChunkSize-1 {
// A chunk is completed.
bytes = i + 1 + maker.minimumChunkSize
isEOC = true
@@ -277,21 +287,20 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
}
fill(bytes)
if isEOC {
if isEOF && maker.bufferSize == 0 {
endOfChunk(chunk, true)
return
sendChunk(maker.chunk)
maker.startNewChunk()
} else {
if reader == nil {
fill(maker.minimumChunkSize)
sendChunk(maker.chunk)
maker.startNewChunk()
return 0, ""
}
endOfChunk(chunk, false)
startNewChunk()
continue
}
if isEOF {
fill(maker.bufferSize)
endOfChunk(chunk, true)
return
return fileSize, hex.EncodeToString(fileHasher.Sum(nil))
}
}
}

View File

@@ -7,14 +7,12 @@ package duplicacy
import (
"bytes"
crypto_rand "crypto/rand"
"io"
"math/rand"
"sort"
"testing"
)
func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunkSize,
bufferCapacity int) ([]string, int) {
func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunkSize int) ([]string, int) {
config := CreateConfig()
@@ -27,14 +25,12 @@ func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunk
config.HashKey = DEFAULT_KEY
config.IDKey = DEFAULT_KEY
maker := CreateChunkMaker(config, false)
maker := CreateFileChunkMaker(config, false)
var chunks []string
totalChunkSize := 0
totalFileSize := int64(0)
//LOG_INFO("CHUNK_SPLIT", "bufferCapacity: %d", bufferCapacity)
buffers := make([]*bytes.Buffer, n)
sizes := make([]int, n)
sizes[0] = 0
@@ -42,7 +38,7 @@ func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunk
same := true
for same {
same = false
sizes[i] = rand.Int() % n
sizes[i] = rand.Int() % len(content)
for j := 0; j < i; j++ {
if sizes[i] == sizes[j] {
same = true
@@ -59,22 +55,17 @@ func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunk
}
buffers[n-1] = bytes.NewBuffer(content[sizes[n-1]:])
i := 0
chunkFunc := func(chunk *Chunk) {
chunks = append(chunks, chunk.GetHash())
totalChunkSize += chunk.GetLength()
config.PutChunk(chunk)
}
maker.ForEachChunk(buffers[0],
func(chunk *Chunk, final bool) {
//LOG_INFO("CHUNK_SPLIT", "i: %d, chunk: %s, size: %d", i, chunk.GetHash(), size)
chunks = append(chunks, chunk.GetHash())
totalChunkSize += chunk.GetLength()
},
func(size int64, hash string) (io.Reader, bool) {
totalFileSize += size
i++
if i >= len(buffers) {
return nil, false
}
return buffers[i], true
})
for _, buffer := range buffers {
fileSize, _ := maker.AddData(buffer, chunkFunc)
totalFileSize += fileSize
}
maker.AddData(nil, chunkFunc)
if totalFileSize != int64(totalChunkSize) {
LOG_ERROR("CHUNK_SPLIT", "total chunk size: %d, total file size: %d", totalChunkSize, totalFileSize)
@@ -96,35 +87,28 @@ func TestChunkMaker(t *testing.T) {
continue
}
chunkArray1, totalSize1 := splitIntoChunks(content, 10, 32, 64, 16, 32)
chunkArray1, totalSize1 := splitIntoChunks(content, 10, 32, 64, 16)
capacities := [...]int{32, 33, 34, 61, 62, 63, 64, 65, 66, 126, 127, 128, 129, 130,
255, 256, 257, 511, 512, 513, 1023, 1024, 1025,
32, 48, 64, 128, 256, 512, 1024, 2048}
//capacities := [...]int { 32 }
for _, n := range [...]int{6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} {
chunkArray2, totalSize2 := splitIntoChunks(content, n, 32, 64, 16)
for _, capacity := range capacities {
if totalSize1 != totalSize2 {
t.Errorf("[size %d] total size is %d instead of %d",
size, totalSize2, totalSize1)
}
for _, n := range [...]int{6, 7, 8, 9, 10} {
chunkArray2, totalSize2 := splitIntoChunks(content, n, 32, 64, 16, capacity)
if totalSize1 != totalSize2 {
t.Errorf("[size %d, capacity %d] total size is %d instead of %d",
size, capacity, totalSize2, totalSize1)
}
if len(chunkArray1) != len(chunkArray2) {
t.Errorf("[size %d, capacity %d] number of chunks is %d instead of %d",
size, capacity, len(chunkArray2), len(chunkArray1))
} else {
for i := 0; i < len(chunkArray1); i++ {
if chunkArray1[i] != chunkArray2[i] {
t.Errorf("[size %d, capacity %d, chunk %d] chunk is different", size, capacity, i)
}
if len(chunkArray1) != len(chunkArray2) {
t.Errorf("[size %d] number of chunks is %d instead of %d",
size, len(chunkArray2), len(chunkArray1))
} else {
for i := 0; i < len(chunkArray1); i++ {
if chunkArray1[i] != chunkArray2[i] {
t.Errorf("[size %d, chunk %d] chunk is different", size, i)
}
}
}
}
}

View File

@@ -5,6 +5,7 @@
package duplicacy
import (
"io"
"sync"
"sync/atomic"
"time"
@@ -12,42 +13,69 @@ import (
// These are operations that ChunkOperator will perform.
const (
ChunkOperationFind = 0
ChunkOperationDelete = 1
ChunkOperationFossilize = 2
ChunkOperationResurrect = 3
ChunkOperationDownload = 0
ChunkOperationUpload = 1
ChunkOperationDelete = 2
ChunkOperationFossilize = 3
ChunkOperationResurrect = 4
ChunkOperationFind = 5
)
// ChunkOperatorTask is used to pass parameters for different kinds of chunk operations.
type ChunkOperatorTask struct {
operation int // The type of operation
chunkID string // The chunk id
filePath string // The path of the chunk file; it may be empty
// ChunkTask is used to pass parameters for different kinds of chunk operations.
type ChunkTask struct {
operation int // The type of operation
chunkID string // The chunk id
chunkHash string // The chunk hash
chunkIndex int // The chunk index
filePath string // The path of the chunk file; it may be empty
isMetadata bool
chunk *Chunk
completionFunc func(chunk *Chunk, chunkIndex int)
}
// ChunkOperator is capable of performing multi-threaded operations on chunks.
type ChunkOperator struct {
numberOfActiveTasks int64 // The number of chunks that are being operated on
storage Storage // This storage
threads int // Number of threads
taskQueue chan ChunkOperatorTask // Operating goroutines are waiting on this channel for input
stopChannel chan bool // Used to stop all the goroutines
config *Config // Associated config
storage Storage // This storage
snapshotCache *FileStorage
showStatistics bool
threads int // Number of threads
taskQueue chan ChunkTask // Operating goroutines are waiting on this channel for input
stopChannel chan bool // Used to stop all the goroutines
fossils []string // For fossilize operation, the paths of the fossils are stored in this slice
fossilsLock *sync.Mutex // The lock for 'fossils'
numberOfActiveTasks int64 // The number of chunks that are being operated on
fossils []string // For fossilize operation, the paths of the fossils are stored in this slice
collectionLock *sync.Mutex // The lock for accessing 'fossils'
startTime int64 // The time it starts downloading
totalChunkSize int64 // Total chunk size
downloadedChunkSize int64 // Downloaded chunk size
allowFailures bool // Whether to fail on download error, or continue
NumberOfFailedChunks int64 // The number of chunks that can't be downloaded
UploadCompletionFunc func(chunk *Chunk, chunkIndex int, inCache bool, chunkSize int, uploadSize int)
}
// CreateChunkOperator creates a new ChunkOperator.
func CreateChunkOperator(storage Storage, threads int) *ChunkOperator {
func CreateChunkOperator(config *Config, storage Storage, snapshotCache *FileStorage, showStatistics bool, threads int, allowFailures bool) *ChunkOperator {
operator := &ChunkOperator{
config: config,
storage: storage,
snapshotCache: snapshotCache,
showStatistics: showStatistics,
threads: threads,
taskQueue: make(chan ChunkOperatorTask, threads*4),
taskQueue: make(chan ChunkTask, threads),
stopChannel: make(chan bool),
fossils: make([]string, 0),
fossilsLock: &sync.Mutex{},
collectionLock: &sync.Mutex{},
allowFailures: allowFailures,
}
// Start the operator goroutines
@@ -84,38 +112,78 @@ func (operator *ChunkOperator) Stop() {
atomic.AddInt64(&operator.numberOfActiveTasks, int64(-1))
}
func (operator *ChunkOperator) AddTask(operation int, chunkID string, filePath string) {
func (operator *ChunkOperator) WaitForCompletion() {
task := ChunkOperatorTask{
operation: operation,
chunkID: chunkID,
filePath: filePath,
for atomic.LoadInt64(&operator.numberOfActiveTasks) > 0 {
time.Sleep(100 * time.Millisecond)
}
operator.taskQueue <- task
atomic.AddInt64(&operator.numberOfActiveTasks, int64(1))
}
func (operator *ChunkOperator) Find(chunkID string) {
operator.AddTask(ChunkOperationFind, chunkID, "")
func (operator *ChunkOperator) AddTask(operation int, chunkID string, chunkHash string, filePath string, chunkIndex int, chunk *Chunk, isMetadata bool, completionFunc func(*Chunk, int)) {
task := ChunkTask {
operation: operation,
chunkID: chunkID,
chunkHash: chunkHash,
chunkIndex: chunkIndex,
filePath: filePath,
chunk: chunk,
isMetadata: isMetadata,
completionFunc: completionFunc,
}
operator.taskQueue <- task
atomic.AddInt64(&operator.numberOfActiveTasks, int64(1))
return
}
func (operator *ChunkOperator) Download(chunkHash string, chunkIndex int, isMetadata bool) *Chunk {
chunkID := operator.config.GetChunkIDFromHash(chunkHash)
completionChannel := make(chan *Chunk)
completionFunc := func(chunk *Chunk, chunkIndex int) {
completionChannel <- chunk
}
operator.AddTask(ChunkOperationDownload, chunkID, chunkHash, "", chunkIndex, nil, isMetadata, completionFunc)
return <- completionChannel
}
func (operator *ChunkOperator) DownloadAsync(chunkHash string, chunkIndex int, isMetadata bool, completionFunc func(*Chunk, int)) {
chunkID := operator.config.GetChunkIDFromHash(chunkHash)
operator.AddTask(ChunkOperationDownload, chunkID, chunkHash, "", chunkIndex, nil, isMetadata, completionFunc)
}
func (operator *ChunkOperator) Upload(chunk *Chunk, chunkIndex int, isMetadata bool) {
chunkHash := chunk.GetHash()
chunkID := operator.config.GetChunkIDFromHash(chunkHash)
operator.AddTask(ChunkOperationUpload, chunkID, chunkHash, "", chunkIndex, chunk, isMetadata, nil)
}
func (operator *ChunkOperator) Delete(chunkID string, filePath string) {
operator.AddTask(ChunkOperationDelete, chunkID, filePath)
operator.AddTask(ChunkOperationDelete, chunkID, "", filePath, 0, nil, false, nil)
}
func (operator *ChunkOperator) Fossilize(chunkID string, filePath string) {
operator.AddTask(ChunkOperationFossilize, chunkID, filePath)
operator.AddTask(ChunkOperationFossilize, chunkID, "", filePath, 0, nil, false, nil)
}
func (operator *ChunkOperator) Resurrect(chunkID string, filePath string) {
operator.AddTask(ChunkOperationResurrect, chunkID, filePath)
operator.AddTask(ChunkOperationResurrect, chunkID, "", filePath, 0, nil, false, nil)
}
func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
func (operator *ChunkOperator) Run(threadIndex int, task ChunkTask) {
defer func() {
atomic.AddInt64(&operator.numberOfActiveTasks, int64(-1))
}()
if task.operation == ChunkOperationDownload {
operator.DownloadChunk(threadIndex, task)
return
} else if task.operation == ChunkOperationUpload {
operator.UploadChunk(threadIndex, task)
return
}
// task.filePath may be empty. If so, find the chunk first.
if task.operation == ChunkOperationDelete || task.operation == ChunkOperationFossilize {
if task.filePath == "" {
@@ -132,9 +200,9 @@ func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
fossilPath, exist, _, _ := operator.storage.FindChunk(threadIndex, task.chunkID, true)
if exist {
LOG_WARN("CHUNK_FOSSILIZE", "Chunk %s is already a fossil", task.chunkID)
operator.fossilsLock.Lock()
operator.collectionLock.Lock()
operator.fossils = append(operator.fossils, fossilPath)
operator.fossilsLock.Unlock()
operator.collectionLock.Unlock()
} else {
LOG_ERROR("CHUNK_FIND", "Chunk %s does not exist in the storage", task.chunkID)
}
@@ -175,17 +243,17 @@ func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
if err == nil {
LOG_TRACE("CHUNK_DELETE", "Deleted chunk file %s as the fossil already exists", task.chunkID)
}
operator.fossilsLock.Lock()
operator.collectionLock.Lock()
operator.fossils = append(operator.fossils, fossilPath)
operator.fossilsLock.Unlock()
operator.collectionLock.Unlock()
} else {
LOG_ERROR("CHUNK_DELETE", "Failed to fossilize the chunk %s: %v", task.chunkID, err)
}
} else {
LOG_TRACE("CHUNK_FOSSILIZE", "The chunk %s has been marked as a fossil", task.chunkID)
operator.fossilsLock.Lock()
operator.collectionLock.Lock()
operator.fossils = append(operator.fossils, fossilPath)
operator.fossilsLock.Unlock()
operator.collectionLock.Unlock()
}
} else if task.operation == ChunkOperationResurrect {
chunkPath, exist, _, err := operator.storage.FindChunk(threadIndex, task.chunkID, false)
@@ -207,3 +275,267 @@ func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
}
}
}
// Download downloads a chunk from the storage.
func (operator *ChunkOperator) DownloadChunk(threadIndex int, task ChunkTask) {
cachedPath := ""
chunk := operator.config.GetChunk()
chunk.isMetadata = task.isMetadata
chunkID := task.chunkID
defer func() {
if chunk != nil {
operator.config.PutChunk(chunk)
}
} ()
if task.isMetadata && operator.snapshotCache != nil {
var exist bool
var err error
// Reset the chunk with a hasher -- we're reading from the cache where chunk are not encrypted or compressed
chunk.Reset(true)
cachedPath, exist, _, err = operator.snapshotCache.FindChunk(threadIndex, chunkID, false)
if err != nil {
LOG_WARN("DOWNLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
} else if exist {
err = operator.snapshotCache.DownloadFile(0, cachedPath, chunk)
if err != nil {
LOG_WARN("DOWNLOAD_CACHE", "Failed to load the chunk %s from the snapshot cache: %v", chunkID, err)
} else {
actualChunkID := chunk.GetID()
if actualChunkID != chunkID {
LOG_WARN("DOWNLOAD_CACHE_CORRUPTED",
"The chunk %s load from the snapshot cache has a hash id of %s", chunkID, actualChunkID)
} else {
LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been loaded from the snapshot cache", chunkID)
task.completionFunc(chunk, task.chunkIndex)
chunk = nil
return
}
}
}
}
// Reset the chunk without a hasher -- the downloaded content will be encrypted and/or compressed and the hasher
// will be set up before the encryption
chunk.Reset(false)
chunk.isMetadata = task.isMetadata
// If failures are allowed, complete the task properly
completeFailedChunk := func() {
atomic.AddInt64(&operator.NumberOfFailedChunks, 1)
if operator.allowFailures {
task.completionFunc(chunk, task.chunkIndex)
}
}
const MaxDownloadAttempts = 3
for downloadAttempt := 0; ; downloadAttempt++ {
// Find the chunk by ID first.
chunkPath, exist, _, err := operator.storage.FindChunk(threadIndex, chunkID, false)
if err != nil {
completeFailedChunk()
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
return
}
if !exist {
// No chunk is found. Have to find it in the fossil pool again.
fossilPath, exist, _, err := operator.storage.FindChunk(threadIndex, chunkID, true)
if err != nil {
completeFailedChunk()
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
return
}
if !exist {
retry := false
// Retry for Hubic or WebDAV as it may return 404 even when the chunk exists
if _, ok := operator.storage.(*HubicStorage); ok {
retry = true
}
if _, ok := operator.storage.(*WebDAVStorage); ok {
retry = true
}
if retry && downloadAttempt < MaxDownloadAttempts {
LOG_WARN("DOWNLOAD_RETRY", "Failed to find the chunk %s; retrying", chunkID)
continue
}
// A chunk is not found. This is a serious error and hopefully it will never happen.
completeFailedChunk()
if err != nil {
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found: %v", chunkID, err)
} else {
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found", chunkID)
}
return
}
// We can't download the fossil directly. We have to turn it back into a regular chunk and try
// downloading again.
err = operator.storage.MoveFile(threadIndex, fossilPath, chunkPath)
if err != nil {
completeFailedChunk()
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to resurrect chunk %s: %v", chunkID, err)
return
}
LOG_WARN("DOWNLOAD_RESURRECT", "Fossil %s has been resurrected", chunkID)
continue
}
err = operator.storage.DownloadFile(threadIndex, chunkPath, chunk)
if err != nil {
_, isHubic := operator.storage.(*HubicStorage)
// Retry on EOF or if it is a Hubic backend as it may return 404 even when the chunk exists
if (err == io.ErrUnexpectedEOF || isHubic) && downloadAttempt < MaxDownloadAttempts {
LOG_WARN("DOWNLOAD_RETRY", "Failed to download the chunk %s: %v; retrying", chunkID, err)
chunk.Reset(false)
chunk.isMetadata = task.isMetadata
continue
} else {
completeFailedChunk()
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to download the chunk %s: %v", chunkID, err)
return
}
}
err = chunk.Decrypt(operator.config.ChunkKey, task.chunkHash)
if err != nil {
if downloadAttempt < MaxDownloadAttempts {
LOG_WARN("DOWNLOAD_RETRY", "Failed to decrypt the chunk %s: %v; retrying", chunkID, err)
chunk.Reset(false)
chunk.isMetadata = task.isMetadata
continue
} else {
completeFailedChunk()
LOG_WERROR(operator.allowFailures, "DOWNLOAD_DECRYPT", "Failed to decrypt the chunk %s: %v", chunkID, err)
return
}
}
actualChunkID := chunk.GetID()
if actualChunkID != chunkID {
if downloadAttempt < MaxDownloadAttempts {
LOG_WARN("DOWNLOAD_RETRY", "The chunk %s has a hash id of %s; retrying", chunkID, actualChunkID)
chunk.Reset(false)
chunk.isMetadata = task.isMetadata
continue
} else {
completeFailedChunk()
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CORRUPTED", "The chunk %s has a hash id of %s", chunkID, actualChunkID)
return
}
}
break
}
if chunk.isMetadata && len(cachedPath) > 0 {
// Save a copy to the local snapshot cache
err := operator.snapshotCache.UploadFile(threadIndex, cachedPath, chunk.GetBytes())
if err != nil {
LOG_WARN("DOWNLOAD_CACHE", "Failed to add the chunk %s to the snapshot cache: %v", chunkID, err)
}
}
downloadedChunkSize := atomic.AddInt64(&operator.downloadedChunkSize, int64(chunk.GetLength()))
if (operator.showStatistics || IsTracing()) && operator.totalChunkSize > 0 {
now := time.Now().Unix()
if now <= operator.startTime {
now = operator.startTime + 1
}
speed := downloadedChunkSize / (now - operator.startTime)
remainingTime := int64(0)
if speed > 0 {
remainingTime = (operator.totalChunkSize-downloadedChunkSize)/speed + 1
}
percentage := float32(downloadedChunkSize * 1000 / operator.totalChunkSize)
LOG_INFO("DOWNLOAD_PROGRESS", "Downloaded chunk %d size %d, %sB/s %s %.1f%%",
task.chunkIndex+1, chunk.GetLength(),
PrettySize(speed), PrettyTime(remainingTime), percentage/10)
} else {
LOG_DEBUG("CHUNK_DOWNLOAD", "Chunk %s has been downloaded", chunkID)
}
task.completionFunc(chunk, task.chunkIndex)
chunk = nil
return
}
// UploadChunk is called by the task goroutines to perform the actual uploading
func (operator *ChunkOperator) UploadChunk(threadIndex int, task ChunkTask) bool {
chunk := task.chunk
chunkID := task.chunkID
chunkSize := chunk.GetLength()
// For a snapshot chunk, verify that its chunk id is correct
if task.isMetadata {
chunk.VerifyID()
}
if task.isMetadata && operator.storage.IsCacheNeeded() {
// Save a copy to the local snapshot.
chunkPath, exist, _, err := operator.snapshotCache.FindChunk(threadIndex, chunkID, false)
if err != nil {
LOG_WARN("UPLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
} else if exist {
LOG_DEBUG("CHUNK_CACHE", "Chunk %s already exists in the snapshot cache", chunkID)
} else if err = operator.snapshotCache.UploadFile(threadIndex, chunkPath, chunk.GetBytes()); err != nil {
LOG_WARN("UPLOAD_CACHE", "Failed to save the chunk %s to the snapshot cache: %v", chunkID, err)
} else {
LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been saved to the snapshot cache", chunkID)
}
}
// This returns the path the chunk file should be at.
chunkPath, exist, _, err := operator.storage.FindChunk(threadIndex, chunkID, false)
if err != nil {
LOG_ERROR("UPLOAD_CHUNK", "Failed to find the path for the chunk %s: %v", chunkID, err)
return false
}
if exist {
// Chunk deduplication by name in effect here.
LOG_DEBUG("CHUNK_DUPLICATE", "Chunk %s already exists", chunkID)
operator.UploadCompletionFunc(chunk, task.chunkIndex, false, chunkSize, 0)
return false
}
// Encrypt the chunk only after we know that it must be uploaded.
err = chunk.Encrypt(operator.config.ChunkKey, chunk.GetHash(), task.isMetadata)
if err != nil {
LOG_ERROR("UPLOAD_CHUNK", "Failed to encrypt the chunk %s: %v", chunkID, err)
return false
}
if !operator.config.dryRun {
err = operator.storage.UploadFile(threadIndex, chunkPath, chunk.GetBytes())
if err != nil {
LOG_ERROR("UPLOAD_CHUNK", "Failed to upload the chunk %s: %v", chunkID, err)
return false
}
LOG_DEBUG("CHUNK_UPLOAD", "Chunk %s has been uploaded", chunkID)
} else {
LOG_DEBUG("CHUNK_UPLOAD", "Uploading was skipped for chunk %s", chunkID)
}
operator.UploadCompletionFunc(chunk, task.chunkIndex, false, chunkSize, chunk.GetLength())
return true
}

View File

@@ -15,11 +15,11 @@ import (
"math/rand"
)
func TestUploaderAndDownloader(t *testing.T) {
func TestChunkOperator(t *testing.T) {
rand.Seed(time.Now().UnixNano())
setTestingT(t)
SetLoggingLevel(INFO)
SetLoggingLevel(DEBUG)
defer func() {
if r := recover(); r != nil {
@@ -87,35 +87,25 @@ func TestUploaderAndDownloader(t *testing.T) {
totalFileSize += chunk.GetLength()
}
completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
chunkOperator := CreateChunkOperator(config, storage, nil, false, *testThreads, false)
chunkOperator.UploadCompletionFunc = func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
t.Logf("Chunk %s size %d (%d/%d) uploaded", chunk.GetID(), chunkSize, chunkIndex, len(chunks))
}
chunkUploader := CreateChunkUploader(config, storage, nil, *testThreads, nil)
chunkUploader.completionFunc = completionFunc
chunkUploader.Start()
for i, chunk := range chunks {
chunkUploader.StartChunk(chunk, i)
chunkOperator.Upload(chunk, i, false)
}
chunkUploader.Stop()
chunkDownloader := CreateChunkDownloader(config, storage, nil, true, *testThreads, false)
chunkDownloader.totalChunkSize = int64(totalFileSize)
for _, chunk := range chunks {
chunkDownloader.AddChunk(chunk.GetHash())
}
chunkOperator.WaitForCompletion()
for i, chunk := range chunks {
downloaded := chunkDownloader.WaitForChunk(i)
downloaded := chunkOperator.Download(chunk.GetHash(), i, false)
if downloaded.GetID() != chunk.GetID() {
t.Errorf("Uploaded: %s, downloaded: %s", chunk.GetID(), downloaded.GetID())
}
}
chunkDownloader.Stop()
chunkOperator.Stop()
for _, file := range listChunks(storage) {
err = storage.DeleteFile(0, "chunks/"+file)

View File

@@ -1,151 +0,0 @@
// Copyright (c) Acrosync LLC. All rights reserved.
// Free for personal use and commercial trial
// Commercial use requires per-user licenses available from https://duplicacy.com
package duplicacy
import (
"sync/atomic"
"time"
)
// ChunkUploadTask represents a chunk to be uploaded.
type ChunkUploadTask struct {
chunk *Chunk
chunkIndex int
}
// ChunkUploader uploads chunks to the storage using one or more uploading goroutines. Chunks are added
// by the call to StartChunk(), and then passed to the uploading goroutines. The completion function is
// called when the downloading is completed. Note that ChunkUploader does not release chunks to the
// chunk pool; instead
type ChunkUploader struct {
config *Config // Associated config
storage Storage // Download from this storage
snapshotCache *FileStorage // Used as cache if not nil; usually for uploading snapshot chunks
threads int // Number of uploading goroutines
taskQueue chan ChunkUploadTask // Uploading goroutines are listening on this channel for upload jobs
stopChannel chan bool // Used to terminate uploading goroutines
numberOfUploadingTasks int32 // The number of uploading tasks
// Uploading goroutines call this function after having downloaded chunks
completionFunc func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int)
}
// CreateChunkUploader creates a chunk uploader.
func CreateChunkUploader(config *Config, storage Storage, snapshotCache *FileStorage, threads int,
completionFunc func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int)) *ChunkUploader {
uploader := &ChunkUploader{
config: config,
storage: storage,
snapshotCache: snapshotCache,
threads: threads,
taskQueue: make(chan ChunkUploadTask, 1),
stopChannel: make(chan bool),
completionFunc: completionFunc,
}
return uploader
}
// Starts starts uploading goroutines.
func (uploader *ChunkUploader) Start() {
for i := 0; i < uploader.threads; i++ {
go func(threadIndex int) {
defer CatchLogException()
for {
select {
case task := <-uploader.taskQueue:
uploader.Upload(threadIndex, task)
case <-uploader.stopChannel:
return
}
}
}(i)
}
}
// StartChunk sends a chunk to be uploaded to a waiting uploading goroutine. It may block if all uploading goroutines are busy.
func (uploader *ChunkUploader) StartChunk(chunk *Chunk, chunkIndex int) {
atomic.AddInt32(&uploader.numberOfUploadingTasks, 1)
uploader.taskQueue <- ChunkUploadTask{
chunk: chunk,
chunkIndex: chunkIndex,
}
}
// Stop stops all uploading goroutines.
func (uploader *ChunkUploader) Stop() {
for atomic.LoadInt32(&uploader.numberOfUploadingTasks) > 0 {
time.Sleep(100 * time.Millisecond)
}
for i := 0; i < uploader.threads; i++ {
uploader.stopChannel <- false
}
}
// Upload is called by the uploading goroutines to perform the actual uploading
func (uploader *ChunkUploader) Upload(threadIndex int, task ChunkUploadTask) bool {
chunk := task.chunk
chunkSize := chunk.GetLength()
chunkID := chunk.GetID()
// For a snapshot chunk, verify that its chunk id is correct
if uploader.snapshotCache != nil {
chunk.VerifyID()
}
if uploader.snapshotCache != nil && uploader.storage.IsCacheNeeded() {
// Save a copy to the local snapshot.
chunkPath, exist, _, err := uploader.snapshotCache.FindChunk(threadIndex, chunkID, false)
if err != nil {
LOG_WARN("UPLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
} else if exist {
LOG_DEBUG("CHUNK_CACHE", "Chunk %s already exists in the snapshot cache", chunkID)
} else if err = uploader.snapshotCache.UploadFile(threadIndex, chunkPath, chunk.GetBytes()); err != nil {
LOG_WARN("UPLOAD_CACHE", "Failed to save the chunk %s to the snapshot cache: %v", chunkID, err)
} else {
LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been saved to the snapshot cache", chunkID)
}
}
// This returns the path the chunk file should be at.
chunkPath, exist, _, err := uploader.storage.FindChunk(threadIndex, chunkID, false)
if err != nil {
LOG_ERROR("UPLOAD_CHUNK", "Failed to find the path for the chunk %s: %v", chunkID, err)
return false
}
if exist {
// Chunk deduplication by name in effect here.
LOG_DEBUG("CHUNK_DUPLICATE", "Chunk %s already exists", chunkID)
uploader.completionFunc(chunk, task.chunkIndex, true, chunkSize, 0)
atomic.AddInt32(&uploader.numberOfUploadingTasks, -1)
return false
}
// Encrypt the chunk only after we know that it must be uploaded.
err = chunk.Encrypt(uploader.config.ChunkKey, chunk.GetHash(), uploader.snapshotCache != nil)
if err != nil {
LOG_ERROR("UPLOAD_CHUNK", "Failed to encrypt the chunk %s: %v", chunkID, err)
return false
}
if !uploader.config.dryRun {
err = uploader.storage.UploadFile(threadIndex, chunkPath, chunk.GetBytes())
if err != nil {
LOG_ERROR("UPLOAD_CHUNK", "Failed to upload the chunk %s: %v", chunkID, err)
return false
}
LOG_DEBUG("CHUNK_UPLOAD", "Chunk %s has been uploaded", chunkID)
} else {
LOG_DEBUG("CHUNK_UPLOAD", "Uploading was skipped for chunk %s", chunkID)
}
uploader.completionFunc(chunk, task.chunkIndex, false, chunkSize, chunk.GetLength())
atomic.AddInt32(&uploader.numberOfUploadingTasks, -1)
return true
}

View File

@@ -16,6 +16,11 @@ import (
"strconv"
"strings"
"time"
"bytes"
"crypto/sha256"
"github.com/vmihailenco/msgpack"
)
// This is the hidden directory in the repository for storing various files.
@@ -45,7 +50,7 @@ type Entry struct {
EndChunk int
EndOffset int
Attributes map[string][]byte
Attributes *map[string][]byte
}
// CreateEntry creates an entry from file properties.
@@ -93,6 +98,27 @@ func CreateEntryFromFileInfo(fileInfo os.FileInfo, directory string) *Entry {
return entry
}
func (entry *Entry) Copy() *Entry {
return &Entry{
Path: entry.Path,
Size: entry.Size,
Time: entry.Time,
Mode: entry.Mode,
Link: entry.Link,
Hash: entry.Hash,
UID: entry.UID,
GID: entry.GID,
StartChunk: entry.StartChunk,
StartOffset: entry.StartOffset,
EndChunk: entry.EndChunk,
EndOffset: entry.EndOffset,
Attributes: entry.Attributes,
}
}
// CreateEntryFromJSON creates an entry from a json description.
func (entry *Entry) UnmarshalJSON(description []byte) (err error) {
@@ -175,17 +201,17 @@ func (entry *Entry) UnmarshalJSON(description []byte) (err error) {
if attributes, ok := value.(map[string]interface{}); !ok {
return fmt.Errorf("Attributes are invalid for file '%s' in the snapshot", entry.Path)
} else {
entry.Attributes = make(map[string][]byte)
entry.Attributes = &map[string][]byte{}
for name, object := range attributes {
if object == nil {
entry.Attributes[name] = []byte("")
(*entry.Attributes)[name] = []byte("")
} else if attributeInBase64, ok := object.(string); !ok {
return fmt.Errorf("Attribute '%s' is invalid for file '%s' in the snapshot", name, entry.Path)
} else if attribute, err := base64.StdEncoding.DecodeString(attributeInBase64); err != nil {
return fmt.Errorf("Failed to decode attribute '%s' for file '%s' in the snapshot: %v",
name, entry.Path, err)
} else {
entry.Attributes[name] = attribute
(*entry.Attributes)[name] = attribute
}
}
}
@@ -244,7 +270,7 @@ func (entry *Entry) convertToObject(encodeName bool) map[string]interface{} {
object["gid"] = entry.GID
}
if len(entry.Attributes) > 0 {
if entry.Attributes != nil && len(*entry.Attributes) > 0 {
object["attributes"] = entry.Attributes
}
@@ -259,6 +285,197 @@ func (entry *Entry) MarshalJSON() ([]byte, error) {
return description, err
}
var _ msgpack.CustomEncoder = (*Entry)(nil)
var _ msgpack.CustomDecoder = (*Entry)(nil)
func (entry *Entry) EncodeMsgpack(encoder *msgpack.Encoder) error {
err := encoder.EncodeString(entry.Path)
if err != nil {
return err
}
err = encoder.EncodeInt(entry.Size)
if err != nil {
return err
}
err = encoder.EncodeInt(entry.Time)
if err != nil {
return err
}
err = encoder.EncodeInt(int64(entry.Mode))
if err != nil {
return err
}
err = encoder.EncodeString(entry.Link)
if err != nil {
return err
}
err = encoder.EncodeString(entry.Hash)
if err != nil {
return err
}
err = encoder.EncodeInt(int64(entry.StartChunk))
if err != nil {
return err
}
err = encoder.EncodeInt(int64(entry.StartOffset))
if err != nil {
return err
}
err = encoder.EncodeInt(int64(entry.EndChunk))
if err != nil {
return err
}
err = encoder.EncodeInt(int64(entry.EndOffset))
if err != nil {
return err
}
err = encoder.EncodeInt(int64(entry.UID))
if err != nil {
return err
}
err = encoder.EncodeInt(int64(entry.GID))
if err != nil {
return err
}
var numberOfAttributes int64
if entry.Attributes != nil {
numberOfAttributes = int64(len(*entry.Attributes))
}
err = encoder.EncodeInt(numberOfAttributes)
if err != nil {
return err
}
if entry.Attributes != nil {
attributes := make([]string, numberOfAttributes)
i := 0
for attribute := range *entry.Attributes {
attributes[i] = attribute
i++
}
sort.Strings(attributes)
for _, attribute := range attributes {
err = encoder.EncodeString(attribute)
if err != nil {
return err
}
err = encoder.EncodeString(string((*entry.Attributes)[attribute]))
if err != nil {
return err
}
}
}
return nil
}
func (entry *Entry) DecodeMsgpack(decoder *msgpack.Decoder) error {
var err error
entry.Path, err = decoder.DecodeString()
if err != nil {
return err
}
entry.Size, err = decoder.DecodeInt64()
if err != nil {
return err
}
entry.Time, err = decoder.DecodeInt64()
if err != nil {
return err
}
mode, err := decoder.DecodeInt64()
if err != nil {
return err
}
entry.Mode = uint32(mode)
entry.Link, err = decoder.DecodeString()
if err != nil {
return err
}
entry.Hash, err = decoder.DecodeString()
if err != nil {
return err
}
startChunk, err := decoder.DecodeInt()
if err != nil {
return err
}
entry.StartChunk = int(startChunk)
startOffset, err := decoder.DecodeInt()
if err != nil {
return err
}
entry.StartOffset = int(startOffset)
endChunk, err := decoder.DecodeInt()
if err != nil {
return err
}
entry.EndChunk = int(endChunk)
endOffset, err := decoder.DecodeInt()
if err != nil {
return err
}
entry.EndOffset = int(endOffset)
uid, err := decoder.DecodeInt()
if err != nil {
return err
}
entry.UID = int(uid)
gid, err := decoder.DecodeInt()
if err != nil {
return err
}
entry.GID = int(gid)
numberOfAttributes, err := decoder.DecodeInt()
if err != nil {
return err
}
if numberOfAttributes > 0 {
entry.Attributes = &map[string][]byte{}
for i := 0; i < numberOfAttributes; i++ {
attribute, err := decoder.DecodeString()
if err != nil {
return err
}
value, err := decoder.DecodeString()
if err != nil {
return err
}
(*entry.Attributes)[attribute] = []byte(value)
}
}
return nil
}
func (entry *Entry) IsFile() bool {
return entry.Mode&uint32(os.ModeType) == 0
}
@@ -271,10 +488,27 @@ func (entry *Entry) IsLink() bool {
return entry.Mode&uint32(os.ModeSymlink) != 0
}
func (entry *Entry) IsComplete() bool {
return entry.Size >= 0
}
func (entry *Entry) GetPermissions() os.FileMode {
return os.FileMode(entry.Mode) & fileModeMask
}
func (entry *Entry) GetParent() string {
path := entry.Path
if path != "" && path[len(path) - 1] == '/' {
path = path[:len(path) - 1]
}
i := strings.LastIndex(path, "/")
if i == -1 {
return ""
} else {
return path[:i]
}
}
func (entry *Entry) IsSameAs(other *Entry) bool {
return entry.Size == other.Size && entry.Time <= other.Time+1 && entry.Time >= other.Time-1
}
@@ -326,7 +560,7 @@ func (entry *Entry) RestoreMetadata(fullPath string, fileInfo *os.FileInfo, setO
}
}
if len(entry.Attributes) > 0 {
if entry.Attributes != nil && len(*entry.Attributes) > 0 {
entry.SetAttributesToFile(fullPath)
}
@@ -335,47 +569,62 @@ func (entry *Entry) RestoreMetadata(fullPath string, fileInfo *os.FileInfo, setO
// Return -1 if 'left' should appear before 'right', 1 if opposite, and 0 if they are the same.
// Files are always arranged before subdirectories under the same parent directory.
func (left *Entry) Compare(right *Entry) int {
path1 := left.Path
path2 := right.Path
func ComparePaths(left string, right string) int {
p := 0
for ; p < len(path1) && p < len(path2); p++ {
if path1[p] != path2[p] {
for ; p < len(left) && p < len(right); p++ {
if left[p] != right[p] {
break
}
}
// c1, c2 is the first byte that differs
// c1, c2 are the first bytes that differ
var c1, c2 byte
if p < len(path1) {
c1 = path1[p]
if p < len(left) {
c1 = left[p]
}
if p < len(path2) {
c2 = path2[p]
if p < len(right) {
c2 = right[p]
}
// c3, c4 indicates how the current component ends
// c3 == '/': the current component is a directory
// c3 != '/': the current component is the last one
// c3, c4 indicate how the current component ends
// c3 == '/': the current component is a directory; c3 != '/': the current component is the last one
c3 := c1
for i := p; c3 != '/' && i < len(path1); i++ {
c3 = path1[i]
// last1, last2 means if the current compoent is the last component
last1 := true
for i := p; i < len(left); i++ {
c3 = left[i]
if c3 == '/' {
last1 = i == len(left) - 1
break
}
}
c4 := c2
for i := p; c4 != '/' && i < len(path2); i++ {
c4 = path2[i]
last2 := true
for i := p; i < len(right); i++ {
c4 = right[i]
if c4 == '/' {
last2 = i == len(right) - 1
break
}
}
if last1 != last2 {
if last1 {
return -1
} else {
return 1
}
}
if c3 == '/' {
if c4 == '/' {
// We are comparing two directory components
if c1 == '/' {
// left is shorter
// Note that c2 maybe smaller than c1 but c1 is '/' which is counted
// as 0
// left is shorter; note that c2 maybe smaller than c1 but c1 should be treated as 0 therefore
// this is a special case that must be handled separately
return -1
} else if c2 == '/' {
// right is shorter
@@ -397,6 +646,10 @@ func (left *Entry) Compare(right *Entry) int {
}
}
func (left *Entry) Compare(right *Entry) int {
return ComparePaths(left.Path, right.Path)
}
// This is used to sort entries by their names.
type ByName []*Entry
@@ -443,7 +696,7 @@ func (files FileInfoCompare) Less(i, j int) bool {
// ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths
// are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files.
func ListEntries(top string, path string, fileList *[]*Entry, patterns []string, nobackupFile string, discardAttributes bool, excludeByAttribute bool) (directoryList []*Entry,
func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool, listingChannel chan *Entry) (directoryList []*Entry,
skippedFiles []string, err error) {
LOG_DEBUG("LIST_ENTRIES", "Listing %s", path)
@@ -478,8 +731,6 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns []string,
sort.Sort(FileInfoCompare(files))
entries := make([]*Entry, 0, 4)
for _, f := range files {
if f.Name() == DUPLICACY_DIRECTORY {
continue
@@ -520,11 +771,9 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns []string,
}
}
if !discardAttributes {
entry.ReadAttributes(top)
}
entry.ReadAttributes(top)
if excludeByAttribute && excludedByAttribute(entry.Attributes) {
if excludeByAttribute && entry.Attributes != nil && excludedByAttribute(*entry.Attributes) {
LOG_DEBUG("LIST_EXCLUDE", "%s is excluded by attribute", entry.Path)
continue
}
@@ -535,20 +784,20 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns []string,
continue
}
entries = append(entries, entry)
if entry.IsDir() {
directoryList = append(directoryList, entry)
} else {
listingChannel <- entry
}
}
// For top level directory we need to sort again because symlinks may have been changed
if path == "" {
sort.Sort(ByName(entries))
sort.Sort(ByName(directoryList))
}
for _, entry := range entries {
if entry.IsDir() {
directoryList = append(directoryList, entry)
} else {
*fileList = append(*fileList, entry)
}
for _, entry := range directoryList {
listingChannel <- entry
}
for i, j := 0, len(directoryList)-1; i < j; i, j = i+1, j-1 {
@@ -597,3 +846,100 @@ func (entry *Entry) Diff(chunkHashes []string, chunkLengths []int,
return modifiedLength
}
func (entry *Entry) EncodeWithHash(encoder *msgpack.Encoder) error {
entryBytes, err := msgpack.Marshal(entry)
if err != nil {
return err
}
hash := sha256.Sum256(entryBytes)
err = encoder.EncodeBytes(entryBytes)
if err != nil {
return err
}
err = encoder.EncodeBytes(hash[:])
if err != nil {
return err
}
return nil
}
func DecodeEntryWithHash(decoder *msgpack.Decoder) (*Entry, error) {
entryBytes, err := decoder.DecodeBytes()
if err != nil {
return nil, err
}
hashBytes, err := decoder.DecodeBytes()
if err != nil {
return nil, err
}
expectedHash := sha256.Sum256(entryBytes)
if bytes.Compare(expectedHash[:], hashBytes) != 0 {
return nil, fmt.Errorf("corrupted file metadata")
}
var entry Entry
err = msgpack.Unmarshal(entryBytes, &entry)
if err != nil {
return nil, err
}
return &entry, nil
}
func (entry *Entry) check(chunkLengths []int) error {
if entry.Size < 0 {
return fmt.Errorf("The file %s hash an invalid size (%d)", entry.Path, entry.Size)
}
if !entry.IsFile() || entry.Size == 0 {
return nil
}
if entry.StartChunk < 0 {
return fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk)
}
if entry.EndChunk >= len(chunkLengths) {
return fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d",
entry.Path, entry.EndChunk, len(chunkLengths))
}
if entry.EndChunk < entry.StartChunk {
return fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d",
entry.Path, entry.StartChunk, entry.EndChunk)
}
if entry.StartOffset >= chunkLengths[entry.StartChunk] {
return fmt.Errorf("The file %s starts at offset %d of chunk %d of length %d",
entry.Path, entry.StartOffset, entry.StartChunk, chunkLengths[entry.StartChunk])
}
if entry.EndOffset > chunkLengths[entry.EndChunk] {
return fmt.Errorf("The file %s ends at offset %d of chunk %d of length %d",
entry.Path, entry.EndOffset, entry.EndChunk, chunkLengths[entry.EndChunk])
}
fileSize := int64(0)
for i := entry.StartChunk; i <= entry.EndChunk; i++ {
start := 0
if i == entry.StartChunk {
start = entry.StartOffset
}
end := chunkLengths[i]
if i == entry.EndChunk {
end = entry.EndOffset
}
fileSize += int64(end - start)
}
if entry.Size != fileSize {
return fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d",
entry.Path, entry.Size, fileSize)
}
return nil
}

View File

@@ -13,8 +13,11 @@ import (
"sort"
"strings"
"testing"
"bytes"
"encoding/json"
"github.com/gilbertchen/xattr"
"github.com/vmihailenco/msgpack"
)
func TestEntrySort(t *testing.T) {
@@ -27,19 +30,19 @@ func TestEntrySort(t *testing.T) {
"\xBB\xDDfile",
"\xFF\xDDfile",
"ab/",
"ab-/",
"ab0/",
"ab1/",
"ab/c",
"ab+/c-",
"ab+/c0",
"ab+/c/",
"ab+/c/d",
"ab+/c+/",
"ab+/c+/d",
"ab+/c0/",
"ab+/c/d",
"ab+/c+/d",
"ab+/c0/d",
"ab-/",
"ab-/c",
"ab0/",
"ab1/",
"ab1/c",
"ab1/\xBB\xDDfile",
"ab1/\xFF\xDDfile",
@@ -86,7 +89,7 @@ func TestEntrySort(t *testing.T) {
}
}
func TestEntryList(t *testing.T) {
func TestEntryOrder(t *testing.T) {
testDir := filepath.Join(os.TempDir(), "duplicacy_test")
os.RemoveAll(testDir)
@@ -98,16 +101,16 @@ func TestEntryList(t *testing.T) {
"ab0",
"ab1",
"ab+/",
"ab2/",
"ab3/",
"ab+/c",
"ab+/c+",
"ab+/c1",
"ab+/c-/",
"ab+/c-/d",
"ab+/c0/",
"ab+/c-/d",
"ab+/c0/d",
"ab2/",
"ab2/c",
"ab3/",
"ab3/c",
}
@@ -172,18 +175,24 @@ func TestEntryList(t *testing.T) {
directories = append(directories, CreateEntry("", 0, 0, 0))
entries := make([]*Entry, 0, 4)
entryChannel := make(chan *Entry, 1024)
entries = append(entries, CreateEntry("", 0, 0, 0))
for len(directories) > 0 {
directory := directories[len(directories)-1]
directories = directories[:len(directories)-1]
entries = append(entries, directory)
subdirectories, _, err := ListEntries(testDir, directory.Path, &entries, nil, "", false, false)
subdirectories, _, err := ListEntries(testDir, directory.Path, nil, "", false, entryChannel)
if err != nil {
t.Errorf("ListEntries(%s, %s) returned an error: %s", testDir, directory.Path, err)
}
directories = append(directories, subdirectories...)
}
close(entryChannel)
for entry := range entryChannel {
entries = append(entries, entry)
}
entries = entries[1:]
for _, entry := range entries {
@@ -274,18 +283,25 @@ func TestEntryExcludeByAttribute(t *testing.T) {
directories = append(directories, CreateEntry("", 0, 0, 0))
entries := make([]*Entry, 0, 4)
entryChannel := make(chan *Entry, 1024)
entries = append(entries, CreateEntry("", 0, 0, 0))
for len(directories) > 0 {
directory := directories[len(directories)-1]
directories = directories[:len(directories)-1]
entries = append(entries, directory)
subdirectories, _, err := ListEntries(testDir, directory.Path, &entries, nil, "", false, excludeByAttribute)
subdirectories, _, err := ListEntries(testDir, directory.Path, nil, "", excludeByAttribute, entryChannel)
if err != nil {
t.Errorf("ListEntries(%s, %s) returned an error: %s", testDir, directory.Path, err)
}
directories = append(directories, subdirectories...)
}
close(entryChannel)
for entry := range entryChannel {
entries = append(entries, entry)
}
entries = entries[1:]
for _, entry := range entries {
@@ -327,3 +343,33 @@ func TestEntryExcludeByAttribute(t *testing.T) {
}
}
func TestEntryEncoding(t *testing.T) {
buffer := new(bytes.Buffer)
encoder := msgpack.NewEncoder(buffer)
entry1 := CreateEntry("abcd", 1, 2, 0700)
err := encoder.Encode(entry1)
if err != nil {
t.Errorf("Failed to encode the entry: %v", err)
return
}
t.Logf("msgpack size: %d\n", len(buffer.Bytes()))
decoder := msgpack.NewDecoder(buffer)
description, _ := json.Marshal(entry1)
t.Logf("json size: %d\n", len(description))
var entry2 Entry
err = decoder.Decode(&entry2)
if err != nil {
t.Errorf("Failed to decode the entry: %v", err)
return
}
if entry1.Path != entry2.Path || entry1.Size != entry2.Size || entry1.Time != entry2.Time {
t.Error("Decoded entry is different than the original one")
}
}

574
src/duplicacy_entrylist.go Normal file
View File

@@ -0,0 +1,574 @@
// Copyright (c) Acrosync LLC. All rights reserved.
// Free for personal use and commercial trial
// Commercial use requires per-user licenses available from https://duplicacy.com
package duplicacy
import (
"encoding/hex"
"encoding/binary"
"fmt"
"os"
"io"
"path"
"crypto/sha256"
"crypto/rand"
"sync"
"github.com/vmihailenco/msgpack"
)
// This struct stores information about a file entry that has been modified
type ModifiedEntry struct {
Path string
Size int64
Hash string
}
// EntryList is basically a list of entries, which can be kept in the memory, or serialized to a disk file,
// depending on if maximumInMemoryEntries is reached.
//
// The idea behind the on-disk entry list is that entries are written to a disk file as they are coming in.
// Entries that have been modified and thus need to be uploaded will have their Incomplete bit set (i.e.,
// with a size of -1). When the limit is reached, entries are moved to a disk file but ModifiedEntries and
// UploadedChunks are still kept in memory. When later entries are read from the entry list, incomplete
// entries are back-annotated with info from ModifiedEntries and UploadedChunk* before sending them out.
type EntryList struct {
onDiskFile *os.File // the file to store entries
encoder *msgpack.Encoder // msgpack encoder for entry serialization
entries []*Entry // in-memory entry list
SnapshotID string // the snapshot id
Token string // this unique random token makes sure we read/write
// the same entry list
ModifiedEntries []ModifiedEntry // entries that will be uploaded
UploadedChunkHashes []string // chunks from entries that have been uploaded
UploadedChunkLengths []int // chunk lengths from entries that have been uploaded
uploadedChunkLock sync.Mutex // lock for UploadedChunkHashes and UploadedChunkLengths
PreservedChunkHashes []string // chunks from entries not changed
PreservedChunkLengths []int // chunk lengths from entries not changed
Checksum string // checksum of all entries to detect disk corruption
maximumInMemoryEntries int // max in-memory entries
NumberOfEntries int64 // number of entries (not including directories and links)
cachePath string // the directory for the on-disk file
// These 3 variables are used in entry infomation back-annotation
modifiedEntryIndex int // points to the current modified entry
uploadedChunkIndex int // counter for upload chunks
uploadedChunkOffset int // the start offset for the current modified entry
}
// Create a new entry list
func CreateEntryList(snapshotID string, cachePath string, maximumInMemoryEntries int) (*EntryList, error) {
token := make([]byte, 16)
_, err := rand.Read(token)
if err != nil {
return nil, fmt.Errorf("Failed to create a random token: %v", err)
}
entryList := &EntryList {
SnapshotID: snapshotID,
maximumInMemoryEntries: maximumInMemoryEntries,
cachePath: cachePath,
Token: string(token),
}
return entryList, nil
}
// Create the on-disk entry list file
func (entryList *EntryList)createOnDiskFile() error {
file, err := os.OpenFile(path.Join(entryList.cachePath, "incomplete_files"), os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600)
if err != nil {
return fmt.Errorf("Failed to create on disk entry list: %v", err)
}
entryList.onDiskFile = file
entryList.encoder = msgpack.NewEncoder(file)
err = entryList.encoder.EncodeString(entryList.Token)
if err != nil {
return fmt.Errorf("Failed to create on disk entry list: %v", err)
}
for _, entry := range entryList.entries {
err = entry.EncodeWithHash(entryList.encoder)
if err != nil {
return err
}
}
return nil
}
// Add an entry to the entry list
func (entryList *EntryList)AddEntry(entry *Entry) error {
if !entry.IsDir() && !entry.IsLink() {
entryList.NumberOfEntries++
}
if !entry.IsComplete() {
if entry.IsDir() || entry.IsLink() {
entry.Size = 0
} else {
modifiedEntry := ModifiedEntry {
Path: entry.Path,
Size: -1,
}
entryList.ModifiedEntries = append(entryList.ModifiedEntries, modifiedEntry)
}
}
if entryList.onDiskFile != nil {
return entry.EncodeWithHash(entryList.encoder)
} else {
entryList.entries = append(entryList.entries, entry)
if entryList.maximumInMemoryEntries >= 0 && len(entryList.entries) > entryList.maximumInMemoryEntries {
err := entryList.createOnDiskFile()
if err != nil {
return err
}
}
}
return nil
}
// Add a preserved chunk that belongs to files that have not been modified
func (entryList *EntryList)AddPreservedChunk(chunkHash string, chunkSize int) {
entryList.PreservedChunkHashes = append(entryList.PreservedChunkHashes, chunkHash)
entryList.PreservedChunkLengths = append(entryList.PreservedChunkLengths, chunkSize)
}
// Add a chunk just uploaded (that belongs to files that have been modified)
func (entryList *EntryList)AddUploadedChunk(chunkIndex int, chunkHash string, chunkSize int) {
entryList.uploadedChunkLock.Lock()
for len(entryList.UploadedChunkHashes) <= chunkIndex {
entryList.UploadedChunkHashes = append(entryList.UploadedChunkHashes, "")
}
for len(entryList.UploadedChunkLengths) <= chunkIndex {
entryList.UploadedChunkLengths = append(entryList.UploadedChunkLengths, 0)
}
entryList.UploadedChunkHashes[chunkIndex] = chunkHash
entryList.UploadedChunkLengths[chunkIndex] = chunkSize
entryList.uploadedChunkLock.Unlock()
}
// Close the on-disk file
func (entryList *EntryList) CloseOnDiskFile() error {
if entryList.onDiskFile == nil {
return nil
}
err := entryList.onDiskFile.Sync()
if err != nil {
return err
}
err = entryList.onDiskFile.Close()
if err != nil {
return err
}
entryList.onDiskFile = nil
return nil
}
// Return the length of the `index`th chunk
func (entryList *EntryList) getChunkLength(index int) int {
if index < len(entryList.PreservedChunkLengths) {
return entryList.PreservedChunkLengths[index]
} else {
return entryList.UploadedChunkLengths[index - len(entryList.PreservedChunkLengths)]
}
}
// Sanity check for each entry
func (entryList *EntryList) checkEntry(entry *Entry) error {
if entry.Size < 0 {
return fmt.Errorf("the file %s hash an invalid size (%d)", entry.Path, entry.Size)
}
if !entry.IsFile() || entry.Size == 0 {
return nil
}
numberOfChunks := len(entryList.PreservedChunkLengths) + len(entryList.UploadedChunkLengths)
if entry.StartChunk < 0 {
return fmt.Errorf("the file %s starts at chunk %d", entry.Path, entry.StartChunk)
}
if entry.EndChunk >= numberOfChunks {
return fmt.Errorf("the file %s ends at chunk %d while the number of chunks is %d",
entry.Path, entry.EndChunk, numberOfChunks)
}
if entry.EndChunk < entry.StartChunk {
return fmt.Errorf("the file %s starts at chunk %d and ends at chunk %d",
entry.Path, entry.StartChunk, entry.EndChunk)
}
if entry.StartOffset >= entryList.getChunkLength(entry.StartChunk) {
return fmt.Errorf("the file %s starts at offset %d of chunk %d with a length of %d",
entry.Path, entry.StartOffset, entry.StartChunk, entryList.getChunkLength(entry.StartChunk))
}
if entry.EndOffset > entryList.getChunkLength(entry.EndChunk) {
return fmt.Errorf("the file %s ends at offset %d of chunk %d with a length of %d",
entry.Path, entry.EndOffset, entry.EndChunk, entryList.getChunkLength(entry.EndChunk))
}
fileSize := int64(0)
for i := entry.StartChunk; i <= entry.EndChunk; i++ {
start := 0
if i == entry.StartChunk {
start = entry.StartOffset
}
end := entryList.getChunkLength(i)
if i == entry.EndChunk {
end = entry.EndOffset
}
fileSize += int64(end - start)
}
if entry.Size != fileSize {
return fmt.Errorf("the file %s has a size of %d but the total size of chunks is %d",
entry.Path, entry.Size, fileSize)
}
return nil
}
// An incomplete entry (with a size of -1) does not have 'startChunk', 'startOffset', 'endChunk', and 'endOffset'. This function
// is to fill in these information before sending the entry out.
func (entryList *EntryList) fillAndSendEntry(entry *Entry, entryOut func(*Entry)error) (skipped bool, err error) {
if entry.IsComplete() {
err := entryList.checkEntry(entry)
if err != nil {
return false, err
}
return false, entryOut(entry)
}
if entryList.modifiedEntryIndex >= len(entryList.ModifiedEntries) {
return false, fmt.Errorf("Unexpected file index %d (%d modified files)", entryList.modifiedEntryIndex, len(entryList.ModifiedEntries))
}
modifiedEntry := &entryList.ModifiedEntries[entryList.modifiedEntryIndex]
entryList.modifiedEntryIndex++
if modifiedEntry.Path != entry.Path {
return false, fmt.Errorf("Unexpected file path %s when expecting %s", modifiedEntry.Path, entry.Path)
}
if modifiedEntry.Size <= 0 {
return true, nil
}
entry.Size = modifiedEntry.Size
entry.Hash = modifiedEntry.Hash
entry.StartChunk = entryList.uploadedChunkIndex + len(entryList.PreservedChunkHashes)
entry.StartOffset = entryList.uploadedChunkOffset
entry.EndChunk = entry.StartChunk
endOffset := int64(entry.StartOffset) + entry.Size
for entryList.uploadedChunkIndex < len(entryList.UploadedChunkLengths) && endOffset > int64(entryList.UploadedChunkLengths[entryList.uploadedChunkIndex]) {
endOffset -= int64(entryList.UploadedChunkLengths[entryList.uploadedChunkIndex])
entry.EndChunk++
entryList.uploadedChunkIndex++
}
if entryList.uploadedChunkIndex >= len(entryList.UploadedChunkLengths) {
return false, fmt.Errorf("File %s has not been completely uploaded", entry.Path)
}
entry.EndOffset = int(endOffset)
entryList.uploadedChunkOffset = entry.EndOffset
if entry.EndOffset == entryList.UploadedChunkLengths[entryList.uploadedChunkIndex] {
entryList.uploadedChunkIndex++
entryList.uploadedChunkOffset = 0
}
err = entryList.checkEntry(entry)
if err != nil {
return false, err
}
return false, entryOut(entry)
}
// Iterate through the entries in this entry list
func (entryList *EntryList) ReadEntries(entryOut func(*Entry)error) (error) {
entryList.modifiedEntryIndex = 0
entryList.uploadedChunkIndex = 0
entryList.uploadedChunkOffset = 0
if entryList.onDiskFile == nil {
for _, entry := range entryList.entries {
skipped, err := entryList.fillAndSendEntry(entry.Copy(), entryOut)
if err != nil {
return err
}
if skipped {
continue
}
}
} else {
_, err := entryList.onDiskFile.Seek(0, os.SEEK_SET)
if err != nil {
return err
}
decoder := msgpack.NewDecoder(entryList.onDiskFile)
_, err = decoder.DecodeString()
if err != nil {
return err
}
for _, err = decoder.PeekCode(); err == nil; _, err = decoder.PeekCode() {
entry, err := DecodeEntryWithHash(decoder)
if err != nil {
return err
}
skipped, err := entryList.fillAndSendEntry(entry, entryOut)
if err != nil {
return err
}
if skipped {
continue
}
}
if err != io.EOF {
return err
}
}
return nil
}
// When saving an incomplete snapshot, the on-disk entry list ('incomplete_files') is renamed to
// 'incomplete_snapshot', and this EntryList struct is saved as 'incomplete_chunks'.
func (entryList *EntryList) SaveIncompleteSnapshot() {
entryList.uploadedChunkLock.Lock()
defer entryList.uploadedChunkLock.Unlock()
if entryList.onDiskFile == nil {
err := entryList.createOnDiskFile()
if err != nil {
LOG_WARN("INCOMPLETE_SAVE", "Failed to create the incomplete snapshot file: %v", err)
return
}
for _, entry := range entryList.entries {
err = entry.EncodeWithHash(entryList.encoder)
if err != nil {
LOG_WARN("INCOMPLETE_SAVE", "Failed to save the entry %s: %v", entry.Path, err)
return
}
}
}
err := entryList.onDiskFile.Close()
if err != nil {
LOG_WARN("INCOMPLETE_SAVE", "Failed to close the on-disk file: %v", err)
return
}
filePath := path.Join(entryList.cachePath, "incomplete_snapshot")
if _, err := os.Stat(filePath); err == nil {
err = os.Remove(filePath)
if err != nil {
LOG_WARN("INCOMPLETE_REMOVE", "Failed to remove previous incomplete snapshot: %v", err)
}
}
err = os.Rename(path.Join(entryList.cachePath, "incomplete_files"), filePath)
if err != nil {
LOG_WARN("INCOMPLETE_SAVE", "Failed to rename the incomplete snapshot file: %v", err)
return
}
chunkFile := path.Join(entryList.cachePath, "incomplete_chunks")
file, err := os.OpenFile(chunkFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
if err != nil {
LOG_WARN("INCOMPLETE_SAVE", "Failed to create the incomplete chunk file: %v", err)
return
}
defer file.Close()
encoder := msgpack.NewEncoder(file)
entryList.Checksum = entryList.CalculateChecksum()
err = encoder.Encode(entryList)
if err != nil {
LOG_WARN("INCOMPLETE_SAVE", "Failed to save the incomplete snapshot: %v", err)
return
}
LOG_INFO("INCOMPLETE_SAVE", "Incomplete snapshot saved to %s", filePath)
}
// Calculate a checksum for this entry list
func (entryList *EntryList) CalculateChecksum() string{
hasher := sha256.New()
for _, s := range entryList.UploadedChunkHashes {
hasher.Write([]byte(s))
}
buffer := make([]byte, 8)
for _, i := range entryList.UploadedChunkLengths {
binary.LittleEndian.PutUint64(buffer, uint64(i))
hasher.Write(buffer)
}
for _, s := range entryList.PreservedChunkHashes {
hasher.Write([]byte(s))
}
for _, i := range entryList.PreservedChunkLengths {
binary.LittleEndian.PutUint64(buffer, uint64(i))
hasher.Write(buffer)
}
for _, entry := range entryList.ModifiedEntries {
binary.LittleEndian.PutUint64(buffer, uint64(entry.Size))
hasher.Write(buffer)
hasher.Write([]byte(entry.Hash))
}
return hex.EncodeToString(hasher.Sum(nil))
}
// Check if all chunks exist in 'chunkCache'
func (entryList *EntryList) CheckChunks(config *Config, chunkCache map[string]bool) bool {
for _, chunkHash := range entryList.UploadedChunkHashes {
chunkID := config.GetChunkIDFromHash(chunkHash)
if _, ok := chunkCache[chunkID]; !ok {
return false
}
}
for _, chunkHash := range entryList.PreservedChunkHashes {
chunkID := config.GetChunkIDFromHash(chunkHash)
if _, ok := chunkCache[chunkID]; !ok {
return false
}
}
return true
}
// Recover the on disk file from 'incomplete_snapshot', and restore the EntryList struct
// from 'incomplete_chunks'
func loadIncompleteSnapshot(snapshotID string, cachePath string) *EntryList {
onDiskFilePath := path.Join(cachePath, "incomplete_snapshot")
entryListFilePath := path.Join(cachePath, "incomplete_chunks")
if _, err := os.Stat(onDiskFilePath); os.IsNotExist(err) {
return nil
}
if _, err := os.Stat(entryListFilePath); os.IsNotExist(err) {
return nil
}
entryList := &EntryList {}
entryListFile, err := os.OpenFile(entryListFilePath, os.O_RDONLY, 0600)
if err != nil {
LOG_WARN("INCOMPLETE_LOAD", "Failed to open the incomplete snapshot: %v", err)
return nil
}
defer entryListFile.Close()
decoder := msgpack.NewDecoder(entryListFile)
err = decoder.Decode(&entryList)
if err != nil {
LOG_WARN("INCOMPLETE_LOAD", "Failed to load the incomplete snapshot: %v", err)
return nil
}
checksum := entryList.CalculateChecksum()
if checksum != entryList.Checksum {
LOG_WARN("INCOMPLETE_LOAD", "Failed to load the incomplete snapshot: checksum mismatched")
return nil
}
onDiskFile, err := os.OpenFile(onDiskFilePath, os.O_RDONLY, 0600)
if err != nil {
LOG_WARN("INCOMPLETE_LOAD", "Failed to open the on disk file for the incomplete snapshot: %v", err)
return nil
}
decoder = msgpack.NewDecoder(onDiskFile)
token, err := decoder.DecodeString()
if err != nil {
LOG_WARN("INCOMPLETE_LOAD", "Failed to read the token for the incomplete snapshot: %v", err)
onDiskFile.Close()
return nil
}
if token != entryList.Token {
LOG_WARN("INCOMPLETE_LOAD", "Mismatched tokens in the incomplete snapshot")
onDiskFile.Close()
return nil
}
entryList.onDiskFile = onDiskFile
for i, hash := range entryList.UploadedChunkHashes {
if len(hash) == 0 {
// An empty hash means the chunk has not been uploaded in previous run
entryList.UploadedChunkHashes = entryList.UploadedChunkHashes[0:i]
entryList.UploadedChunkLengths = entryList.UploadedChunkLengths[0:i]
break
}
}
LOG_INFO("INCOMPLETE_LOAD", "Previous incomlete backup contains %d files and %d chunks",
entryList.NumberOfEntries, len(entryList.PreservedChunkLengths) + len(entryList.UploadedChunkHashes))
return entryList
}
// Delete the two incomplete files.
func deleteIncompleteSnapshot(cachePath string) {
for _, file := range []string{"incomplete_snapshot", "incomplete_chunks"} {
filePath := path.Join(cachePath, file)
if _, err := os.Stat(filePath); err == nil {
err = os.Remove(filePath)
if err != nil {
LOG_WARN("INCOMPLETE_REMOVE", "Failed to remove the incomplete snapshot: %v", err)
return
}
}
}
}

View File

@@ -0,0 +1,179 @@
// Copyright (c) Acrosync LLC. All rights reserved.
// Free for personal use and commercial trial
// Commercial use requires per-user licenses available from https://duplicacy.com
package duplicacy
import (
"os"
"path"
"time"
"testing"
"math/rand"
)
func generateRandomString(length int) string {
var letters = []rune("abcdefghijklmnopqrstuvwxyz")
b := make([]rune, length)
for i := range b {
b[i] = letters[rand.Intn(len(letters))]
}
return string(b)
}
var fileSizeGenerator = rand.NewZipf(rand.New(rand.NewSource(time.Now().UnixNano())), 1.2, 1.0, 1024)
func generateRandomFileSize() int64 {
return int64(fileSizeGenerator.Uint64() + 1)
}
func generateRandomChunks(totalFileSize int64) (chunks []string, lengths []int) {
totalChunkSize := int64(0)
for totalChunkSize < totalFileSize {
chunks = append(chunks, generateRandomString(64))
chunkSize := int64(1 + (rand.Int() % 64))
if chunkSize + totalChunkSize > totalFileSize {
chunkSize = totalFileSize - totalChunkSize
}
lengths = append(lengths, int(chunkSize))
totalChunkSize += chunkSize
}
return chunks, lengths
}
func getPreservedChunks(entries []*Entry, chunks []string, lengths []int) (preservedChunks []string, preservedChunkLengths []int) {
lastPreservedChunk := -1
for i := range entries {
if entries[i].Size < 0 {
continue
}
delta := entries[i].StartChunk - len(chunks)
if lastPreservedChunk != entries[i].StartChunk {
lastPreservedChunk = entries[i].StartChunk
preservedChunks = append(preservedChunks, chunks[entries[i].StartChunk])
preservedChunkLengths = append(preservedChunkLengths, lengths[entries[i].StartChunk])
delta++
}
for j := entries[i].StartChunk + 1; i <= entries[i].EndChunk; i++ {
preservedChunks = append(preservedChunks, chunks[j])
preservedChunkLengths = append(preservedChunkLengths, lengths[j])
lastPreservedChunk = j
}
}
return
}
func testEntryList(t *testing.T, numberOfEntries int, maximumInMemoryEntries int) {
entries := make([]*Entry, 0, numberOfEntries)
entrySizes := make([]int64, 0)
for i := 0; i < numberOfEntries; i++ {
entry:= CreateEntry(generateRandomString(16), -1, 0, 0700)
entries = append(entries, entry)
entrySizes = append(entrySizes, generateRandomFileSize())
}
totalFileSize := int64(0)
for _, size := range entrySizes {
totalFileSize += size
}
testDir := path.Join(os.TempDir(), "duplicacy_test")
os.RemoveAll(testDir)
os.MkdirAll(testDir, 0700)
os.MkdirAll(testDir + "/list1", 0700)
os.MkdirAll(testDir + "/list2", 0700)
os.MkdirAll(testDir + "/list3", 0700)
os.MkdirAll(testDir + "/list1", 0700)
// For the first entry list, all entries are new
entryList, _ := CreateEntryList("test", testDir + "/list1", maximumInMemoryEntries)
for _, entry := range entries {
entryList.AddEntry(entry)
}
uploadedChunks, uploadedChunksLengths := generateRandomChunks(totalFileSize)
for i, chunk := range uploadedChunks {
entryList.AddUploadedChunk(i, chunk, uploadedChunksLengths[i])
}
for i := range entryList.ModifiedEntries {
entryList.ModifiedEntries[i].Size = entrySizes[i]
}
totalEntries := 0
err := entryList.ReadEntries(func(entry *Entry) error {
totalEntries++
return nil
})
if err != nil {
t.Errorf("ReadEntries returned an error: %s", err)
return
}
if totalEntries != numberOfEntries {
t.Errorf("EntryList contains %d entries instead of %d", totalEntries, numberOfEntries)
return
}
// For the second entry list, half of the entries are new
for i := range entries {
if rand.Int() % 1 == 0 {
entries[i].Size = -1
} else {
entries[i].Size = entrySizes[i]
}
}
preservedChunks, preservedChunkLengths := getPreservedChunks(entries, uploadedChunks, uploadedChunksLengths)
entryList, _ = CreateEntryList("test", testDir + "/list2", maximumInMemoryEntries)
for _, entry := range entries {
entryList.AddEntry(entry)
}
for i, chunk := range preservedChunks {
entryList.AddPreservedChunk(chunk, preservedChunkLengths[i])
}
totalFileSize = 0
for i := range entryList.ModifiedEntries {
fileSize := generateRandomFileSize()
entryList.ModifiedEntries[i].Size = fileSize
totalFileSize += fileSize
}
uploadedChunks, uploadedChunksLengths = generateRandomChunks(totalFileSize)
for i, chunk := range uploadedChunks {
entryList.AddUploadedChunk(i, chunk, uploadedChunksLengths[i])
}
totalEntries = 0
err = entryList.ReadEntries(func(entry *Entry) error {
totalEntries++
return nil
})
if err != nil {
t.Errorf("ReadEntries returned an error: %s", err)
return
}
if totalEntries != numberOfEntries {
t.Errorf("EntryList contains %d entries instead of %d", totalEntries, numberOfEntries)
return
}
}
func TestEntryList(t *testing.T) {
testEntryList(t, 1024, 1024)
testEntryList(t, 1024, 512)
testEntryList(t, 1024, 0)
}

View File

@@ -8,17 +8,22 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"time"
"sort"
"bytes"
"github.com/vmihailenco/msgpack"
)
// Snapshot represents a backup of the repository.
type Snapshot struct {
Version int
ID string // the snapshot id; must be different for different repositories
Revision int // the revision number
Options string // options used to create this snapshot (some not included)
@@ -37,14 +42,11 @@ type Snapshot struct {
// A sequence of chunks whose aggregated content is the json representation of 'ChunkLengths'.
LengthSequence []string
Files []*Entry // list of files and subdirectories
ChunkHashes []string // a sequence of chunks representing the file content
ChunkLengths []int // the length of each chunk
Flag bool // used to mark certain snapshots for deletion or copy
discardAttributes bool
}
// CreateEmptySnapshot creates an empty snapshot.
@@ -56,16 +58,14 @@ func CreateEmptySnapshot(id string) (snapshto *Snapshot) {
}
}
// CreateSnapshotFromDirectory creates a snapshot from the local directory 'top'. Only 'Files'
// will be constructed, while 'ChunkHashes' and 'ChunkLengths' can only be populated after uploading.
func CreateSnapshotFromDirectory(id string, top string, nobackupFile string, filtersFile string, excludeByAttribute bool) (snapshot *Snapshot, skippedDirectories []string,
skippedFiles []string, err error) {
type DirectoryListing struct {
directory string
files *[]Entry
}
snapshot = &Snapshot{
ID: id,
Revision: 0,
StartTime: time.Now().Unix(),
}
func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string,
filtersFile string, excludeByAttribute bool, listingChannel chan *Entry,
skippedDirectories *[]string, skippedFiles *[]string) {
var patterns []string
@@ -77,45 +77,128 @@ func CreateSnapshotFromDirectory(id string, top string, nobackupFile string, fil
directories := make([]*Entry, 0, 256)
directories = append(directories, CreateEntry("", 0, 0, 0))
snapshot.Files = make([]*Entry, 0, 256)
attributeThreshold := 1024 * 1024
if attributeThresholdValue, found := os.LookupEnv("DUPLICACY_ATTRIBUTE_THRESHOLD"); found && attributeThresholdValue != "" {
attributeThreshold, _ = strconv.Atoi(attributeThresholdValue)
}
for len(directories) > 0 {
directory := directories[len(directories)-1]
directories = directories[:len(directories)-1]
snapshot.Files = append(snapshot.Files, directory)
subdirectories, skipped, err := ListEntries(top, directory.Path, &snapshot.Files, patterns, nobackupFile, snapshot.discardAttributes, excludeByAttribute)
subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingChannel)
if err != nil {
if directory.Path == "" {
LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err)
return nil, nil, nil, err
return
}
LOG_WARN("LIST_FAILURE", "Failed to list subdirectory %s: %v", directory.Path, err)
skippedDirectories = append(skippedDirectories, directory.Path)
if skippedDirectories != nil {
*skippedDirectories = append(*skippedDirectories, directory.Path)
}
continue
}
directories = append(directories, subdirectories...)
skippedFiles = append(skippedFiles, skipped...)
if !snapshot.discardAttributes && len(snapshot.Files) > attributeThreshold {
LOG_INFO("LIST_ATTRIBUTES", "Discarding file attributes")
snapshot.discardAttributes = true
for _, file := range snapshot.Files {
file.Attributes = nil
}
if skippedFiles != nil {
*skippedFiles = append(*skippedFiles, skipped...)
}
}
close(listingChannel)
}
func (snapshot *Snapshot)ListRemoteFiles(config *Config, chunkOperator *ChunkOperator, entryOut func(*Entry) bool) {
var chunks []string
for _, chunkHash := range snapshot.FileSequence {
chunks = append(chunks, chunkOperator.config.GetChunkIDFromHash(chunkHash))
}
// Remove the root entry
snapshot.Files = snapshot.Files[1:]
var chunk *Chunk
reader := sequenceReader{
sequence: snapshot.FileSequence,
buffer: new(bytes.Buffer),
refillFunc: func(chunkHash string) []byte {
if chunk != nil {
config.PutChunk(chunk)
}
chunk = chunkOperator.Download(chunkHash, 0, true)
return chunk.GetBytes()
},
}
if snapshot.Version == 0 {
LOG_INFO("SNAPSHOT_VERSION", "snapshot %s at revision %d is encoded in an old version format", snapshot.ID, snapshot.Revision)
files := make([]*Entry, 0)
decoder := json.NewDecoder(&reader)
// read open bracket
_, err := decoder.Token()
if err != nil {
LOG_ERROR("SNAPSHOT_PARSE", "Failed to open the snapshot %s at revision %d: not a list of entries",
snapshot.ID, snapshot.Revision)
return
}
for decoder.More() {
var entry Entry
err = decoder.Decode(&entry)
if err != nil {
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: %v",
snapshot.ID, snapshot.Revision, err)
return
}
files = append(files, &entry)
}
sort.Sort(ByName(files))
for _, file := range files {
if !entryOut(file) {
return
}
}
} else if snapshot.Version == 1 {
decoder := msgpack.NewDecoder(&reader)
lastEndChunk := 0
// while the array contains values
for _, err := decoder.PeekCode(); err != io.EOF; _, err = decoder.PeekCode() {
if err != nil {
LOG_ERROR("SNAPSHOT_PARSE", "Failed to parse the snapshot %s at revision %d: %v",
snapshot.ID, snapshot.Revision, err)
return
}
var entry Entry
err = decoder.Decode(&entry)
if err != nil {
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load the snapshot %s at revision %d: %v",
snapshot.ID, snapshot.Revision, err)
return
}
if entry.IsFile() {
entry.StartChunk += lastEndChunk
entry.EndChunk += entry.StartChunk
lastEndChunk = entry.EndChunk
}
err = entry.check(snapshot.ChunkLengths)
if err != nil {
LOG_ERROR("SNAPSHOT_ENTRY", "Failed to load the snapshot %s at revision %d: %v",
snapshot.ID, snapshot.Revision, err)
return
}
if !entryOut(&entry) {
return
}
}
} else {
LOG_ERROR("SNAPSHOT_VERSION", "snapshot %s at revision %d is encoded in unsupported version %d format",
snapshot.ID, snapshot.Revision, snapshot.Version)
return
}
return snapshot, skippedDirectories, skippedFiles, nil
}
func AppendPattern(patterns []string, new_pattern string) (new_patterns []string) {
@@ -215,100 +298,6 @@ func ProcessFilterLines(patternFileLines []string, includedFiles []string) (patt
return patterns
}
// This is the struct used to save/load incomplete snapshots
type IncompleteSnapshot struct {
Files []*Entry
ChunkHashes []string
ChunkLengths []int
}
// LoadIncompleteSnapshot loads the incomplete snapshot if it exists
func LoadIncompleteSnapshot() (snapshot *Snapshot) {
snapshotFile := path.Join(GetDuplicacyPreferencePath(), "incomplete")
description, err := ioutil.ReadFile(snapshotFile)
if err != nil {
LOG_DEBUG("INCOMPLETE_LOCATE", "Failed to locate incomplete snapshot: %v", err)
return nil
}
var incompleteSnapshot IncompleteSnapshot
err = json.Unmarshal(description, &incompleteSnapshot)
if err != nil {
LOG_DEBUG("INCOMPLETE_PARSE", "Failed to parse incomplete snapshot: %v", err)
return nil
}
var chunkHashes []string
for _, chunkHash := range incompleteSnapshot.ChunkHashes {
hash, err := hex.DecodeString(chunkHash)
if err != nil {
LOG_DEBUG("INCOMPLETE_DECODE", "Failed to decode incomplete snapshot: %v", err)
return nil
}
chunkHashes = append(chunkHashes, string(hash))
}
snapshot = &Snapshot{
Files: incompleteSnapshot.Files,
ChunkHashes: chunkHashes,
ChunkLengths: incompleteSnapshot.ChunkLengths,
}
LOG_INFO("INCOMPLETE_LOAD", "Incomplete snapshot loaded from %s", snapshotFile)
return snapshot
}
// SaveIncompleteSnapshot saves the incomplete snapshot under the preference directory
func SaveIncompleteSnapshot(snapshot *Snapshot) {
var files []*Entry
for _, file := range snapshot.Files {
// All unprocessed files will have a size of -1
if file.Size >= 0 {
file.Attributes = nil
files = append(files, file)
} else {
break
}
}
var chunkHashes []string
for _, chunkHash := range snapshot.ChunkHashes {
chunkHashes = append(chunkHashes, hex.EncodeToString([]byte(chunkHash)))
}
incompleteSnapshot := IncompleteSnapshot{
Files: files,
ChunkHashes: chunkHashes,
ChunkLengths: snapshot.ChunkLengths,
}
description, err := json.MarshalIndent(incompleteSnapshot, "", " ")
if err != nil {
LOG_WARN("INCOMPLETE_ENCODE", "Failed to encode the incomplete snapshot: %v", err)
return
}
snapshotFile := path.Join(GetDuplicacyPreferencePath(), "incomplete")
err = ioutil.WriteFile(snapshotFile, description, 0644)
if err != nil {
LOG_WARN("INCOMPLETE_WRITE", "Failed to save the incomplete snapshot: %v", err)
return
}
LOG_INFO("INCOMPLETE_SAVE", "Incomplete snapshot saved to %s", snapshotFile)
}
func RemoveIncompleteSnapshot() {
snapshotFile := path.Join(GetDuplicacyPreferencePath(), "incomplete")
if stat, err := os.Stat(snapshotFile); err == nil && !stat.IsDir() {
err = os.Remove(snapshotFile)
if err != nil {
LOG_INFO("INCOMPLETE_SAVE", "Failed to remove ncomplete snapshot: %v", err)
} else {
LOG_INFO("INCOMPLETE_SAVE", "Removed incomplete snapshot %s", snapshotFile)
}
}
}
// CreateSnapshotFromDescription creates a snapshot from json decription.
func CreateSnapshotFromDescription(description []byte) (snapshot *Snapshot, err error) {
@@ -321,6 +310,14 @@ func CreateSnapshotFromDescription(description []byte) (snapshot *Snapshot, err
snapshot = &Snapshot{}
if value, ok := root["version"]; !ok {
snapshot.Version = 0
} else if version, ok := value.(float64); !ok {
return nil, fmt.Errorf("Invalid version is specified in the snapshot")
} else {
snapshot.Version = int(version)
}
if value, ok := root["id"]; !ok {
return nil, fmt.Errorf("No id is specified in the snapshot")
} else if snapshot.ID, ok = value.(string); !ok {
@@ -437,6 +434,7 @@ func (snapshot *Snapshot) MarshalJSON() ([]byte, error) {
object := make(map[string]interface{})
object["version"] = 1
object["id"] = snapshot.ID
object["revision"] = snapshot.Revision
object["options"] = snapshot.Options
@@ -458,9 +456,7 @@ func (snapshot *Snapshot) MarshalJSON() ([]byte, error) {
// MarshalSequence creates a json represetion for the specified chunk sequence.
func (snapshot *Snapshot) MarshalSequence(sequenceType string) ([]byte, error) {
if sequenceType == "files" {
return json.Marshal(snapshot.Files)
} else if sequenceType == "chunks" {
if sequenceType == "chunks" {
return json.Marshal(encodeSequence(snapshot.ChunkHashes))
} else {
return json.Marshal(snapshot.ChunkLengths)
@@ -489,3 +485,4 @@ func encodeSequence(sequence []string) []string {
return sequenceInHex
}

View File

@@ -20,6 +20,8 @@ import (
"strings"
"text/tabwriter"
"time"
"sync"
"sync/atomic"
"github.com/aryann/difflib"
)
@@ -189,7 +191,6 @@ type SnapshotManager struct {
fileChunk *Chunk
snapshotCache *FileStorage
chunkDownloader *ChunkDownloader
chunkOperator *ChunkOperator
}
@@ -268,72 +269,26 @@ func (reader *sequenceReader) Read(data []byte) (n int, err error) {
return reader.buffer.Read(data)
}
func (manager *SnapshotManager) CreateChunkDownloader() {
if manager.chunkDownloader == nil {
manager.chunkDownloader = CreateChunkDownloader(manager.config, manager.storage, manager.snapshotCache, false, 1, false)
func (manager *SnapshotManager) CreateChunkOperator(resurrect bool, threads int, allowFailures bool) {
if manager.chunkOperator == nil {
manager.chunkOperator = CreateChunkOperator(manager.config, manager.storage, manager.snapshotCache, resurrect, threads, allowFailures)
}
}
// DownloadSequence returns the content represented by a sequence of chunks.
func (manager *SnapshotManager) DownloadSequence(sequence []string) (content []byte) {
manager.CreateChunkDownloader()
manager.CreateChunkOperator(false, 1, false)
for _, chunkHash := range sequence {
i := manager.chunkDownloader.AddChunk(chunkHash)
chunk := manager.chunkDownloader.WaitForChunk(i)
chunk := manager.chunkOperator.Download(chunkHash, 0, true)
content = append(content, chunk.GetBytes()...)
manager.config.PutChunk(chunk)
}
return content
}
func (manager *SnapshotManager) DownloadSnapshotFileSequence(snapshot *Snapshot, patterns []string, attributesNeeded bool) bool {
manager.CreateChunkDownloader()
reader := sequenceReader{
sequence: snapshot.FileSequence,
buffer: new(bytes.Buffer),
refillFunc: func(chunkHash string) []byte {
i := manager.chunkDownloader.AddChunk(chunkHash)
chunk := manager.chunkDownloader.WaitForChunk(i)
return chunk.GetBytes()
},
}
files := make([]*Entry, 0)
decoder := json.NewDecoder(&reader)
// read open bracket
_, err := decoder.Token()
if err != nil {
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: not a list of entries",
snapshot.ID, snapshot.Revision)
return false
}
// while the array contains values
for decoder.More() {
var entry Entry
err = decoder.Decode(&entry)
if err != nil {
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: %v",
snapshot.ID, snapshot.Revision, err)
return false
}
// If we don't need the attributes or the file isn't included we clear the attributes to save memory
if !attributesNeeded || (len(patterns) != 0 && !MatchPath(entry.Path, patterns)) {
entry.Attributes = nil
}
files = append(files, &entry)
}
snapshot.Files = files
return true
}
// DownloadSnapshotSequence downloads the content represented by a sequence of chunks, and then unmarshal the content
// using the specified 'loadFunction'. It purpose is to decode the chunk sequences representing chunk hashes or chunk lengths
// using the specified 'loadFunction'. Its purpose is to decode the chunk sequences representing chunk hashes or chunk lengths
// in a snapshot.
func (manager *SnapshotManager) DownloadSnapshotSequence(snapshot *Snapshot, sequenceType string) bool {
@@ -362,30 +317,21 @@ func (manager *SnapshotManager) DownloadSnapshotSequence(snapshot *Snapshot, seq
return true
}
// DownloadSnapshotContents loads all chunk sequences in a snapshot. A snapshot, when just created, only contains
// some metadata and theree sequence representing files, chunk hashes, and chunk lengths. This function must be called
// for the actual content of the snapshot to be usable.
func (manager *SnapshotManager) DownloadSnapshotContents(snapshot *Snapshot, patterns []string, attributesNeeded bool) bool {
// DownloadSnapshotSequences loads all chunk sequences in a snapshot. A snapshot, when just created, only contains
// some metadata and three sequence representing files, chunk hashes, and chunk lengths. This function must be called
// for the chunk hash sequence and chunk length sequence to be usable.
func (manager *SnapshotManager) DownloadSnapshotSequences(snapshot *Snapshot) bool {
manager.DownloadSnapshotFileSequence(snapshot, patterns, attributesNeeded)
manager.DownloadSnapshotSequence(snapshot, "chunks")
manager.DownloadSnapshotSequence(snapshot, "lengths")
err := manager.CheckSnapshot(snapshot)
if err != nil {
LOG_ERROR("SNAPSHOT_CHECK", "The snapshot %s at revision %d contains an error: %v",
snapshot.ID, snapshot.Revision, err)
return false
}
return true
}
// ClearSnapshotContents removes contents loaded by DownloadSnapshotContents
func (manager *SnapshotManager) ClearSnapshotContents(snapshot *Snapshot) {
// ClearSnapshotContents removes sequences loaded by DownloadSnapshotSequences
func (manager *SnapshotManager) ClearSnapshotSequences(snapshot *Snapshot) {
snapshot.ChunkHashes = nil
snapshot.ChunkLengths = nil
snapshot.Files = nil
}
// CleanSnapshotCache removes all files not referenced by the specified 'snapshot' in the snapshot cache.
@@ -577,10 +523,6 @@ func (manager *SnapshotManager) downloadLatestSnapshot(snapshotID string) (remot
remote = manager.DownloadSnapshot(snapshotID, latest)
}
if remote != nil {
manager.DownloadSnapshotContents(remote, nil, false)
}
return remote
}
@@ -712,6 +654,12 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
LOG_DEBUG("LIST_PARAMETERS", "id: %s, revisions: %v, tag: %s, showFiles: %t, showChunks: %t",
snapshotID, revisionsToList, tag, showFiles, showChunks)
manager.CreateChunkOperator(false, 1, false)
defer func() {
manager.chunkOperator.Stop()
manager.chunkOperator = nil
}()
var snapshotIDs []string
var err error
@@ -749,14 +697,16 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
if len(snapshot.Tag) > 0 {
tagWithSpace = snapshot.Tag + " "
}
LOG_INFO("SNAPSHOT_INFO", "Snapshot %s revision %d created at %s %s%s",
snapshotID, revision, creationTime, tagWithSpace, snapshot.Options)
if showFiles {
manager.DownloadSnapshotFileSequence(snapshot, nil, false)
options := snapshot.Options
if snapshot.Version == 0 {
options += " (0)"
}
LOG_INFO("SNAPSHOT_INFO", "Snapshot %s revision %d created at %s %s%s",
snapshotID, revision, creationTime, tagWithSpace, options)
if showFiles {
// We need to fill in ChunkHashes and ChunkLengths to verify that each entry is valid
manager.DownloadSnapshotSequences(snapshot)
if snapshot.NumberOfFiles > 0 {
LOG_INFO("SNAPSHOT_STATS", "Files: %d", snapshot.NumberOfFiles)
@@ -768,7 +718,7 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
totalFileSize := int64(0)
lastChunk := 0
for _, file := range snapshot.Files {
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(file *Entry)bool {
if file.IsFile() {
totalFiles++
totalFileSize += file.Size
@@ -780,17 +730,18 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
lastChunk = file.EndChunk
}
}
}
return true
})
for _, file := range snapshot.Files {
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(file *Entry)bool {
if file.IsFile() {
LOG_INFO("SNAPSHOT_FILE", "%s", file.String(maxSizeDigits))
}
}
return true
})
metaChunks := len(snapshot.FileSequence) + len(snapshot.ChunkSequence) + len(snapshot.LengthSequence)
LOG_INFO("SNAPSHOT_STATS", "Files: %d, total size: %d, file chunks: %d, metadata chunks: %d",
totalFiles, totalFileSize, lastChunk+1, metaChunks)
LOG_INFO("SNAPSHOT_STATS", "Total size: %d, file chunks: %d, metadata chunks: %d", totalFileSize, lastChunk+1, metaChunks)
}
if showChunks {
@@ -807,11 +758,15 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
}
// ListSnapshots shows the information about a snapshot.
// CheckSnapshots checks if there is any problem with a snapshot.
func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToCheck []int, tag string, showStatistics bool, showTabular bool,
checkFiles bool, checkChunks, searchFossils bool, resurrect bool, threads int, allowFailures bool) bool {
manager.chunkDownloader = CreateChunkDownloader(manager.config, manager.storage, manager.snapshotCache, false, threads, allowFailures)
manager.CreateChunkOperator(resurrect, threads, allowFailures)
defer func() {
manager.chunkOperator.Stop()
manager.chunkOperator = nil
}()
LOG_DEBUG("LIST_PARAMETERS", "id: %s, revisions: %v, tag: %s, showStatistics: %t, showTabular: %t, checkFiles: %t, searchFossils: %t, resurrect: %t",
snapshotID, revisionsToCheck, tag, showStatistics, showTabular, checkFiles, searchFossils, resurrect)
@@ -911,9 +866,9 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
for _, snapshot := range snapshotMap[snapshotID] {
if checkFiles {
manager.DownloadSnapshotContents(snapshot, nil, false)
manager.DownloadSnapshotSequences(snapshot)
manager.VerifySnapshot(snapshot)
manager.ClearSnapshotContents(snapshot)
manager.ClearSnapshotSequences(snapshot)
continue
}
@@ -1026,6 +981,7 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
// .duplicacy/cache/storage/verified_chunks. Note that it contains the chunk ids not chunk
// hashes.
verifiedChunks := make(map[string]int64)
var verifiedChunksLock sync.Mutex
verifiedChunksFile := "verified_chunks"
manager.fileChunk.Reset(false)
@@ -1061,16 +1017,11 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
defer saveVerifiedChunks()
RunAtError = saveVerifiedChunks
manager.chunkDownloader.snapshotCache = nil
LOG_INFO("SNAPSHOT_VERIFY", "Verifying %d chunks", len(*allChunkHashes))
startTime := time.Now()
var chunkHashes []string
// The index of the first chunk to add to the downloader, which may have already downloaded
// some metadata chunks so the index doesn't start with 0.
chunkIndex := -1
skippedChunks := 0
for chunkHash := range *allChunkHashes {
if len(verifiedChunks) > 0 {
@@ -1081,38 +1032,65 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
}
}
chunkHashes = append(chunkHashes, chunkHash)
if chunkIndex == -1 {
chunkIndex = manager.chunkDownloader.AddChunk(chunkHash)
} else {
manager.chunkDownloader.AddChunk(chunkHash)
}
}
if skippedChunks > 0 {
LOG_INFO("SNAPSHOT_VERIFY", "Skipped %d chunks that have already been verified before", skippedChunks)
}
var downloadedChunkSize int64
totalChunks := len(chunkHashes)
for i := 0; i < totalChunks; i++ {
chunk := manager.chunkDownloader.WaitForChunk(i + chunkIndex)
chunkID := manager.config.GetChunkIDFromHash(chunkHashes[i])
if chunk.isBroken {
continue
}
verifiedChunks[chunkID] = startTime.Unix()
downloadedChunkSize += int64(chunk.GetLength())
var totalDownloadedChunkSize int64
var totalDownloadedChunks int64
totalChunks := int64(len(chunkHashes))
elapsedTime := time.Now().Sub(startTime).Seconds()
speed := int64(float64(downloadedChunkSize) / elapsedTime)
remainingTime := int64(float64(totalChunks - i - 1) / float64(i + 1) * elapsedTime)
percentage := float64(i + 1) / float64(totalChunks) * 100.0
LOG_INFO("VERIFY_PROGRESS", "Verified chunk %s (%d/%d), %sB/s %s %.1f%%",
chunkID, i + 1, totalChunks, PrettySize(speed), PrettyTime(remainingTime), percentage)
chunkChannel := make(chan int, threads)
var wg sync.WaitGroup
wg.Add(threads)
for i := 0; i < threads; i++ {
go func() {
defer CatchLogException()
for {
chunkIndex, ok := <- chunkChannel
if !ok {
wg.Done()
return
}
chunk := manager.chunkOperator.Download(chunkHashes[chunkIndex], chunkIndex, false)
if chunk == nil {
continue
}
chunkID := manager.config.GetChunkIDFromHash(chunkHashes[chunkIndex])
verifiedChunksLock.Lock()
verifiedChunks[chunkID] = startTime.Unix()
verifiedChunksLock.Unlock()
downloadedChunkSize := atomic.AddInt64(&totalDownloadedChunkSize, int64(chunk.GetLength()))
downloadedChunks := atomic.AddInt64(&totalDownloadedChunks, 1)
elapsedTime := time.Now().Sub(startTime).Seconds()
speed := int64(float64(downloadedChunkSize) / elapsedTime)
remainingTime := int64(float64(totalChunks - downloadedChunks) / float64(downloadedChunks) * elapsedTime)
percentage := float64(downloadedChunks) / float64(totalChunks) * 100.0
LOG_INFO("VERIFY_PROGRESS", "Verified chunk %s (%d/%d), %sB/s %s %.1f%%",
chunkID, downloadedChunks, totalChunks, PrettySize(speed), PrettyTime(remainingTime), percentage)
manager.config.PutChunk(chunk)
}
} ()
}
if manager.chunkDownloader.NumberOfFailedChunks > 0 {
LOG_ERROR("SNAPSHOT_VERIFY", "%d out of %d chunks are corrupted", manager.chunkDownloader.NumberOfFailedChunks, len(*allChunkHashes))
for chunkIndex := range chunkHashes {
chunkChannel <- chunkIndex
}
close(chunkChannel)
wg.Wait()
manager.chunkOperator.WaitForCompletion()
if manager.chunkOperator.NumberOfFailedChunks > 0 {
LOG_ERROR("SNAPSHOT_VERIFY", "%d out of %d chunks are corrupted", manager.chunkOperator.NumberOfFailedChunks, len(*allChunkHashes))
} else {
LOG_INFO("SNAPSHOT_VERIFY", "All %d chunks have been successfully verified", len(*allChunkHashes))
}
@@ -1280,14 +1258,6 @@ func (manager *SnapshotManager) PrintSnapshot(snapshot *Snapshot) bool {
object["chunks"] = manager.ConvertSequence(snapshot.ChunkHashes)
object["lengths"] = snapshot.ChunkLengths
// By default the json serialization of a file entry contains the path in base64 format. This is
// to convert every file entry into an object which include the path in a more readable format.
var files []map[string]interface{}
for _, file := range snapshot.Files {
files = append(files, file.convertToObject(false))
}
object["files"] = files
description, err := json.MarshalIndent(object, "", " ")
if err != nil {
@@ -1296,8 +1266,24 @@ func (manager *SnapshotManager) PrintSnapshot(snapshot *Snapshot) bool {
return false
}
fmt.Printf("%s\n", string(description))
// Don't print the ending bracket
fmt.Printf("%s", string(description[:len(description) - 2]))
fmt.Printf(",\n \"files\": [\n")
isFirstFile := true
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (file *Entry) bool {
fileDescription, _ := json.MarshalIndent(file.convertToObject(false), "", " ")
if isFirstFile {
fmt.Printf("%s", fileDescription)
isFirstFile = false
} else {
fmt.Printf(",\n%s", fileDescription)
}
return true
})
fmt.Printf(" ]\n}\n")
return true
}
@@ -1313,17 +1299,20 @@ func (manager *SnapshotManager) VerifySnapshot(snapshot *Snapshot) bool {
return false
}
files := make([]*Entry, 0, len(snapshot.Files)/2)
for _, file := range snapshot.Files {
files := make([]*Entry, 0)
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (file *Entry) bool {
if file.IsFile() && file.Size != 0 {
file.Attributes = nil
files = append(files, file)
}
}
return true
})
sort.Sort(ByChunk(files))
corruptedFiles := 0
var lastChunk *Chunk
for _, file := range files {
if !manager.RetrieveFile(snapshot, file, func([]byte) {}) {
if !manager.RetrieveFile(snapshot, file, &lastChunk, func([]byte) {}) {
corruptedFiles++
}
LOG_TRACE("SNAPSHOT_VERIFY", "%s", file.Path)
@@ -1341,21 +1330,13 @@ func (manager *SnapshotManager) VerifySnapshot(snapshot *Snapshot) bool {
}
// RetrieveFile retrieves the file in the specified snapshot.
func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, output func([]byte)) bool {
func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, lastChunk **Chunk, output func([]byte)) bool {
if file.Size == 0 {
return true
}
manager.CreateChunkDownloader()
// Temporarily disable the snapshot cache of the download so that downloaded file chunks won't be saved
// to the cache.
snapshotCache := manager.chunkDownloader.snapshotCache
manager.chunkDownloader.snapshotCache = nil
defer func() {
manager.chunkDownloader.snapshotCache = snapshotCache
}()
manager.CreateChunkOperator(false, 1, false)
fileHasher := manager.config.NewFileHasher()
alternateHash := false
@@ -1376,12 +1357,19 @@ func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, ou
}
hash := snapshot.ChunkHashes[i]
lastChunk, lastChunkHash := manager.chunkDownloader.GetLastDownloadedChunk()
if lastChunkHash != hash {
i := manager.chunkDownloader.AddChunk(hash)
chunk = manager.chunkDownloader.WaitForChunk(i)
if lastChunk == nil {
chunk = manager.chunkOperator.Download(hash, 0, false)
} else if *lastChunk == nil {
chunk = manager.chunkOperator.Download(hash, 0, false)
*lastChunk = chunk
} else {
chunk = lastChunk
if (*lastChunk).GetHash() == hash {
chunk = *lastChunk
} else {
manager.config.PutChunk(*lastChunk)
chunk = manager.chunkOperator.Download(hash, 0, false)
*lastChunk = chunk
}
}
output(chunk.GetBytes()[start:end])
@@ -1405,10 +1393,18 @@ func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, ou
// FindFile returns the file entry that has the given file name.
func (manager *SnapshotManager) FindFile(snapshot *Snapshot, filePath string, suppressError bool) *Entry {
for _, entry := range snapshot.Files {
var found *Entry
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (entry *Entry) bool {
if entry.Path == filePath {
return entry
found = entry
return false
}
return true
})
if found != nil {
return found
}
if !suppressError {
@@ -1440,13 +1436,8 @@ func (manager *SnapshotManager) PrintFile(snapshotID string, revision int, path
return false
}
patterns := []string{}
if path != "" {
patterns = []string{path}
}
// If no path is specified, we're printing the snapshot so we need all attributes
if !manager.DownloadSnapshotContents(snapshot, patterns, path == "") {
// If no path is specified, we're printing the snapshot
if !manager.DownloadSnapshotSequences(snapshot) {
return false
}
@@ -1456,7 +1447,7 @@ func (manager *SnapshotManager) PrintFile(snapshotID string, revision int, path
}
file := manager.FindFile(snapshot, path, false)
if !manager.RetrieveFile(snapshot, file, func(chunk []byte) {
if !manager.RetrieveFile(snapshot, file, nil, func(chunk []byte) {
fmt.Printf("%s", chunk)
}) {
LOG_ERROR("SNAPSHOT_RETRIEVE", "File %s is corrupted in snapshot %s at revision %d",
@@ -1474,22 +1465,38 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
LOG_DEBUG("DIFF_PARAMETERS", "top: %s, id: %s, revision: %v, path: %s, compareByHash: %t",
top, snapshotID, revisions, filePath, compareByHash)
manager.CreateChunkOperator(false, 1, false)
defer func() {
manager.chunkOperator.Stop()
manager.chunkOperator = nil
} ()
var leftSnapshot *Snapshot
var rightSnapshot *Snapshot
var err error
leftSnapshotFiles := make([]*Entry, 0, 1024)
rightSnapshotFiles := make([]*Entry, 0, 1024)
// If no or only one revision is specified, use the on-disk version for the right-hand side.
if len(revisions) <= 1 {
// Only scan the repository if filePath is not provided
if len(filePath) == 0 {
rightSnapshot, _, _, err = CreateSnapshotFromDirectory(snapshotID, top, nobackupFile, filtersFile, excludeByAttribute)
if err != nil {
LOG_ERROR("SNAPSHOT_LIST", "Failed to list the directory %s: %v", top, err)
return false
rightSnapshot = CreateEmptySnapshot(snapshotID)
localListingChannel := make(chan *Entry)
go func() {
defer CatchLogException()
rightSnapshot.ListLocalFiles(top, nobackupFile, filtersFile, excludeByAttribute, localListingChannel, nil, nil)
} ()
for entry := range localListingChannel {
entry.Attributes = nil // attributes are not compared
rightSnapshotFiles = append(rightSnapshotFiles, entry)
}
}
} else {
rightSnapshot = manager.DownloadSnapshot(snapshotID, revisions[1])
manager.DownloadSnapshotSequences(rightSnapshot)
}
// If no revision is specified, use the latest revision as the left-hand side.
@@ -1503,15 +1510,11 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
leftSnapshot = manager.DownloadSnapshot(snapshotID, revisions[0])
}
manager.DownloadSnapshotSequences(leftSnapshot)
if len(filePath) > 0 {
manager.DownloadSnapshotContents(leftSnapshot, nil, false)
if rightSnapshot != nil && rightSnapshot.Revision != 0 {
manager.DownloadSnapshotContents(rightSnapshot, nil, false)
}
var leftFile []byte
if !manager.RetrieveFile(leftSnapshot, manager.FindFile(leftSnapshot, filePath, false), func(content []byte) {
if !manager.RetrieveFile(leftSnapshot, manager.FindFile(leftSnapshot, filePath, false), nil, func(content []byte) {
leftFile = append(leftFile, content...)
}) {
LOG_ERROR("SNAPSHOT_DIFF", "File %s is corrupted in snapshot %s at revision %d",
@@ -1521,7 +1524,7 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
var rightFile []byte
if rightSnapshot != nil {
if !manager.RetrieveFile(rightSnapshot, manager.FindFile(rightSnapshot, filePath, false), func(content []byte) {
if !manager.RetrieveFile(rightSnapshot, manager.FindFile(rightSnapshot, filePath, false), nil, func(content []byte) {
rightFile = append(rightFile, content...)
}) {
LOG_ERROR("SNAPSHOT_DIFF", "File %s is corrupted in snapshot %s at revision %d",
@@ -1582,24 +1585,32 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
return true
}
// We only need to decode the 'files' sequence, not 'chunkhashes' or 'chunklengthes'
manager.DownloadSnapshotFileSequence(leftSnapshot, nil, false)
if rightSnapshot != nil && rightSnapshot.Revision != 0 {
manager.DownloadSnapshotFileSequence(rightSnapshot, nil, false)
leftSnapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(entry *Entry) bool {
entry.Attributes = nil
leftSnapshotFiles = append(leftSnapshotFiles, entry)
return true
})
if rightSnapshot.Revision != 0 {
rightSnapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(entry *Entry) bool {
entry.Attributes = nil
rightSnapshotFiles = append(rightSnapshotFiles, entry)
return true
})
}
maxSize := int64(9)
maxSizeDigits := 1
// Find the max Size value in order for pretty alignment.
for _, file := range leftSnapshot.Files {
for _, file := range leftSnapshotFiles {
for !file.IsDir() && file.Size > maxSize {
maxSize = maxSize*10 + 9
maxSizeDigits += 1
}
}
for _, file := range rightSnapshot.Files {
for _, file := range rightSnapshotFiles {
for !file.IsDir() && file.Size > maxSize {
maxSize = maxSize*10 + 9
maxSizeDigits += 1
@@ -1609,22 +1620,22 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
buffer := make([]byte, 32*1024)
var i, j int
for i < len(leftSnapshot.Files) || j < len(rightSnapshot.Files) {
for i < len(leftSnapshotFiles) || j < len(rightSnapshotFiles) {
if i >= len(leftSnapshot.Files) {
if rightSnapshot.Files[j].IsFile() {
LOG_INFO("SNAPSHOT_DIFF", "+ %s", rightSnapshot.Files[j].String(maxSizeDigits))
if i >= len(leftSnapshotFiles) {
if rightSnapshotFiles[j].IsFile() {
LOG_INFO("SNAPSHOT_DIFF", "+ %s", rightSnapshotFiles[j].String(maxSizeDigits))
}
j++
} else if j >= len(rightSnapshot.Files) {
if leftSnapshot.Files[i].IsFile() {
LOG_INFO("SNAPSHOT_DIFF", "- %s", leftSnapshot.Files[i].String(maxSizeDigits))
} else if j >= len(rightSnapshotFiles) {
if leftSnapshotFiles[i].IsFile() {
LOG_INFO("SNAPSHOT_DIFF", "- %s", leftSnapshotFiles[i].String(maxSizeDigits))
}
i++
} else {
left := leftSnapshot.Files[i]
right := rightSnapshot.Files[j]
left := leftSnapshotFiles[i]
right := rightSnapshotFiles[j]
if !left.IsFile() {
i++
@@ -1679,6 +1690,12 @@ func (manager *SnapshotManager) ShowHistory(top string, snapshotID string, revis
LOG_DEBUG("HISTORY_PARAMETERS", "top: %s, id: %s, revisions: %v, path: %s, showLocalHash: %t",
top, snapshotID, revisions, filePath, showLocalHash)
manager.CreateChunkOperator(false, 1, false)
defer func() {
manager.chunkOperator.Stop()
manager.chunkOperator = nil
} ()
var err error
if len(revisions) == 0 {
@@ -1693,7 +1710,7 @@ func (manager *SnapshotManager) ShowHistory(top string, snapshotID string, revis
sort.Ints(revisions)
for _, revision := range revisions {
snapshot := manager.DownloadSnapshot(snapshotID, revision)
manager.DownloadSnapshotFileSequence(snapshot, nil, false)
manager.DownloadSnapshotSequences(snapshot)
file := manager.FindFile(snapshot, filePath, true)
if file != nil {
@@ -1801,8 +1818,11 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string,
LOG_WARN("DELETE_OPTIONS", "Tags or retention policy will be ignored if at least one revision is specified")
}
manager.chunkOperator = CreateChunkOperator(manager.storage, threads)
defer manager.chunkOperator.Stop()
manager.CreateChunkOperator(false, threads, false)
defer func() {
manager.chunkOperator.Stop()
manager.chunkOperator = nil
} ()
prefPath := GetDuplicacyPreferencePath()
logDir := path.Join(prefPath, "logs")
@@ -2184,7 +2204,7 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string,
return false
}
manager.chunkOperator.Stop()
manager.chunkOperator.WaitForCompletion()
for _, fossil := range manager.chunkOperator.fossils {
collection.AddFossil(fossil)
}
@@ -2265,6 +2285,7 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string,
} else {
manager.CleanSnapshotCache(nil, allSnapshots)
}
manager.chunkOperator.WaitForCompletion()
return true
}
@@ -2477,8 +2498,6 @@ func (manager *SnapshotManager) pruneSnapshotsExhaustive(referencedFossils map[s
// CheckSnapshot performs sanity checks on the given snapshot.
func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) {
lastChunk := 0
lastOffset := 0
var lastEntry *Entry
numberOfChunks := len(snapshot.ChunkHashes)
@@ -2488,57 +2507,39 @@ func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) {
numberOfChunks, len(snapshot.ChunkLengths))
}
entries := make([]*Entry, len(snapshot.Files))
copy(entries, snapshot.Files)
sort.Sort(ByChunk(entries))
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (entry *Entry) bool {
for _, entry := range snapshot.Files {
if lastEntry != nil && lastEntry.Compare(entry) >= 0 && !strings.Contains(lastEntry.Path, "\ufffd") {
return fmt.Errorf("The entry %s appears before the entry %s", lastEntry.Path, entry.Path)
err = fmt.Errorf("The entry %s appears before the entry %s", lastEntry.Path, entry.Path)
return false
}
lastEntry = entry
}
for _, entry := range entries {
if !entry.IsFile() || entry.Size == 0 {
continue
return true
}
if entry.StartChunk < 0 {
return fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk)
err = fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk)
return false
}
if entry.EndChunk >= numberOfChunks {
return fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d",
err = fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d",
entry.Path, entry.EndChunk, numberOfChunks)
return false
}
if entry.EndChunk < entry.StartChunk {
return fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d",
fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d",
entry.Path, entry.StartChunk, entry.EndChunk)
return false
}
if entry.StartOffset > 0 {
if entry.StartChunk < lastChunk {
return fmt.Errorf("The file %s starts at chunk %d while the last chunk is %d",
entry.Path, entry.StartChunk, lastChunk)
}
if entry.StartChunk > lastChunk+1 {
return fmt.Errorf("The file %s starts at chunk %d while the last chunk is %d",
entry.Path, entry.StartChunk, lastChunk)
}
if entry.StartChunk == lastChunk && entry.StartOffset < lastOffset {
return fmt.Errorf("The file %s starts at offset %d of chunk %d while the last file ends at offset %d",
entry.Path, entry.StartOffset, entry.StartChunk, lastOffset)
}
if entry.StartChunk == entry.EndChunk && entry.StartOffset > entry.EndOffset {
return fmt.Errorf("The file %s starts at offset %d and ends at offset %d of the same chunk %d",
entry.Path, entry.StartOffset, entry.EndOffset, entry.StartChunk)
}
if entry.StartChunk == entry.EndChunk && entry.StartOffset > entry.EndOffset {
err = fmt.Errorf("The file %s starts at offset %d and ends at offset %d of the same chunk %d",
entry.Path, entry.StartOffset, entry.EndOffset, entry.StartChunk)
return false
}
fileSize := int64(0)
@@ -2558,22 +2559,13 @@ func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) {
}
if entry.Size != fileSize {
return fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d",
err = fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d",
entry.Path, entry.Size, fileSize)
return false
}
lastChunk = entry.EndChunk
lastOffset = entry.EndOffset
}
if len(entries) > 0 && entries[0].StartChunk != 0 {
return fmt.Errorf("The first file starts at chunk %d", entries[0].StartChunk)
}
// There may be a last chunk whose size is 0 so we allow this to happen
if lastChunk < numberOfChunks-2 {
return fmt.Errorf("The last file ends at chunk %d but the number of chunks is %d", lastChunk, numberOfChunks)
}
return true
})
return nil
}

View File

@@ -116,19 +116,18 @@ func createTestSnapshotManager(testDir string) *SnapshotManager {
func uploadTestChunk(manager *SnapshotManager, content []byte) string {
completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
chunkOperator := CreateChunkOperator(manager.config, manager.storage, nil, false, testThreads, false)
chunkOperator.UploadCompletionFunc = func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
LOG_INFO("UPLOAD_CHUNK", "Chunk %s size %d uploaded", chunk.GetID(), chunkSize)
}
chunkUploader := CreateChunkUploader(manager.config, manager.storage, nil, *testThreads, nil)
chunkUploader.completionFunc = completionFunc
chunkUploader.Start()
chunk := CreateChunk(manager.config, true)
chunk.Reset(true)
chunk.Write(content)
chunkUploader.StartChunk(chunk, 0)
chunkUploader.Stop()
chunkOperator.Upload(chunk, 0, false)
chunkOperator.WaitForCompletion()
chunkOperator.Stop()
return chunk.GetHash()
}
@@ -180,6 +179,12 @@ func createTestSnapshot(manager *SnapshotManager, snapshotID string, revision in
func checkTestSnapshots(manager *SnapshotManager, expectedSnapshots int, expectedFossils int) {
manager.CreateChunkOperator(false, 1, false)
defer func() {
manager.chunkOperator.Stop()
manager.chunkOperator = nil
}()
var snapshotIDs []string
var err error

View File

@@ -14,7 +14,7 @@ import (
"strconv"
"strings"
"time"
"runtime"
"runtime"
"github.com/gilbertchen/gopass"
"golang.org/x/crypto/pbkdf2"
@@ -473,4 +473,4 @@ func PrintMemoryUsage() {
time.Sleep(time.Second)
}
}
}

View File

@@ -52,11 +52,11 @@ func (entry *Entry) ReadAttributes(top string) {
fullPath := filepath.Join(top, entry.Path)
attributes, _ := xattr.List(fullPath)
if len(attributes) > 0 {
entry.Attributes = make(map[string][]byte)
entry.Attributes = &map[string][]byte{}
for _, name := range attributes {
attribute, err := xattr.Get(fullPath, name)
if err == nil {
entry.Attributes[name] = attribute
(*entry.Attributes)[name] = attribute
}
}
}
@@ -68,19 +68,19 @@ func (entry *Entry) SetAttributesToFile(fullPath string) {
for _, name := range names {
newAttribute, found := entry.Attributes[name]
newAttribute, found := (*entry.Attributes)[name]
if found {
oldAttribute, _ := xattr.Get(fullPath, name)
if !bytes.Equal(oldAttribute, newAttribute) {
xattr.Set(fullPath, name, newAttribute)
}
delete(entry.Attributes, name)
delete(*entry.Attributes, name)
} else {
xattr.Remove(fullPath, name)
}
}
for name, attribute := range entry.Attributes {
for name, attribute := range *entry.Attributes {
xattr.Set(fullPath, name, attribute)
}