mirror of
https://github.com/gilbertchen/duplicacy
synced 2025-12-06 00:03:38 +00:00
Merge pull request #625 from gilbertchen/memory_optimization
Rewrite the backup procedure to reduce memory usage
This commit is contained in:
@@ -785,7 +785,10 @@ func backupRepository(context *cli.Context) {
|
||||
|
||||
backupManager.SetupSnapshotCache(preference.Name)
|
||||
backupManager.SetDryRun(dryRun)
|
||||
backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS, vssTimeout, enumOnly)
|
||||
|
||||
metadataChunkSize := context.Int("metadata-chunk-size")
|
||||
maximumInMemoryEntries := context.Int("max-in-memory-entries")
|
||||
backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS, vssTimeout, enumOnly, metadataChunkSize, maximumInMemoryEntries)
|
||||
|
||||
runScript(context, preference.Name, "post")
|
||||
}
|
||||
@@ -1510,6 +1513,19 @@ func main() {
|
||||
Name: "enum-only",
|
||||
Usage: "enumerate the repository recursively and then exit",
|
||||
},
|
||||
cli.IntFlag{
|
||||
Name: "metadata-chunk-size",
|
||||
Value: 1024 * 1024,
|
||||
Usage: "the average size of metadata chunks (defaults to 1M)",
|
||||
Argument: "<size>",
|
||||
},
|
||||
cli.IntFlag{
|
||||
Name: "max-in-memory-entries",
|
||||
Value: 1024 * 1024,
|
||||
Usage: "the maximum number of entries kept in memory (defaults to 1M)",
|
||||
Argument: "<number>",
|
||||
},
|
||||
|
||||
},
|
||||
Usage: "Save a snapshot of the repository to the storage",
|
||||
ArgsUsage: " ",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -257,7 +257,7 @@ func TestBackupManager(t *testing.T) {
|
||||
backupManager.SetupSnapshotCache("default")
|
||||
|
||||
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false)
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false, 1024, 1024)
|
||||
time.Sleep(time.Duration(delay) * time.Second)
|
||||
SetDuplicacyPreferencePath(testDir + "/repository2/.duplicacy")
|
||||
failedFiles := backupManager.Restore(testDir+"/repository2", threads /*inPlace=*/, false /*quickMode=*/, false, threads /*overwrite=*/, true,
|
||||
@@ -282,7 +282,7 @@ func TestBackupManager(t *testing.T) {
|
||||
modifyFile(testDir+"/repository1/dir1/file3", 0.3)
|
||||
|
||||
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "second", false, false, 0, false)
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "second", false, false, 0, false, 1024, 1024)
|
||||
time.Sleep(time.Duration(delay) * time.Second)
|
||||
SetDuplicacyPreferencePath(testDir + "/repository2/.duplicacy")
|
||||
failedFiles = backupManager.Restore(testDir+"/repository2", 2 /*inPlace=*/, true /*quickMode=*/, true, threads /*overwrite=*/, true,
|
||||
@@ -303,7 +303,7 @@ func TestBackupManager(t *testing.T) {
|
||||
os.Mkdir(testDir+"/repository1/dir2/dir3", 0700)
|
||||
os.Mkdir(testDir+"/repository1/dir4", 0700)
|
||||
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "third", false, false, 0, false)
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "third", false, false, 0, false, 1024, 1024)
|
||||
time.Sleep(time.Duration(delay) * time.Second)
|
||||
|
||||
// Create some directories and files under repository2 that will be deleted during restore
|
||||
@@ -368,7 +368,7 @@ func TestBackupManager(t *testing.T) {
|
||||
}
|
||||
backupManager.SnapshotManager.CheckSnapshots( /*snapshotID*/ "host1" /*revisions*/, []int{2, 3} /*tag*/, "",
|
||||
/*showStatistics*/ false /*showTabular*/, false /*checkFiles*/, false /*checkChunks*/, false /*searchFossils*/, false /*resurrect*/, false, 1 /*allowFailures*/, false)
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "fourth", false, false, 0, false)
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "fourth", false, false, 0, false, 1024, 1024)
|
||||
backupManager.SnapshotManager.PruneSnapshots("host1", "host1" /*revisions*/, nil /*tags*/, nil /*retentions*/, nil,
|
||||
/*exhaustive*/ false /*exclusive=*/, true /*ignoredIDs*/, nil /*dryRun*/, false /*deleteOnly*/, false /*collectOnly*/, false, 1)
|
||||
numberOfSnapshots = backupManager.SnapshotManager.ListSnapshots( /*snapshotID*/ "host1" /*revisionsToList*/, nil /*tag*/, "" /*showFiles*/, false /*showChunks*/, false)
|
||||
@@ -533,7 +533,7 @@ func TestPersistRestore(t *testing.T) {
|
||||
unencBackupManager.SetupSnapshotCache("default")
|
||||
|
||||
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
|
||||
unencBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false)
|
||||
unencBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false, 1024, 1024)
|
||||
time.Sleep(time.Duration(delay) * time.Second)
|
||||
|
||||
|
||||
@@ -543,7 +543,7 @@ func TestPersistRestore(t *testing.T) {
|
||||
encBackupManager.SetupSnapshotCache("default")
|
||||
|
||||
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
|
||||
encBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false)
|
||||
encBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false, 1024, 1024)
|
||||
time.Sleep(time.Duration(delay) * time.Second)
|
||||
|
||||
|
||||
|
||||
@@ -29,29 +29,29 @@ func benchmarkSplit(reader *bytes.Reader, fileSize int64, chunkSize int, compres
|
||||
config.HashKey = DEFAULT_KEY
|
||||
config.IDKey = DEFAULT_KEY
|
||||
|
||||
maker := CreateChunkMaker(config, false)
|
||||
maker := CreateFileChunkMaker(config, false)
|
||||
|
||||
startTime := float64(time.Now().UnixNano()) / 1e9
|
||||
numberOfChunks := 0
|
||||
reader.Seek(0, os.SEEK_SET)
|
||||
maker.ForEachChunk(reader,
|
||||
func(chunk *Chunk, final bool) {
|
||||
if compression {
|
||||
key := ""
|
||||
if encryption {
|
||||
key = "0123456789abcdef0123456789abcdef"
|
||||
}
|
||||
err := chunk.Encrypt([]byte(key), "", false)
|
||||
if err != nil {
|
||||
LOG_ERROR("BENCHMARK_ENCRYPT", "Failed to encrypt the chunk: %v", err)
|
||||
}
|
||||
|
||||
chunkFunc := func(chunk *Chunk) {
|
||||
if compression {
|
||||
key := ""
|
||||
if encryption {
|
||||
key = "0123456789abcdef0123456789abcdef"
|
||||
}
|
||||
config.PutChunk(chunk)
|
||||
numberOfChunks++
|
||||
},
|
||||
func(size int64, hash string) (io.Reader, bool) {
|
||||
return nil, false
|
||||
})
|
||||
err := chunk.Encrypt([]byte(key), "", false)
|
||||
if err != nil {
|
||||
LOG_ERROR("BENCHMARK_ENCRYPT", "Failed to encrypt the chunk: %v", err)
|
||||
}
|
||||
}
|
||||
config.PutChunk(chunk)
|
||||
numberOfChunks++
|
||||
}
|
||||
|
||||
maker.AddData(reader, chunkFunc)
|
||||
maker.AddData(nil, chunkFunc)
|
||||
|
||||
runningTime := float64(time.Now().UnixNano())/1e9 - startTime
|
||||
speed := int64(float64(fileSize) / runningTime)
|
||||
|
||||
@@ -65,8 +65,8 @@ type Chunk struct {
|
||||
config *Config // Every chunk is associated with a Config object. Which hashing algorithm to use is determined
|
||||
// by the config
|
||||
|
||||
isSnapshot bool // Indicates if the chunk is a snapshot chunk (instead of a file chunk). This is only used by RSA
|
||||
// encryption, where a snapshot chunk is not encrypted by RSA
|
||||
isMetadata bool // Indicates if the chunk is a metadata chunk (instead of a file chunk). This is primarily used by RSA
|
||||
// encryption, where a metadata chunk is not encrypted by RSA
|
||||
|
||||
isBroken bool // Indicates the chunk did not download correctly. This is only used for -persist (allowFailures) mode
|
||||
}
|
||||
@@ -127,7 +127,7 @@ func (chunk *Chunk) Reset(hashNeeded bool) {
|
||||
chunk.hash = nil
|
||||
chunk.id = ""
|
||||
chunk.size = 0
|
||||
chunk.isSnapshot = false
|
||||
chunk.isMetadata = false
|
||||
chunk.isBroken = false
|
||||
}
|
||||
|
||||
@@ -186,7 +186,7 @@ func (chunk *Chunk) VerifyID() {
|
||||
|
||||
// Encrypt encrypts the plain data stored in the chunk buffer. If derivationKey is not nil, the actual
|
||||
// encryption key will be HMAC-SHA256(encryptionKey, derivationKey).
|
||||
func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isSnapshot bool) (err error) {
|
||||
func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isMetadata bool) (err error) {
|
||||
|
||||
var aesBlock cipher.Block
|
||||
var gcm cipher.AEAD
|
||||
@@ -203,8 +203,8 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isSnapsh
|
||||
|
||||
key := encryptionKey
|
||||
usingRSA := false
|
||||
// Enable RSA encryption only when the chunk is not a snapshot chunk
|
||||
if chunk.config.rsaPublicKey != nil && !isSnapshot && !chunk.isSnapshot {
|
||||
// Enable RSA encryption only when the chunk is not a metadata chunk
|
||||
if chunk.config.rsaPublicKey != nil && !isMetadata && !chunk.isMetadata {
|
||||
randomKey := make([]byte, 32)
|
||||
_, err := rand.Read(randomKey)
|
||||
if err != nil {
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
package duplicacy
|
||||
|
||||
import (
|
||||
"io"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
@@ -20,78 +19,47 @@ type ChunkDownloadTask struct {
|
||||
isDownloading bool // 'true' means the chunk has been downloaded or is being downloaded
|
||||
}
|
||||
|
||||
// ChunkDownloadCompletion represents the notification sent when a chunk has been downloaded.
|
||||
type ChunkDownloadCompletion struct {
|
||||
chunkIndex int // The index of this chunk in the chunk list
|
||||
chunk *Chunk // The chunk that has been downloaded
|
||||
chunk *Chunk
|
||||
chunkIndex int
|
||||
}
|
||||
|
||||
// ChunkDownloader is capable of performing multi-threaded downloading. Chunks to be downloaded are first organized
|
||||
// ChunkDownloader is a wrapper of ChunkOperator and is only used by the restore procedure. Chunks to be downloaded are first organized
|
||||
// as a list of ChunkDownloadTasks, with only the chunkHash field initialized. When a chunk is needed, the
|
||||
// corresponding ChunkDownloadTask is sent to the downloading goroutine. Once a chunk is downloaded, it will be
|
||||
// inserted in the completed task list.
|
||||
type ChunkDownloader struct {
|
||||
|
||||
operator *ChunkOperator
|
||||
|
||||
totalChunkSize int64 // Total chunk size
|
||||
downloadedChunkSize int64 // Downloaded chunk size
|
||||
|
||||
config *Config // Associated config
|
||||
storage Storage // Download from this storage
|
||||
snapshotCache *FileStorage // Used as cache if not nil; usually for downloading snapshot chunks
|
||||
showStatistics bool // Show a stats log for each chunk if true
|
||||
threads int // Number of threads
|
||||
allowFailures bool // Whether to failfast on download error, or continue
|
||||
|
||||
taskList []ChunkDownloadTask // The list of chunks to be downloaded
|
||||
completedTasks map[int]bool // Store downloaded chunks
|
||||
lastChunkIndex int // a monotonically increasing number indicating the last chunk to be downloaded
|
||||
|
||||
taskQueue chan ChunkDownloadTask // Downloading goroutines are waiting on this channel for input
|
||||
stopChannel chan bool // Used to stop the dowloading goroutines
|
||||
completionChannel chan ChunkDownloadCompletion // A downloading goroutine sends back the chunk via this channel after downloading
|
||||
|
||||
startTime int64 // The time it starts downloading
|
||||
numberOfDownloadedChunks int // The number of chunks that have been downloaded
|
||||
numberOfDownloadingChunks int // The number of chunks still being downloaded
|
||||
numberOfActiveChunks int // The number of chunks that is being downloaded or has been downloaded but not reclaimed
|
||||
|
||||
NumberOfFailedChunks int // The number of chunks that can't be downloaded
|
||||
}
|
||||
|
||||
func CreateChunkDownloader(config *Config, storage Storage, snapshotCache *FileStorage, showStatistics bool, threads int, allowFailures bool) *ChunkDownloader {
|
||||
func CreateChunkDownloader(operator *ChunkOperator) *ChunkDownloader {
|
||||
downloader := &ChunkDownloader{
|
||||
config: config,
|
||||
storage: storage,
|
||||
snapshotCache: snapshotCache,
|
||||
showStatistics: showStatistics,
|
||||
threads: threads,
|
||||
allowFailures: allowFailures,
|
||||
operator: operator,
|
||||
|
||||
taskList: nil,
|
||||
completedTasks: make(map[int]bool),
|
||||
lastChunkIndex: 0,
|
||||
|
||||
taskQueue: make(chan ChunkDownloadTask, threads),
|
||||
stopChannel: make(chan bool),
|
||||
completionChannel: make(chan ChunkDownloadCompletion),
|
||||
|
||||
startTime: time.Now().Unix(),
|
||||
}
|
||||
|
||||
// Start the downloading goroutines
|
||||
for i := 0; i < downloader.threads; i++ {
|
||||
go func(threadIndex int) {
|
||||
defer CatchLogException()
|
||||
for {
|
||||
select {
|
||||
case task := <-downloader.taskQueue:
|
||||
downloader.Download(threadIndex, task)
|
||||
case <-downloader.stopChannel:
|
||||
return
|
||||
}
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
|
||||
return downloader
|
||||
}
|
||||
|
||||
@@ -127,26 +95,7 @@ func (downloader *ChunkDownloader) AddFiles(snapshot *Snapshot, files []*Entry)
|
||||
maximumChunks = file.EndChunk - file.StartChunk
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AddChunk adds a single chunk the download list.
|
||||
func (downloader *ChunkDownloader) AddChunk(chunkHash string) int {
|
||||
|
||||
task := ChunkDownloadTask{
|
||||
chunkIndex: len(downloader.taskList),
|
||||
chunkHash: chunkHash,
|
||||
chunkLength: 0,
|
||||
needed: true,
|
||||
isDownloading: false,
|
||||
}
|
||||
downloader.taskList = append(downloader.taskList, task)
|
||||
if downloader.numberOfActiveChunks < downloader.threads {
|
||||
downloader.taskQueue <- task
|
||||
downloader.numberOfDownloadingChunks++
|
||||
downloader.numberOfActiveChunks++
|
||||
downloader.taskList[len(downloader.taskList)-1].isDownloading = true
|
||||
}
|
||||
return len(downloader.taskList) - 1
|
||||
downloader.operator.totalChunkSize = downloader.totalChunkSize
|
||||
}
|
||||
|
||||
// Prefetch adds up to 'threads' chunks needed by a file to the download list
|
||||
@@ -159,20 +108,22 @@ func (downloader *ChunkDownloader) Prefetch(file *Entry) {
|
||||
task := &downloader.taskList[i]
|
||||
if task.needed {
|
||||
if !task.isDownloading {
|
||||
if downloader.numberOfActiveChunks >= downloader.threads {
|
||||
if downloader.numberOfActiveChunks >= downloader.operator.threads {
|
||||
return
|
||||
}
|
||||
|
||||
LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching %s chunk %s", file.Path,
|
||||
downloader.config.GetChunkIDFromHash(task.chunkHash))
|
||||
downloader.taskQueue <- *task
|
||||
downloader.operator.config.GetChunkIDFromHash(task.chunkHash))
|
||||
downloader.operator.DownloadAsync(task.chunkHash, i, false, func (chunk *Chunk, chunkIndex int) {
|
||||
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
|
||||
})
|
||||
task.isDownloading = true
|
||||
downloader.numberOfDownloadingChunks++
|
||||
downloader.numberOfActiveChunks++
|
||||
}
|
||||
} else {
|
||||
LOG_DEBUG("DOWNLOAD_PREFETCH", "%s chunk %s is not needed", file.Path,
|
||||
downloader.config.GetChunkIDFromHash(task.chunkHash))
|
||||
downloader.operator.config.GetChunkIDFromHash(task.chunkHash))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -186,7 +137,7 @@ func (downloader *ChunkDownloader) Reclaim(chunkIndex int) {
|
||||
|
||||
for i := range downloader.completedTasks {
|
||||
if i < chunkIndex && downloader.taskList[i].chunk != nil {
|
||||
downloader.config.PutChunk(downloader.taskList[i].chunk)
|
||||
downloader.operator.config.PutChunk(downloader.taskList[i].chunk)
|
||||
downloader.taskList[i].chunk = nil
|
||||
delete(downloader.completedTasks, i)
|
||||
downloader.numberOfActiveChunks--
|
||||
@@ -222,8 +173,10 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
|
||||
// If we haven't started downloading the specified chunk, download it now
|
||||
if !downloader.taskList[chunkIndex].isDownloading {
|
||||
LOG_DEBUG("DOWNLOAD_FETCH", "Fetching chunk %s",
|
||||
downloader.config.GetChunkIDFromHash(downloader.taskList[chunkIndex].chunkHash))
|
||||
downloader.taskQueue <- downloader.taskList[chunkIndex]
|
||||
downloader.operator.config.GetChunkIDFromHash(downloader.taskList[chunkIndex].chunkHash))
|
||||
downloader.operator.DownloadAsync(downloader.taskList[chunkIndex].chunkHash, chunkIndex, false, func (chunk *Chunk, chunkIndex int) {
|
||||
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
|
||||
})
|
||||
downloader.taskList[chunkIndex].isDownloading = true
|
||||
downloader.numberOfDownloadingChunks++
|
||||
downloader.numberOfActiveChunks++
|
||||
@@ -231,7 +184,7 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
|
||||
|
||||
// We also need to look ahead and prefetch as many other chunks as permitted by the number of threads
|
||||
for i := chunkIndex + 1; i < len(downloader.taskList); i++ {
|
||||
if downloader.numberOfActiveChunks >= downloader.threads {
|
||||
if downloader.numberOfActiveChunks >= downloader.operator.threads {
|
||||
break
|
||||
}
|
||||
task := &downloader.taskList[i]
|
||||
@@ -240,8 +193,10 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
|
||||
}
|
||||
|
||||
if !task.isDownloading {
|
||||
LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching chunk %s", downloader.config.GetChunkIDFromHash(task.chunkHash))
|
||||
downloader.taskQueue <- *task
|
||||
LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching chunk %s", downloader.operator.config.GetChunkIDFromHash(task.chunkHash))
|
||||
downloader.operator.DownloadAsync(task.chunkHash, task.chunkIndex, false, func (chunk *Chunk, chunkIndex int) {
|
||||
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
|
||||
})
|
||||
task.isDownloading = true
|
||||
downloader.numberOfDownloadingChunks++
|
||||
downloader.numberOfActiveChunks++
|
||||
@@ -255,9 +210,6 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
|
||||
downloader.taskList[completion.chunkIndex].chunk = completion.chunk
|
||||
downloader.numberOfDownloadedChunks++
|
||||
downloader.numberOfDownloadingChunks--
|
||||
if completion.chunk.isBroken {
|
||||
downloader.NumberOfFailedChunks++
|
||||
}
|
||||
}
|
||||
return downloader.taskList[chunkIndex].chunk
|
||||
}
|
||||
@@ -281,13 +233,10 @@ func (downloader *ChunkDownloader) WaitForCompletion() {
|
||||
// Wait for a completion event first
|
||||
if downloader.numberOfActiveChunks > 0 {
|
||||
completion := <-downloader.completionChannel
|
||||
downloader.config.PutChunk(completion.chunk)
|
||||
downloader.operator.config.PutChunk(completion.chunk)
|
||||
downloader.numberOfActiveChunks--
|
||||
downloader.numberOfDownloadedChunks++
|
||||
downloader.numberOfDownloadingChunks--
|
||||
if completion.chunk.isBroken {
|
||||
downloader.NumberOfFailedChunks++
|
||||
}
|
||||
}
|
||||
|
||||
// Pass the tasks one by one to the download queue
|
||||
@@ -297,215 +246,13 @@ func (downloader *ChunkDownloader) WaitForCompletion() {
|
||||
downloader.lastChunkIndex++
|
||||
continue
|
||||
}
|
||||
downloader.taskQueue <- *task
|
||||
downloader.operator.DownloadAsync(task.chunkHash, task.chunkIndex, false, func (chunk *Chunk, chunkIndex int) {
|
||||
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
|
||||
})
|
||||
task.isDownloading = true
|
||||
downloader.numberOfDownloadingChunks++
|
||||
downloader.numberOfActiveChunks++
|
||||
downloader.lastChunkIndex++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Stop terminates all downloading goroutines
|
||||
func (downloader *ChunkDownloader) Stop() {
|
||||
for downloader.numberOfDownloadingChunks > 0 {
|
||||
completion := <-downloader.completionChannel
|
||||
downloader.completedTasks[completion.chunkIndex] = true
|
||||
downloader.taskList[completion.chunkIndex].chunk = completion.chunk
|
||||
downloader.numberOfDownloadedChunks++
|
||||
downloader.numberOfDownloadingChunks--
|
||||
if completion.chunk.isBroken {
|
||||
downloader.NumberOfFailedChunks++
|
||||
}
|
||||
}
|
||||
|
||||
for i := range downloader.completedTasks {
|
||||
downloader.config.PutChunk(downloader.taskList[i].chunk)
|
||||
downloader.taskList[i].chunk = nil
|
||||
downloader.numberOfActiveChunks--
|
||||
}
|
||||
|
||||
for i := 0; i < downloader.threads; i++ {
|
||||
downloader.stopChannel <- true
|
||||
}
|
||||
}
|
||||
|
||||
// Download downloads a chunk from the storage.
|
||||
func (downloader *ChunkDownloader) Download(threadIndex int, task ChunkDownloadTask) bool {
|
||||
|
||||
cachedPath := ""
|
||||
chunk := downloader.config.GetChunk()
|
||||
chunkID := downloader.config.GetChunkIDFromHash(task.chunkHash)
|
||||
|
||||
if downloader.snapshotCache != nil && downloader.storage.IsCacheNeeded() {
|
||||
|
||||
var exist bool
|
||||
var err error
|
||||
|
||||
// Reset the chunk with a hasher -- we're reading from the cache where chunk are not encrypted or compressed
|
||||
chunk.Reset(true)
|
||||
|
||||
cachedPath, exist, _, err = downloader.snapshotCache.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
LOG_WARN("DOWNLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
|
||||
} else if exist {
|
||||
err = downloader.snapshotCache.DownloadFile(0, cachedPath, chunk)
|
||||
if err != nil {
|
||||
LOG_WARN("DOWNLOAD_CACHE", "Failed to load the chunk %s from the snapshot cache: %v", chunkID, err)
|
||||
} else {
|
||||
actualChunkID := chunk.GetID()
|
||||
if actualChunkID != chunkID {
|
||||
LOG_WARN("DOWNLOAD_CACHE_CORRUPTED",
|
||||
"The chunk %s load from the snapshot cache has a hash id of %s", chunkID, actualChunkID)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been loaded from the snapshot cache", chunkID)
|
||||
|
||||
downloader.completionChannel <- ChunkDownloadCompletion{chunk: chunk, chunkIndex: task.chunkIndex}
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reset the chunk without a hasher -- the downloaded content will be encrypted and/or compressed and the hasher
|
||||
// will be set up before the encryption
|
||||
chunk.Reset(false)
|
||||
|
||||
// If failures are allowed, complete the task properly
|
||||
completeFailedChunk := func(chunk *Chunk) {
|
||||
if downloader.allowFailures {
|
||||
chunk.isBroken = true
|
||||
downloader.completionChannel <- ChunkDownloadCompletion{chunk: chunk, chunkIndex: task.chunkIndex}
|
||||
}
|
||||
}
|
||||
|
||||
const MaxDownloadAttempts = 3
|
||||
for downloadAttempt := 0; ; downloadAttempt++ {
|
||||
|
||||
// Find the chunk by ID first.
|
||||
chunkPath, exist, _, err := downloader.storage.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
completeFailedChunk(chunk)
|
||||
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
|
||||
if !exist {
|
||||
// No chunk is found. Have to find it in the fossil pool again.
|
||||
fossilPath, exist, _, err := downloader.storage.FindChunk(threadIndex, chunkID, true)
|
||||
if err != nil {
|
||||
completeFailedChunk(chunk)
|
||||
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
|
||||
if !exist {
|
||||
|
||||
retry := false
|
||||
|
||||
// Retry for Hubic or WebDAV as it may return 404 even when the chunk exists
|
||||
if _, ok := downloader.storage.(*HubicStorage); ok {
|
||||
retry = true
|
||||
}
|
||||
|
||||
if _, ok := downloader.storage.(*WebDAVStorage); ok {
|
||||
retry = true
|
||||
}
|
||||
|
||||
if retry && downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "Failed to find the chunk %s; retrying", chunkID)
|
||||
continue
|
||||
}
|
||||
|
||||
completeFailedChunk(chunk)
|
||||
// A chunk is not found. This is a serious error and hopefully it will never happen.
|
||||
if err != nil {
|
||||
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found: %v", chunkID, err)
|
||||
} else {
|
||||
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found", chunkID)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Don't try to resurrect the fossil as we did before. This is to avoid the potential read-after-rename
|
||||
// consistency issue. Instead, download the fossil directly; resurrection should be taken care of later.
|
||||
chunkPath = fossilPath
|
||||
LOG_WARN("DOWNLOAD_FOSSIL", "Chunk %s is a fossil", chunkID)
|
||||
}
|
||||
|
||||
err = downloader.storage.DownloadFile(threadIndex, chunkPath, chunk)
|
||||
if err != nil {
|
||||
_, isHubic := downloader.storage.(*HubicStorage)
|
||||
// Retry on EOF or if it is a Hubic backend as it may return 404 even when the chunk exists
|
||||
if (err == io.ErrUnexpectedEOF || isHubic) && downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "Failed to download the chunk %s: %v; retrying", chunkID, err)
|
||||
chunk.Reset(false)
|
||||
continue
|
||||
} else {
|
||||
completeFailedChunk(chunk)
|
||||
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to download the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
err = chunk.Decrypt(downloader.config.ChunkKey, task.chunkHash)
|
||||
if err != nil {
|
||||
if downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "Failed to decrypt the chunk %s: %v; retrying", chunkID, err)
|
||||
chunk.Reset(false)
|
||||
continue
|
||||
} else {
|
||||
completeFailedChunk(chunk)
|
||||
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_DECRYPT", "Failed to decrypt the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
actualChunkID := chunk.GetID()
|
||||
if actualChunkID != chunkID {
|
||||
if downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "The chunk %s has a hash id of %s; retrying", chunkID, actualChunkID)
|
||||
chunk.Reset(false)
|
||||
continue
|
||||
} else {
|
||||
completeFailedChunk(chunk)
|
||||
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CORRUPTED", "The chunk %s has a hash id of %s", chunkID, actualChunkID)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
if len(cachedPath) > 0 {
|
||||
// Save a copy to the local snapshot cache
|
||||
err := downloader.snapshotCache.UploadFile(threadIndex, cachedPath, chunk.GetBytes())
|
||||
if err != nil {
|
||||
LOG_WARN("DOWNLOAD_CACHE", "Failed to add the chunk %s to the snapshot cache: %v", chunkID, err)
|
||||
}
|
||||
}
|
||||
|
||||
downloadedChunkSize := atomic.AddInt64(&downloader.downloadedChunkSize, int64(chunk.GetLength()))
|
||||
|
||||
if (downloader.showStatistics || IsTracing()) && downloader.totalChunkSize > 0 {
|
||||
|
||||
now := time.Now().Unix()
|
||||
if now <= downloader.startTime {
|
||||
now = downloader.startTime + 1
|
||||
}
|
||||
speed := downloadedChunkSize / (now - downloader.startTime)
|
||||
remainingTime := int64(0)
|
||||
if speed > 0 {
|
||||
remainingTime = (downloader.totalChunkSize-downloadedChunkSize)/speed + 1
|
||||
}
|
||||
percentage := float32(downloadedChunkSize * 1000 / downloader.totalChunkSize)
|
||||
LOG_INFO("DOWNLOAD_PROGRESS", "Downloaded chunk %d size %d, %sB/s %s %.1f%%",
|
||||
task.chunkIndex+1, chunk.GetLength(),
|
||||
PrettySize(speed), PrettyTime(remainingTime), percentage/10)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_DOWNLOAD", "Chunk %s has been downloaded", chunkID)
|
||||
}
|
||||
|
||||
downloader.completionChannel <- ChunkDownloadCompletion{chunk: chunk, chunkIndex: task.chunkIndex}
|
||||
return true
|
||||
}
|
||||
}
|
||||
@@ -25,15 +25,20 @@ type ChunkMaker struct {
|
||||
bufferSize int
|
||||
bufferStart int
|
||||
|
||||
minimumReached bool
|
||||
hashSum uint64
|
||||
chunk *Chunk
|
||||
|
||||
config *Config
|
||||
|
||||
hashOnly bool
|
||||
hashOnlyChunk *Chunk
|
||||
|
||||
}
|
||||
|
||||
// CreateChunkMaker creates a chunk maker. 'randomSeed' is used to generate the character-to-integer table needed by
|
||||
// buzhash.
|
||||
func CreateChunkMaker(config *Config, hashOnly bool) *ChunkMaker {
|
||||
func CreateFileChunkMaker(config *Config, hashOnly bool) *ChunkMaker {
|
||||
size := 1
|
||||
for size*2 <= config.AverageChunkSize {
|
||||
size *= 2
|
||||
@@ -67,6 +72,33 @@ func CreateChunkMaker(config *Config, hashOnly bool) *ChunkMaker {
|
||||
}
|
||||
|
||||
maker.buffer = make([]byte, 2*config.MinimumChunkSize)
|
||||
maker.bufferStart = 0
|
||||
maker.bufferSize = 0
|
||||
|
||||
maker.startNewChunk()
|
||||
|
||||
return maker
|
||||
}
|
||||
|
||||
// CreateMetaDataChunkMaker creates a chunk maker that always uses the variable-sized chunking algorithm
|
||||
func CreateMetaDataChunkMaker(config *Config, chunkSize int) *ChunkMaker {
|
||||
|
||||
size := 1
|
||||
for size*2 <= chunkSize {
|
||||
size *= 2
|
||||
}
|
||||
|
||||
if size != chunkSize {
|
||||
LOG_FATAL("CHUNK_SIZE", "Invalid metadata chunk size: %d is not a power of 2", chunkSize)
|
||||
return nil
|
||||
}
|
||||
|
||||
maker := CreateFileChunkMaker(config, false)
|
||||
maker.hashMask = uint64(chunkSize - 1)
|
||||
maker.maximumChunkSize = chunkSize * 4
|
||||
maker.minimumChunkSize = chunkSize / 4
|
||||
maker.bufferCapacity = 2 * maker.minimumChunkSize
|
||||
maker.buffer = make([]byte, maker.bufferCapacity)
|
||||
|
||||
return maker
|
||||
}
|
||||
@@ -90,62 +122,50 @@ func (maker *ChunkMaker) buzhashUpdate(sum uint64, out byte, in byte, length int
|
||||
return rotateLeftByOne(sum) ^ rotateLeft(maker.randomTable[out], uint(length)) ^ maker.randomTable[in]
|
||||
}
|
||||
|
||||
// ForEachChunk reads data from 'reader'. If EOF is encountered, it will call 'nextReader' to ask for next file. If
|
||||
// 'nextReader' returns false, it will process remaining data in the buffer and then quit. When a chunk is identified,
|
||||
// it will call 'endOfChunk' to return the chunk size and a boolean flag indicating if it is the last chunk.
|
||||
func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *Chunk, final bool),
|
||||
nextReader func(size int64, hash string) (io.Reader, bool)) {
|
||||
func (maker *ChunkMaker) startNewChunk() (chunk *Chunk) {
|
||||
maker.hashSum = 0
|
||||
maker.minimumReached = false
|
||||
if maker.hashOnly {
|
||||
maker.chunk = maker.hashOnlyChunk
|
||||
maker.chunk.Reset(true)
|
||||
} else {
|
||||
maker.chunk = maker.config.GetChunk()
|
||||
maker.chunk.Reset(true)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
maker.bufferStart = 0
|
||||
maker.bufferSize = 0
|
||||
|
||||
var minimumReached bool
|
||||
var hashSum uint64
|
||||
var chunk *Chunk
|
||||
func (maker *ChunkMaker) AddData(reader io.Reader, sendChunk func(*Chunk)) (int64, string) {
|
||||
|
||||
isEOF := false
|
||||
fileSize := int64(0)
|
||||
fileHasher := maker.config.NewFileHasher()
|
||||
|
||||
// Start a new chunk.
|
||||
startNewChunk := func() {
|
||||
hashSum = 0
|
||||
minimumReached = false
|
||||
if maker.hashOnly {
|
||||
chunk = maker.hashOnlyChunk
|
||||
chunk.Reset(true)
|
||||
} else {
|
||||
chunk = maker.config.GetChunk()
|
||||
chunk.Reset(true)
|
||||
}
|
||||
}
|
||||
|
||||
// Move data from the buffer to the chunk.
|
||||
fill := func(count int) {
|
||||
|
||||
if maker.bufferStart+count < maker.bufferCapacity {
|
||||
chunk.Write(maker.buffer[maker.bufferStart : maker.bufferStart+count])
|
||||
maker.chunk.Write(maker.buffer[maker.bufferStart : maker.bufferStart+count])
|
||||
maker.bufferStart += count
|
||||
maker.bufferSize -= count
|
||||
} else {
|
||||
chunk.Write(maker.buffer[maker.bufferStart:])
|
||||
chunk.Write(maker.buffer[:count-(maker.bufferCapacity-maker.bufferStart)])
|
||||
maker.chunk.Write(maker.buffer[maker.bufferStart:])
|
||||
maker.chunk.Write(maker.buffer[:count-(maker.bufferCapacity-maker.bufferStart)])
|
||||
maker.bufferStart = count - (maker.bufferCapacity - maker.bufferStart)
|
||||
maker.bufferSize -= count
|
||||
}
|
||||
}
|
||||
|
||||
startNewChunk()
|
||||
|
||||
var err error
|
||||
|
||||
isEOF := false
|
||||
|
||||
if maker.minimumChunkSize == maker.maximumChunkSize {
|
||||
|
||||
if maker.bufferCapacity < maker.minimumChunkSize {
|
||||
maker.buffer = make([]byte, maker.minimumChunkSize)
|
||||
if reader == nil {
|
||||
return 0, ""
|
||||
}
|
||||
|
||||
for {
|
||||
maker.startNewChunk()
|
||||
maker.bufferStart = 0
|
||||
for maker.bufferStart < maker.minimumChunkSize && !isEOF {
|
||||
count, err := reader.Read(maker.buffer[maker.bufferStart:maker.minimumChunkSize])
|
||||
@@ -153,7 +173,7 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
|
||||
if err != nil {
|
||||
if err != io.EOF {
|
||||
LOG_ERROR("CHUNK_MAKER", "Failed to read %d bytes: %s", count, err.Error())
|
||||
return
|
||||
return 0, ""
|
||||
} else {
|
||||
isEOF = true
|
||||
}
|
||||
@@ -161,26 +181,15 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
|
||||
maker.bufferStart += count
|
||||
}
|
||||
|
||||
fileHasher.Write(maker.buffer[:maker.bufferStart])
|
||||
fileSize += int64(maker.bufferStart)
|
||||
chunk.Write(maker.buffer[:maker.bufferStart])
|
||||
if maker.bufferStart > 0 {
|
||||
fileHasher.Write(maker.buffer[:maker.bufferStart])
|
||||
fileSize += int64(maker.bufferStart)
|
||||
maker.chunk.Write(maker.buffer[:maker.bufferStart])
|
||||
sendChunk(maker.chunk)
|
||||
}
|
||||
|
||||
if isEOF {
|
||||
var ok bool
|
||||
reader, ok = nextReader(fileSize, hex.EncodeToString(fileHasher.Sum(nil)))
|
||||
if !ok {
|
||||
endOfChunk(chunk, true)
|
||||
return
|
||||
} else {
|
||||
endOfChunk(chunk, false)
|
||||
startNewChunk()
|
||||
fileSize = 0
|
||||
fileHasher = maker.config.NewFileHasher()
|
||||
isEOF = false
|
||||
}
|
||||
} else {
|
||||
endOfChunk(chunk, false)
|
||||
startNewChunk()
|
||||
return fileSize, hex.EncodeToString(fileHasher.Sum(nil))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -189,7 +198,7 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
|
||||
for {
|
||||
|
||||
// If the buffer still has some space left and EOF is not seen, read more data.
|
||||
for maker.bufferSize < maker.bufferCapacity && !isEOF {
|
||||
for maker.bufferSize < maker.bufferCapacity && !isEOF && reader != nil {
|
||||
start := maker.bufferStart + maker.bufferSize
|
||||
count := maker.bufferCapacity - start
|
||||
if start >= maker.bufferCapacity {
|
||||
@@ -201,7 +210,7 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
|
||||
|
||||
if err != nil && err != io.EOF {
|
||||
LOG_ERROR("CHUNK_MAKER", "Failed to read %d bytes: %s", count, err.Error())
|
||||
return
|
||||
return 0, ""
|
||||
}
|
||||
|
||||
maker.bufferSize += count
|
||||
@@ -210,54 +219,55 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
|
||||
|
||||
// if EOF is seen, try to switch to next file and continue
|
||||
if err == io.EOF {
|
||||
var ok bool
|
||||
reader, ok = nextReader(fileSize, hex.EncodeToString(fileHasher.Sum(nil)))
|
||||
if !ok {
|
||||
isEOF = true
|
||||
} else {
|
||||
fileSize = 0
|
||||
fileHasher = maker.config.NewFileHasher()
|
||||
isEOF = false
|
||||
}
|
||||
isEOF = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Not enough data to meet the minimum chunk size requirement, so just return as a chunk.
|
||||
if maker.bufferSize < maker.minimumChunkSize {
|
||||
fill(maker.bufferSize)
|
||||
endOfChunk(chunk, true)
|
||||
return
|
||||
if reader == nil {
|
||||
fill(maker.bufferSize)
|
||||
if maker.chunk.GetLength() > 0 {
|
||||
sendChunk(maker.chunk)
|
||||
}
|
||||
return 0, ""
|
||||
} else if isEOF {
|
||||
return fileSize, hex.EncodeToString(fileHasher.Sum(nil))
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Minimum chunk size has been reached. Calculate the buzhash for the minimum size chunk.
|
||||
if !minimumReached {
|
||||
if !maker.minimumReached {
|
||||
|
||||
bytes := maker.minimumChunkSize
|
||||
|
||||
if maker.bufferStart+bytes < maker.bufferCapacity {
|
||||
hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:maker.bufferStart+bytes])
|
||||
maker.hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:maker.bufferStart+bytes])
|
||||
} else {
|
||||
hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:])
|
||||
hashSum = maker.buzhashSum(hashSum,
|
||||
maker.hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:])
|
||||
maker.hashSum = maker.buzhashSum(maker.hashSum,
|
||||
maker.buffer[:bytes-(maker.bufferCapacity-maker.bufferStart)])
|
||||
}
|
||||
|
||||
if (hashSum & maker.hashMask) == 0 {
|
||||
if (maker.hashSum & maker.hashMask) == 0 {
|
||||
// This is a minimum size chunk
|
||||
fill(bytes)
|
||||
endOfChunk(chunk, false)
|
||||
startNewChunk()
|
||||
sendChunk(maker.chunk)
|
||||
maker.startNewChunk()
|
||||
continue
|
||||
}
|
||||
|
||||
minimumReached = true
|
||||
maker.minimumReached = true
|
||||
}
|
||||
|
||||
// Now check the buzhash of the data in the buffer, shifting one byte at a time.
|
||||
bytes := maker.bufferSize - maker.minimumChunkSize
|
||||
isEOC := false
|
||||
maxSize := maker.maximumChunkSize - chunk.GetLength()
|
||||
for i := 0; i < maker.bufferSize-maker.minimumChunkSize; i++ {
|
||||
isEOC := false // chunk boundary found
|
||||
maxSize := maker.maximumChunkSize - maker.chunk.GetLength()
|
||||
for i := 0; i < bytes; i++ {
|
||||
out := maker.bufferStart + i
|
||||
if out >= maker.bufferCapacity {
|
||||
out -= maker.bufferCapacity
|
||||
@@ -267,8 +277,8 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
|
||||
in -= maker.bufferCapacity
|
||||
}
|
||||
|
||||
hashSum = maker.buzhashUpdate(hashSum, maker.buffer[out], maker.buffer[in], maker.minimumChunkSize)
|
||||
if (hashSum&maker.hashMask) == 0 || i == maxSize-maker.minimumChunkSize-1 {
|
||||
maker.hashSum = maker.buzhashUpdate(maker.hashSum, maker.buffer[out], maker.buffer[in], maker.minimumChunkSize)
|
||||
if (maker.hashSum&maker.hashMask) == 0 || i == maxSize-maker.minimumChunkSize-1 {
|
||||
// A chunk is completed.
|
||||
bytes = i + 1 + maker.minimumChunkSize
|
||||
isEOC = true
|
||||
@@ -277,21 +287,20 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
|
||||
}
|
||||
|
||||
fill(bytes)
|
||||
|
||||
if isEOC {
|
||||
if isEOF && maker.bufferSize == 0 {
|
||||
endOfChunk(chunk, true)
|
||||
return
|
||||
sendChunk(maker.chunk)
|
||||
maker.startNewChunk()
|
||||
} else {
|
||||
if reader == nil {
|
||||
fill(maker.minimumChunkSize)
|
||||
sendChunk(maker.chunk)
|
||||
maker.startNewChunk()
|
||||
return 0, ""
|
||||
}
|
||||
endOfChunk(chunk, false)
|
||||
startNewChunk()
|
||||
continue
|
||||
}
|
||||
|
||||
if isEOF {
|
||||
fill(maker.bufferSize)
|
||||
endOfChunk(chunk, true)
|
||||
return
|
||||
return fileSize, hex.EncodeToString(fileHasher.Sum(nil))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,14 +7,12 @@ package duplicacy
|
||||
import (
|
||||
"bytes"
|
||||
crypto_rand "crypto/rand"
|
||||
"io"
|
||||
"math/rand"
|
||||
"sort"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunkSize,
|
||||
bufferCapacity int) ([]string, int) {
|
||||
func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunkSize int) ([]string, int) {
|
||||
|
||||
config := CreateConfig()
|
||||
|
||||
@@ -27,14 +25,12 @@ func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunk
|
||||
config.HashKey = DEFAULT_KEY
|
||||
config.IDKey = DEFAULT_KEY
|
||||
|
||||
maker := CreateChunkMaker(config, false)
|
||||
maker := CreateFileChunkMaker(config, false)
|
||||
|
||||
var chunks []string
|
||||
totalChunkSize := 0
|
||||
totalFileSize := int64(0)
|
||||
|
||||
//LOG_INFO("CHUNK_SPLIT", "bufferCapacity: %d", bufferCapacity)
|
||||
|
||||
buffers := make([]*bytes.Buffer, n)
|
||||
sizes := make([]int, n)
|
||||
sizes[0] = 0
|
||||
@@ -42,7 +38,7 @@ func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunk
|
||||
same := true
|
||||
for same {
|
||||
same = false
|
||||
sizes[i] = rand.Int() % n
|
||||
sizes[i] = rand.Int() % len(content)
|
||||
for j := 0; j < i; j++ {
|
||||
if sizes[i] == sizes[j] {
|
||||
same = true
|
||||
@@ -59,22 +55,17 @@ func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunk
|
||||
}
|
||||
buffers[n-1] = bytes.NewBuffer(content[sizes[n-1]:])
|
||||
|
||||
i := 0
|
||||
chunkFunc := func(chunk *Chunk) {
|
||||
chunks = append(chunks, chunk.GetHash())
|
||||
totalChunkSize += chunk.GetLength()
|
||||
config.PutChunk(chunk)
|
||||
}
|
||||
|
||||
maker.ForEachChunk(buffers[0],
|
||||
func(chunk *Chunk, final bool) {
|
||||
//LOG_INFO("CHUNK_SPLIT", "i: %d, chunk: %s, size: %d", i, chunk.GetHash(), size)
|
||||
chunks = append(chunks, chunk.GetHash())
|
||||
totalChunkSize += chunk.GetLength()
|
||||
},
|
||||
func(size int64, hash string) (io.Reader, bool) {
|
||||
totalFileSize += size
|
||||
i++
|
||||
if i >= len(buffers) {
|
||||
return nil, false
|
||||
}
|
||||
return buffers[i], true
|
||||
})
|
||||
for _, buffer := range buffers {
|
||||
fileSize, _ := maker.AddData(buffer, chunkFunc)
|
||||
totalFileSize += fileSize
|
||||
}
|
||||
maker.AddData(nil, chunkFunc)
|
||||
|
||||
if totalFileSize != int64(totalChunkSize) {
|
||||
LOG_ERROR("CHUNK_SPLIT", "total chunk size: %d, total file size: %d", totalChunkSize, totalFileSize)
|
||||
@@ -96,35 +87,28 @@ func TestChunkMaker(t *testing.T) {
|
||||
continue
|
||||
}
|
||||
|
||||
chunkArray1, totalSize1 := splitIntoChunks(content, 10, 32, 64, 16, 32)
|
||||
chunkArray1, totalSize1 := splitIntoChunks(content, 10, 32, 64, 16)
|
||||
|
||||
capacities := [...]int{32, 33, 34, 61, 62, 63, 64, 65, 66, 126, 127, 128, 129, 130,
|
||||
255, 256, 257, 511, 512, 513, 1023, 1024, 1025,
|
||||
32, 48, 64, 128, 256, 512, 1024, 2048}
|
||||
|
||||
//capacities := [...]int { 32 }
|
||||
for _, n := range [...]int{6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} {
|
||||
chunkArray2, totalSize2 := splitIntoChunks(content, n, 32, 64, 16)
|
||||
|
||||
for _, capacity := range capacities {
|
||||
if totalSize1 != totalSize2 {
|
||||
t.Errorf("[size %d] total size is %d instead of %d",
|
||||
size, totalSize2, totalSize1)
|
||||
}
|
||||
|
||||
for _, n := range [...]int{6, 7, 8, 9, 10} {
|
||||
chunkArray2, totalSize2 := splitIntoChunks(content, n, 32, 64, 16, capacity)
|
||||
|
||||
if totalSize1 != totalSize2 {
|
||||
t.Errorf("[size %d, capacity %d] total size is %d instead of %d",
|
||||
size, capacity, totalSize2, totalSize1)
|
||||
}
|
||||
|
||||
if len(chunkArray1) != len(chunkArray2) {
|
||||
t.Errorf("[size %d, capacity %d] number of chunks is %d instead of %d",
|
||||
size, capacity, len(chunkArray2), len(chunkArray1))
|
||||
} else {
|
||||
for i := 0; i < len(chunkArray1); i++ {
|
||||
if chunkArray1[i] != chunkArray2[i] {
|
||||
t.Errorf("[size %d, capacity %d, chunk %d] chunk is different", size, capacity, i)
|
||||
}
|
||||
if len(chunkArray1) != len(chunkArray2) {
|
||||
t.Errorf("[size %d] number of chunks is %d instead of %d",
|
||||
size, len(chunkArray2), len(chunkArray1))
|
||||
} else {
|
||||
for i := 0; i < len(chunkArray1); i++ {
|
||||
if chunkArray1[i] != chunkArray2[i] {
|
||||
t.Errorf("[size %d, chunk %d] chunk is different", size, i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
package duplicacy
|
||||
|
||||
import (
|
||||
"io"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
@@ -12,42 +13,69 @@ import (
|
||||
|
||||
// These are operations that ChunkOperator will perform.
|
||||
const (
|
||||
ChunkOperationFind = 0
|
||||
ChunkOperationDelete = 1
|
||||
ChunkOperationFossilize = 2
|
||||
ChunkOperationResurrect = 3
|
||||
ChunkOperationDownload = 0
|
||||
ChunkOperationUpload = 1
|
||||
ChunkOperationDelete = 2
|
||||
ChunkOperationFossilize = 3
|
||||
ChunkOperationResurrect = 4
|
||||
ChunkOperationFind = 5
|
||||
)
|
||||
|
||||
// ChunkOperatorTask is used to pass parameters for different kinds of chunk operations.
|
||||
type ChunkOperatorTask struct {
|
||||
operation int // The type of operation
|
||||
chunkID string // The chunk id
|
||||
filePath string // The path of the chunk file; it may be empty
|
||||
// ChunkTask is used to pass parameters for different kinds of chunk operations.
|
||||
type ChunkTask struct {
|
||||
operation int // The type of operation
|
||||
chunkID string // The chunk id
|
||||
chunkHash string // The chunk hash
|
||||
chunkIndex int // The chunk index
|
||||
filePath string // The path of the chunk file; it may be empty
|
||||
|
||||
isMetadata bool
|
||||
chunk *Chunk
|
||||
|
||||
completionFunc func(chunk *Chunk, chunkIndex int)
|
||||
}
|
||||
|
||||
// ChunkOperator is capable of performing multi-threaded operations on chunks.
|
||||
type ChunkOperator struct {
|
||||
numberOfActiveTasks int64 // The number of chunks that are being operated on
|
||||
storage Storage // This storage
|
||||
threads int // Number of threads
|
||||
taskQueue chan ChunkOperatorTask // Operating goroutines are waiting on this channel for input
|
||||
stopChannel chan bool // Used to stop all the goroutines
|
||||
config *Config // Associated config
|
||||
storage Storage // This storage
|
||||
snapshotCache *FileStorage
|
||||
showStatistics bool
|
||||
threads int // Number of threads
|
||||
taskQueue chan ChunkTask // Operating goroutines are waiting on this channel for input
|
||||
stopChannel chan bool // Used to stop all the goroutines
|
||||
|
||||
fossils []string // For fossilize operation, the paths of the fossils are stored in this slice
|
||||
fossilsLock *sync.Mutex // The lock for 'fossils'
|
||||
numberOfActiveTasks int64 // The number of chunks that are being operated on
|
||||
|
||||
fossils []string // For fossilize operation, the paths of the fossils are stored in this slice
|
||||
collectionLock *sync.Mutex // The lock for accessing 'fossils'
|
||||
|
||||
startTime int64 // The time it starts downloading
|
||||
totalChunkSize int64 // Total chunk size
|
||||
downloadedChunkSize int64 // Downloaded chunk size
|
||||
|
||||
allowFailures bool // Whether to fail on download error, or continue
|
||||
NumberOfFailedChunks int64 // The number of chunks that can't be downloaded
|
||||
|
||||
UploadCompletionFunc func(chunk *Chunk, chunkIndex int, inCache bool, chunkSize int, uploadSize int)
|
||||
}
|
||||
|
||||
// CreateChunkOperator creates a new ChunkOperator.
|
||||
func CreateChunkOperator(storage Storage, threads int) *ChunkOperator {
|
||||
func CreateChunkOperator(config *Config, storage Storage, snapshotCache *FileStorage, showStatistics bool, threads int, allowFailures bool) *ChunkOperator {
|
||||
|
||||
operator := &ChunkOperator{
|
||||
config: config,
|
||||
storage: storage,
|
||||
snapshotCache: snapshotCache,
|
||||
showStatistics: showStatistics,
|
||||
threads: threads,
|
||||
|
||||
taskQueue: make(chan ChunkOperatorTask, threads*4),
|
||||
taskQueue: make(chan ChunkTask, threads),
|
||||
stopChannel: make(chan bool),
|
||||
|
||||
fossils: make([]string, 0),
|
||||
fossilsLock: &sync.Mutex{},
|
||||
collectionLock: &sync.Mutex{},
|
||||
|
||||
allowFailures: allowFailures,
|
||||
}
|
||||
|
||||
// Start the operator goroutines
|
||||
@@ -84,38 +112,78 @@ func (operator *ChunkOperator) Stop() {
|
||||
atomic.AddInt64(&operator.numberOfActiveTasks, int64(-1))
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) AddTask(operation int, chunkID string, filePath string) {
|
||||
func (operator *ChunkOperator) WaitForCompletion() {
|
||||
|
||||
task := ChunkOperatorTask{
|
||||
operation: operation,
|
||||
chunkID: chunkID,
|
||||
filePath: filePath,
|
||||
for atomic.LoadInt64(&operator.numberOfActiveTasks) > 0 {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
operator.taskQueue <- task
|
||||
atomic.AddInt64(&operator.numberOfActiveTasks, int64(1))
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) Find(chunkID string) {
|
||||
operator.AddTask(ChunkOperationFind, chunkID, "")
|
||||
func (operator *ChunkOperator) AddTask(operation int, chunkID string, chunkHash string, filePath string, chunkIndex int, chunk *Chunk, isMetadata bool, completionFunc func(*Chunk, int)) {
|
||||
|
||||
task := ChunkTask {
|
||||
operation: operation,
|
||||
chunkID: chunkID,
|
||||
chunkHash: chunkHash,
|
||||
chunkIndex: chunkIndex,
|
||||
filePath: filePath,
|
||||
chunk: chunk,
|
||||
isMetadata: isMetadata,
|
||||
completionFunc: completionFunc,
|
||||
}
|
||||
|
||||
operator.taskQueue <- task
|
||||
atomic.AddInt64(&operator.numberOfActiveTasks, int64(1))
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) Download(chunkHash string, chunkIndex int, isMetadata bool) *Chunk {
|
||||
chunkID := operator.config.GetChunkIDFromHash(chunkHash)
|
||||
completionChannel := make(chan *Chunk)
|
||||
completionFunc := func(chunk *Chunk, chunkIndex int) {
|
||||
completionChannel <- chunk
|
||||
}
|
||||
operator.AddTask(ChunkOperationDownload, chunkID, chunkHash, "", chunkIndex, nil, isMetadata, completionFunc)
|
||||
return <- completionChannel
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) DownloadAsync(chunkHash string, chunkIndex int, isMetadata bool, completionFunc func(*Chunk, int)) {
|
||||
chunkID := operator.config.GetChunkIDFromHash(chunkHash)
|
||||
operator.AddTask(ChunkOperationDownload, chunkID, chunkHash, "", chunkIndex, nil, isMetadata, completionFunc)
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) Upload(chunk *Chunk, chunkIndex int, isMetadata bool) {
|
||||
chunkHash := chunk.GetHash()
|
||||
chunkID := operator.config.GetChunkIDFromHash(chunkHash)
|
||||
operator.AddTask(ChunkOperationUpload, chunkID, chunkHash, "", chunkIndex, chunk, isMetadata, nil)
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) Delete(chunkID string, filePath string) {
|
||||
operator.AddTask(ChunkOperationDelete, chunkID, filePath)
|
||||
operator.AddTask(ChunkOperationDelete, chunkID, "", filePath, 0, nil, false, nil)
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) Fossilize(chunkID string, filePath string) {
|
||||
operator.AddTask(ChunkOperationFossilize, chunkID, filePath)
|
||||
operator.AddTask(ChunkOperationFossilize, chunkID, "", filePath, 0, nil, false, nil)
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) Resurrect(chunkID string, filePath string) {
|
||||
operator.AddTask(ChunkOperationResurrect, chunkID, filePath)
|
||||
operator.AddTask(ChunkOperationResurrect, chunkID, "", filePath, 0, nil, false, nil)
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
|
||||
func (operator *ChunkOperator) Run(threadIndex int, task ChunkTask) {
|
||||
defer func() {
|
||||
atomic.AddInt64(&operator.numberOfActiveTasks, int64(-1))
|
||||
}()
|
||||
|
||||
if task.operation == ChunkOperationDownload {
|
||||
operator.DownloadChunk(threadIndex, task)
|
||||
return
|
||||
} else if task.operation == ChunkOperationUpload {
|
||||
operator.UploadChunk(threadIndex, task)
|
||||
return
|
||||
}
|
||||
|
||||
// task.filePath may be empty. If so, find the chunk first.
|
||||
if task.operation == ChunkOperationDelete || task.operation == ChunkOperationFossilize {
|
||||
if task.filePath == "" {
|
||||
@@ -132,9 +200,9 @@ func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
|
||||
fossilPath, exist, _, _ := operator.storage.FindChunk(threadIndex, task.chunkID, true)
|
||||
if exist {
|
||||
LOG_WARN("CHUNK_FOSSILIZE", "Chunk %s is already a fossil", task.chunkID)
|
||||
operator.fossilsLock.Lock()
|
||||
operator.collectionLock.Lock()
|
||||
operator.fossils = append(operator.fossils, fossilPath)
|
||||
operator.fossilsLock.Unlock()
|
||||
operator.collectionLock.Unlock()
|
||||
} else {
|
||||
LOG_ERROR("CHUNK_FIND", "Chunk %s does not exist in the storage", task.chunkID)
|
||||
}
|
||||
@@ -175,17 +243,17 @@ func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
|
||||
if err == nil {
|
||||
LOG_TRACE("CHUNK_DELETE", "Deleted chunk file %s as the fossil already exists", task.chunkID)
|
||||
}
|
||||
operator.fossilsLock.Lock()
|
||||
operator.collectionLock.Lock()
|
||||
operator.fossils = append(operator.fossils, fossilPath)
|
||||
operator.fossilsLock.Unlock()
|
||||
operator.collectionLock.Unlock()
|
||||
} else {
|
||||
LOG_ERROR("CHUNK_DELETE", "Failed to fossilize the chunk %s: %v", task.chunkID, err)
|
||||
}
|
||||
} else {
|
||||
LOG_TRACE("CHUNK_FOSSILIZE", "The chunk %s has been marked as a fossil", task.chunkID)
|
||||
operator.fossilsLock.Lock()
|
||||
operator.collectionLock.Lock()
|
||||
operator.fossils = append(operator.fossils, fossilPath)
|
||||
operator.fossilsLock.Unlock()
|
||||
operator.collectionLock.Unlock()
|
||||
}
|
||||
} else if task.operation == ChunkOperationResurrect {
|
||||
chunkPath, exist, _, err := operator.storage.FindChunk(threadIndex, task.chunkID, false)
|
||||
@@ -207,3 +275,267 @@ func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Download downloads a chunk from the storage.
|
||||
func (operator *ChunkOperator) DownloadChunk(threadIndex int, task ChunkTask) {
|
||||
|
||||
cachedPath := ""
|
||||
chunk := operator.config.GetChunk()
|
||||
chunk.isMetadata = task.isMetadata
|
||||
chunkID := task.chunkID
|
||||
|
||||
defer func() {
|
||||
if chunk != nil {
|
||||
operator.config.PutChunk(chunk)
|
||||
}
|
||||
} ()
|
||||
|
||||
if task.isMetadata && operator.snapshotCache != nil {
|
||||
|
||||
var exist bool
|
||||
var err error
|
||||
|
||||
// Reset the chunk with a hasher -- we're reading from the cache where chunk are not encrypted or compressed
|
||||
chunk.Reset(true)
|
||||
|
||||
cachedPath, exist, _, err = operator.snapshotCache.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
LOG_WARN("DOWNLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
|
||||
} else if exist {
|
||||
err = operator.snapshotCache.DownloadFile(0, cachedPath, chunk)
|
||||
if err != nil {
|
||||
LOG_WARN("DOWNLOAD_CACHE", "Failed to load the chunk %s from the snapshot cache: %v", chunkID, err)
|
||||
} else {
|
||||
actualChunkID := chunk.GetID()
|
||||
if actualChunkID != chunkID {
|
||||
LOG_WARN("DOWNLOAD_CACHE_CORRUPTED",
|
||||
"The chunk %s load from the snapshot cache has a hash id of %s", chunkID, actualChunkID)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been loaded from the snapshot cache", chunkID)
|
||||
|
||||
task.completionFunc(chunk, task.chunkIndex)
|
||||
chunk = nil
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reset the chunk without a hasher -- the downloaded content will be encrypted and/or compressed and the hasher
|
||||
// will be set up before the encryption
|
||||
chunk.Reset(false)
|
||||
chunk.isMetadata = task.isMetadata
|
||||
|
||||
// If failures are allowed, complete the task properly
|
||||
completeFailedChunk := func() {
|
||||
|
||||
atomic.AddInt64(&operator.NumberOfFailedChunks, 1)
|
||||
if operator.allowFailures {
|
||||
task.completionFunc(chunk, task.chunkIndex)
|
||||
}
|
||||
}
|
||||
|
||||
const MaxDownloadAttempts = 3
|
||||
for downloadAttempt := 0; ; downloadAttempt++ {
|
||||
|
||||
// Find the chunk by ID first.
|
||||
chunkPath, exist, _, err := operator.storage.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
completeFailedChunk()
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
|
||||
return
|
||||
}
|
||||
|
||||
if !exist {
|
||||
// No chunk is found. Have to find it in the fossil pool again.
|
||||
fossilPath, exist, _, err := operator.storage.FindChunk(threadIndex, chunkID, true)
|
||||
if err != nil {
|
||||
completeFailedChunk()
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
|
||||
return
|
||||
}
|
||||
|
||||
if !exist {
|
||||
|
||||
retry := false
|
||||
|
||||
// Retry for Hubic or WebDAV as it may return 404 even when the chunk exists
|
||||
if _, ok := operator.storage.(*HubicStorage); ok {
|
||||
retry = true
|
||||
}
|
||||
|
||||
if _, ok := operator.storage.(*WebDAVStorage); ok {
|
||||
retry = true
|
||||
}
|
||||
|
||||
if retry && downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "Failed to find the chunk %s; retrying", chunkID)
|
||||
continue
|
||||
}
|
||||
|
||||
// A chunk is not found. This is a serious error and hopefully it will never happen.
|
||||
completeFailedChunk()
|
||||
if err != nil {
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found: %v", chunkID, err)
|
||||
} else {
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found", chunkID)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// We can't download the fossil directly. We have to turn it back into a regular chunk and try
|
||||
// downloading again.
|
||||
err = operator.storage.MoveFile(threadIndex, fossilPath, chunkPath)
|
||||
if err != nil {
|
||||
completeFailedChunk()
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to resurrect chunk %s: %v", chunkID, err)
|
||||
return
|
||||
}
|
||||
|
||||
LOG_WARN("DOWNLOAD_RESURRECT", "Fossil %s has been resurrected", chunkID)
|
||||
continue
|
||||
}
|
||||
|
||||
err = operator.storage.DownloadFile(threadIndex, chunkPath, chunk)
|
||||
if err != nil {
|
||||
_, isHubic := operator.storage.(*HubicStorage)
|
||||
// Retry on EOF or if it is a Hubic backend as it may return 404 even when the chunk exists
|
||||
if (err == io.ErrUnexpectedEOF || isHubic) && downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "Failed to download the chunk %s: %v; retrying", chunkID, err)
|
||||
chunk.Reset(false)
|
||||
chunk.isMetadata = task.isMetadata
|
||||
continue
|
||||
} else {
|
||||
completeFailedChunk()
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to download the chunk %s: %v", chunkID, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
err = chunk.Decrypt(operator.config.ChunkKey, task.chunkHash)
|
||||
if err != nil {
|
||||
if downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "Failed to decrypt the chunk %s: %v; retrying", chunkID, err)
|
||||
chunk.Reset(false)
|
||||
chunk.isMetadata = task.isMetadata
|
||||
continue
|
||||
} else {
|
||||
completeFailedChunk()
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_DECRYPT", "Failed to decrypt the chunk %s: %v", chunkID, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
actualChunkID := chunk.GetID()
|
||||
if actualChunkID != chunkID {
|
||||
if downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "The chunk %s has a hash id of %s; retrying", chunkID, actualChunkID)
|
||||
chunk.Reset(false)
|
||||
chunk.isMetadata = task.isMetadata
|
||||
continue
|
||||
} else {
|
||||
completeFailedChunk()
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CORRUPTED", "The chunk %s has a hash id of %s", chunkID, actualChunkID)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
if chunk.isMetadata && len(cachedPath) > 0 {
|
||||
// Save a copy to the local snapshot cache
|
||||
err := operator.snapshotCache.UploadFile(threadIndex, cachedPath, chunk.GetBytes())
|
||||
if err != nil {
|
||||
LOG_WARN("DOWNLOAD_CACHE", "Failed to add the chunk %s to the snapshot cache: %v", chunkID, err)
|
||||
}
|
||||
}
|
||||
|
||||
downloadedChunkSize := atomic.AddInt64(&operator.downloadedChunkSize, int64(chunk.GetLength()))
|
||||
|
||||
if (operator.showStatistics || IsTracing()) && operator.totalChunkSize > 0 {
|
||||
|
||||
now := time.Now().Unix()
|
||||
if now <= operator.startTime {
|
||||
now = operator.startTime + 1
|
||||
}
|
||||
speed := downloadedChunkSize / (now - operator.startTime)
|
||||
remainingTime := int64(0)
|
||||
if speed > 0 {
|
||||
remainingTime = (operator.totalChunkSize-downloadedChunkSize)/speed + 1
|
||||
}
|
||||
percentage := float32(downloadedChunkSize * 1000 / operator.totalChunkSize)
|
||||
LOG_INFO("DOWNLOAD_PROGRESS", "Downloaded chunk %d size %d, %sB/s %s %.1f%%",
|
||||
task.chunkIndex+1, chunk.GetLength(),
|
||||
PrettySize(speed), PrettyTime(remainingTime), percentage/10)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_DOWNLOAD", "Chunk %s has been downloaded", chunkID)
|
||||
}
|
||||
|
||||
task.completionFunc(chunk, task.chunkIndex)
|
||||
chunk = nil
|
||||
return
|
||||
}
|
||||
|
||||
// UploadChunk is called by the task goroutines to perform the actual uploading
|
||||
func (operator *ChunkOperator) UploadChunk(threadIndex int, task ChunkTask) bool {
|
||||
|
||||
chunk := task.chunk
|
||||
chunkID := task.chunkID
|
||||
chunkSize := chunk.GetLength()
|
||||
|
||||
// For a snapshot chunk, verify that its chunk id is correct
|
||||
if task.isMetadata {
|
||||
chunk.VerifyID()
|
||||
}
|
||||
|
||||
if task.isMetadata && operator.storage.IsCacheNeeded() {
|
||||
// Save a copy to the local snapshot.
|
||||
chunkPath, exist, _, err := operator.snapshotCache.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
LOG_WARN("UPLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
|
||||
} else if exist {
|
||||
LOG_DEBUG("CHUNK_CACHE", "Chunk %s already exists in the snapshot cache", chunkID)
|
||||
} else if err = operator.snapshotCache.UploadFile(threadIndex, chunkPath, chunk.GetBytes()); err != nil {
|
||||
LOG_WARN("UPLOAD_CACHE", "Failed to save the chunk %s to the snapshot cache: %v", chunkID, err)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been saved to the snapshot cache", chunkID)
|
||||
}
|
||||
}
|
||||
|
||||
// This returns the path the chunk file should be at.
|
||||
chunkPath, exist, _, err := operator.storage.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
LOG_ERROR("UPLOAD_CHUNK", "Failed to find the path for the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
|
||||
if exist {
|
||||
// Chunk deduplication by name in effect here.
|
||||
LOG_DEBUG("CHUNK_DUPLICATE", "Chunk %s already exists", chunkID)
|
||||
|
||||
operator.UploadCompletionFunc(chunk, task.chunkIndex, false, chunkSize, 0)
|
||||
return false
|
||||
}
|
||||
|
||||
// Encrypt the chunk only after we know that it must be uploaded.
|
||||
err = chunk.Encrypt(operator.config.ChunkKey, chunk.GetHash(), task.isMetadata)
|
||||
if err != nil {
|
||||
LOG_ERROR("UPLOAD_CHUNK", "Failed to encrypt the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
|
||||
if !operator.config.dryRun {
|
||||
err = operator.storage.UploadFile(threadIndex, chunkPath, chunk.GetBytes())
|
||||
if err != nil {
|
||||
LOG_ERROR("UPLOAD_CHUNK", "Failed to upload the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
LOG_DEBUG("CHUNK_UPLOAD", "Chunk %s has been uploaded", chunkID)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_UPLOAD", "Uploading was skipped for chunk %s", chunkID)
|
||||
}
|
||||
|
||||
operator.UploadCompletionFunc(chunk, task.chunkIndex, false, chunkSize, chunk.GetLength())
|
||||
return true
|
||||
}
|
||||
@@ -15,11 +15,11 @@ import (
|
||||
"math/rand"
|
||||
)
|
||||
|
||||
func TestUploaderAndDownloader(t *testing.T) {
|
||||
func TestChunkOperator(t *testing.T) {
|
||||
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
setTestingT(t)
|
||||
SetLoggingLevel(INFO)
|
||||
SetLoggingLevel(DEBUG)
|
||||
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
@@ -87,35 +87,25 @@ func TestUploaderAndDownloader(t *testing.T) {
|
||||
totalFileSize += chunk.GetLength()
|
||||
}
|
||||
|
||||
completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
|
||||
chunkOperator := CreateChunkOperator(config, storage, nil, false, *testThreads, false)
|
||||
chunkOperator.UploadCompletionFunc = func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
|
||||
t.Logf("Chunk %s size %d (%d/%d) uploaded", chunk.GetID(), chunkSize, chunkIndex, len(chunks))
|
||||
}
|
||||
|
||||
chunkUploader := CreateChunkUploader(config, storage, nil, *testThreads, nil)
|
||||
chunkUploader.completionFunc = completionFunc
|
||||
chunkUploader.Start()
|
||||
|
||||
for i, chunk := range chunks {
|
||||
chunkUploader.StartChunk(chunk, i)
|
||||
chunkOperator.Upload(chunk, i, false)
|
||||
}
|
||||
|
||||
chunkUploader.Stop()
|
||||
|
||||
chunkDownloader := CreateChunkDownloader(config, storage, nil, true, *testThreads, false)
|
||||
chunkDownloader.totalChunkSize = int64(totalFileSize)
|
||||
|
||||
for _, chunk := range chunks {
|
||||
chunkDownloader.AddChunk(chunk.GetHash())
|
||||
}
|
||||
chunkOperator.WaitForCompletion()
|
||||
|
||||
for i, chunk := range chunks {
|
||||
downloaded := chunkDownloader.WaitForChunk(i)
|
||||
downloaded := chunkOperator.Download(chunk.GetHash(), i, false)
|
||||
if downloaded.GetID() != chunk.GetID() {
|
||||
t.Errorf("Uploaded: %s, downloaded: %s", chunk.GetID(), downloaded.GetID())
|
||||
}
|
||||
}
|
||||
|
||||
chunkDownloader.Stop()
|
||||
chunkOperator.Stop()
|
||||
|
||||
for _, file := range listChunks(storage) {
|
||||
err = storage.DeleteFile(0, "chunks/"+file)
|
||||
@@ -1,151 +0,0 @@
|
||||
// Copyright (c) Acrosync LLC. All rights reserved.
|
||||
// Free for personal use and commercial trial
|
||||
// Commercial use requires per-user licenses available from https://duplicacy.com
|
||||
|
||||
package duplicacy
|
||||
|
||||
import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ChunkUploadTask represents a chunk to be uploaded.
|
||||
type ChunkUploadTask struct {
|
||||
chunk *Chunk
|
||||
chunkIndex int
|
||||
}
|
||||
|
||||
// ChunkUploader uploads chunks to the storage using one or more uploading goroutines. Chunks are added
|
||||
// by the call to StartChunk(), and then passed to the uploading goroutines. The completion function is
|
||||
// called when the downloading is completed. Note that ChunkUploader does not release chunks to the
|
||||
// chunk pool; instead
|
||||
type ChunkUploader struct {
|
||||
config *Config // Associated config
|
||||
storage Storage // Download from this storage
|
||||
snapshotCache *FileStorage // Used as cache if not nil; usually for uploading snapshot chunks
|
||||
threads int // Number of uploading goroutines
|
||||
taskQueue chan ChunkUploadTask // Uploading goroutines are listening on this channel for upload jobs
|
||||
stopChannel chan bool // Used to terminate uploading goroutines
|
||||
|
||||
numberOfUploadingTasks int32 // The number of uploading tasks
|
||||
|
||||
// Uploading goroutines call this function after having downloaded chunks
|
||||
completionFunc func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int)
|
||||
}
|
||||
|
||||
// CreateChunkUploader creates a chunk uploader.
|
||||
func CreateChunkUploader(config *Config, storage Storage, snapshotCache *FileStorage, threads int,
|
||||
completionFunc func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int)) *ChunkUploader {
|
||||
uploader := &ChunkUploader{
|
||||
config: config,
|
||||
storage: storage,
|
||||
snapshotCache: snapshotCache,
|
||||
threads: threads,
|
||||
taskQueue: make(chan ChunkUploadTask, 1),
|
||||
stopChannel: make(chan bool),
|
||||
completionFunc: completionFunc,
|
||||
}
|
||||
|
||||
return uploader
|
||||
}
|
||||
|
||||
// Starts starts uploading goroutines.
|
||||
func (uploader *ChunkUploader) Start() {
|
||||
for i := 0; i < uploader.threads; i++ {
|
||||
go func(threadIndex int) {
|
||||
defer CatchLogException()
|
||||
for {
|
||||
select {
|
||||
case task := <-uploader.taskQueue:
|
||||
uploader.Upload(threadIndex, task)
|
||||
case <-uploader.stopChannel:
|
||||
return
|
||||
}
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
}
|
||||
|
||||
// StartChunk sends a chunk to be uploaded to a waiting uploading goroutine. It may block if all uploading goroutines are busy.
|
||||
func (uploader *ChunkUploader) StartChunk(chunk *Chunk, chunkIndex int) {
|
||||
atomic.AddInt32(&uploader.numberOfUploadingTasks, 1)
|
||||
uploader.taskQueue <- ChunkUploadTask{
|
||||
chunk: chunk,
|
||||
chunkIndex: chunkIndex,
|
||||
}
|
||||
}
|
||||
|
||||
// Stop stops all uploading goroutines.
|
||||
func (uploader *ChunkUploader) Stop() {
|
||||
for atomic.LoadInt32(&uploader.numberOfUploadingTasks) > 0 {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
for i := 0; i < uploader.threads; i++ {
|
||||
uploader.stopChannel <- false
|
||||
}
|
||||
}
|
||||
|
||||
// Upload is called by the uploading goroutines to perform the actual uploading
|
||||
func (uploader *ChunkUploader) Upload(threadIndex int, task ChunkUploadTask) bool {
|
||||
|
||||
chunk := task.chunk
|
||||
chunkSize := chunk.GetLength()
|
||||
chunkID := chunk.GetID()
|
||||
|
||||
// For a snapshot chunk, verify that its chunk id is correct
|
||||
if uploader.snapshotCache != nil {
|
||||
chunk.VerifyID()
|
||||
}
|
||||
|
||||
if uploader.snapshotCache != nil && uploader.storage.IsCacheNeeded() {
|
||||
// Save a copy to the local snapshot.
|
||||
chunkPath, exist, _, err := uploader.snapshotCache.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
LOG_WARN("UPLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
|
||||
} else if exist {
|
||||
LOG_DEBUG("CHUNK_CACHE", "Chunk %s already exists in the snapshot cache", chunkID)
|
||||
} else if err = uploader.snapshotCache.UploadFile(threadIndex, chunkPath, chunk.GetBytes()); err != nil {
|
||||
LOG_WARN("UPLOAD_CACHE", "Failed to save the chunk %s to the snapshot cache: %v", chunkID, err)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been saved to the snapshot cache", chunkID)
|
||||
}
|
||||
}
|
||||
|
||||
// This returns the path the chunk file should be at.
|
||||
chunkPath, exist, _, err := uploader.storage.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
LOG_ERROR("UPLOAD_CHUNK", "Failed to find the path for the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
|
||||
if exist {
|
||||
// Chunk deduplication by name in effect here.
|
||||
LOG_DEBUG("CHUNK_DUPLICATE", "Chunk %s already exists", chunkID)
|
||||
|
||||
uploader.completionFunc(chunk, task.chunkIndex, true, chunkSize, 0)
|
||||
atomic.AddInt32(&uploader.numberOfUploadingTasks, -1)
|
||||
return false
|
||||
}
|
||||
|
||||
// Encrypt the chunk only after we know that it must be uploaded.
|
||||
err = chunk.Encrypt(uploader.config.ChunkKey, chunk.GetHash(), uploader.snapshotCache != nil)
|
||||
if err != nil {
|
||||
LOG_ERROR("UPLOAD_CHUNK", "Failed to encrypt the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
|
||||
if !uploader.config.dryRun {
|
||||
err = uploader.storage.UploadFile(threadIndex, chunkPath, chunk.GetBytes())
|
||||
if err != nil {
|
||||
LOG_ERROR("UPLOAD_CHUNK", "Failed to upload the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
LOG_DEBUG("CHUNK_UPLOAD", "Chunk %s has been uploaded", chunkID)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_UPLOAD", "Uploading was skipped for chunk %s", chunkID)
|
||||
}
|
||||
|
||||
uploader.completionFunc(chunk, task.chunkIndex, false, chunkSize, chunk.GetLength())
|
||||
atomic.AddInt32(&uploader.numberOfUploadingTasks, -1)
|
||||
return true
|
||||
}
|
||||
@@ -16,6 +16,11 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
|
||||
"github.com/vmihailenco/msgpack"
|
||||
|
||||
)
|
||||
|
||||
// This is the hidden directory in the repository for storing various files.
|
||||
@@ -45,7 +50,7 @@ type Entry struct {
|
||||
EndChunk int
|
||||
EndOffset int
|
||||
|
||||
Attributes map[string][]byte
|
||||
Attributes *map[string][]byte
|
||||
}
|
||||
|
||||
// CreateEntry creates an entry from file properties.
|
||||
@@ -93,6 +98,27 @@ func CreateEntryFromFileInfo(fileInfo os.FileInfo, directory string) *Entry {
|
||||
return entry
|
||||
}
|
||||
|
||||
func (entry *Entry) Copy() *Entry {
|
||||
return &Entry{
|
||||
Path: entry.Path,
|
||||
Size: entry.Size,
|
||||
Time: entry.Time,
|
||||
Mode: entry.Mode,
|
||||
Link: entry.Link,
|
||||
Hash: entry.Hash,
|
||||
|
||||
UID: entry.UID,
|
||||
GID: entry.GID,
|
||||
|
||||
StartChunk: entry.StartChunk,
|
||||
StartOffset: entry.StartOffset,
|
||||
EndChunk: entry.EndChunk,
|
||||
EndOffset: entry.EndOffset,
|
||||
|
||||
Attributes: entry.Attributes,
|
||||
}
|
||||
}
|
||||
|
||||
// UnmarshalJSON populates the entry from a json description.
|
||||
func (entry *Entry) UnmarshalJSON(description []byte) (err error) {
|
||||
|
||||
@@ -175,17 +201,17 @@ func (entry *Entry) UnmarshalJSON(description []byte) (err error) {
|
||||
if attributes, ok := value.(map[string]interface{}); !ok {
|
||||
return fmt.Errorf("Attributes are invalid for file '%s' in the snapshot", entry.Path)
|
||||
} else {
|
||||
entry.Attributes = make(map[string][]byte)
|
||||
entry.Attributes = &map[string][]byte{}
|
||||
for name, object := range attributes {
|
||||
if object == nil {
|
||||
entry.Attributes[name] = []byte("")
|
||||
(*entry.Attributes)[name] = []byte("")
|
||||
} else if attributeInBase64, ok := object.(string); !ok {
|
||||
return fmt.Errorf("Attribute '%s' is invalid for file '%s' in the snapshot", name, entry.Path)
|
||||
} else if attribute, err := base64.StdEncoding.DecodeString(attributeInBase64); err != nil {
|
||||
return fmt.Errorf("Failed to decode attribute '%s' for file '%s' in the snapshot: %v",
|
||||
name, entry.Path, err)
|
||||
} else {
|
||||
entry.Attributes[name] = attribute
|
||||
(*entry.Attributes)[name] = attribute
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -244,7 +270,7 @@ func (entry *Entry) convertToObject(encodeName bool) map[string]interface{} {
|
||||
object["gid"] = entry.GID
|
||||
}
|
||||
|
||||
if len(entry.Attributes) > 0 {
|
||||
if entry.Attributes != nil && len(*entry.Attributes) > 0 {
|
||||
object["attributes"] = entry.Attributes
|
||||
}
|
||||
|
||||
@@ -259,6 +285,197 @@ func (entry *Entry) MarshalJSON() ([]byte, error) {
|
||||
return description, err
|
||||
}
|
||||
|
||||
var _ msgpack.CustomEncoder = (*Entry)(nil)
|
||||
var _ msgpack.CustomDecoder = (*Entry)(nil)
|
||||
|
||||
func (entry *Entry) EncodeMsgpack(encoder *msgpack.Encoder) error {
|
||||
|
||||
err := encoder.EncodeString(entry.Path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(entry.Size)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(entry.Time)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(int64(entry.Mode))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeString(entry.Link)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeString(entry.Hash)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(int64(entry.StartChunk))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(int64(entry.StartOffset))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(int64(entry.EndChunk))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(int64(entry.EndOffset))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(int64(entry.UID))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(int64(entry.GID))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var numberOfAttributes int64
|
||||
if entry.Attributes != nil {
|
||||
numberOfAttributes = int64(len(*entry.Attributes))
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(numberOfAttributes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if entry.Attributes != nil {
|
||||
attributes := make([]string, numberOfAttributes)
|
||||
i := 0
|
||||
for attribute := range *entry.Attributes {
|
||||
attributes[i] = attribute
|
||||
i++
|
||||
}
|
||||
sort.Strings(attributes)
|
||||
for _, attribute := range attributes {
|
||||
err = encoder.EncodeString(attribute)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = encoder.EncodeString(string((*entry.Attributes)[attribute]))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (entry *Entry) DecodeMsgpack(decoder *msgpack.Decoder) error {
|
||||
|
||||
var err error
|
||||
|
||||
entry.Path, err = decoder.DecodeString()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
entry.Size, err = decoder.DecodeInt64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
entry.Time, err = decoder.DecodeInt64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
mode, err := decoder.DecodeInt64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entry.Mode = uint32(mode)
|
||||
|
||||
entry.Link, err = decoder.DecodeString()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
entry.Hash, err = decoder.DecodeString()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
startChunk, err := decoder.DecodeInt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entry.StartChunk = int(startChunk)
|
||||
|
||||
startOffset, err := decoder.DecodeInt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entry.StartOffset = int(startOffset)
|
||||
|
||||
endChunk, err := decoder.DecodeInt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entry.EndChunk = int(endChunk)
|
||||
|
||||
endOffset, err := decoder.DecodeInt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entry.EndOffset = int(endOffset)
|
||||
|
||||
uid, err := decoder.DecodeInt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entry.UID = int(uid)
|
||||
|
||||
gid, err := decoder.DecodeInt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entry.GID = int(gid)
|
||||
|
||||
numberOfAttributes, err := decoder.DecodeInt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if numberOfAttributes > 0 {
|
||||
entry.Attributes = &map[string][]byte{}
|
||||
for i := 0; i < numberOfAttributes; i++ {
|
||||
attribute, err := decoder.DecodeString()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
value, err := decoder.DecodeString()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
(*entry.Attributes)[attribute] = []byte(value)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (entry *Entry) IsFile() bool {
|
||||
return entry.Mode&uint32(os.ModeType) == 0
|
||||
}
|
||||
@@ -271,10 +488,27 @@ func (entry *Entry) IsLink() bool {
|
||||
return entry.Mode&uint32(os.ModeSymlink) != 0
|
||||
}
|
||||
|
||||
func (entry *Entry) IsComplete() bool {
|
||||
return entry.Size >= 0
|
||||
}
|
||||
|
||||
func (entry *Entry) GetPermissions() os.FileMode {
|
||||
return os.FileMode(entry.Mode) & fileModeMask
|
||||
}
|
||||
|
||||
func (entry *Entry) GetParent() string {
|
||||
path := entry.Path
|
||||
if path != "" && path[len(path) - 1] == '/' {
|
||||
path = path[:len(path) - 1]
|
||||
}
|
||||
i := strings.LastIndex(path, "/")
|
||||
if i == -1 {
|
||||
return ""
|
||||
} else {
|
||||
return path[:i]
|
||||
}
|
||||
}
|
||||
|
||||
func (entry *Entry) IsSameAs(other *Entry) bool {
|
||||
return entry.Size == other.Size && entry.Time <= other.Time+1 && entry.Time >= other.Time-1
|
||||
}
|
||||
@@ -326,7 +560,7 @@ func (entry *Entry) RestoreMetadata(fullPath string, fileInfo *os.FileInfo, setO
|
||||
}
|
||||
}
|
||||
|
||||
if len(entry.Attributes) > 0 {
|
||||
if entry.Attributes != nil && len(*entry.Attributes) > 0 {
|
||||
entry.SetAttributesToFile(fullPath)
|
||||
}
|
||||
|
||||
@@ -335,47 +569,62 @@ func (entry *Entry) RestoreMetadata(fullPath string, fileInfo *os.FileInfo, setO
|
||||
|
||||
// Return -1 if 'left' should appear before 'right', 1 if opposite, and 0 if they are the same.
|
||||
// Files are always arranged before subdirectories under the same parent directory.
|
||||
func (left *Entry) Compare(right *Entry) int {
|
||||
|
||||
path1 := left.Path
|
||||
path2 := right.Path
|
||||
|
||||
func ComparePaths(left string, right string) int {
|
||||
p := 0
|
||||
for ; p < len(path1) && p < len(path2); p++ {
|
||||
if path1[p] != path2[p] {
|
||||
for ; p < len(left) && p < len(right); p++ {
|
||||
if left[p] != right[p] {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// c1, c2 is the first byte that differs
|
||||
// c1, c2 are the first bytes that differ
|
||||
var c1, c2 byte
|
||||
if p < len(path1) {
|
||||
c1 = path1[p]
|
||||
if p < len(left) {
|
||||
c1 = left[p]
|
||||
}
|
||||
if p < len(path2) {
|
||||
c2 = path2[p]
|
||||
if p < len(right) {
|
||||
c2 = right[p]
|
||||
}
|
||||
|
||||
// c3, c4 indicates how the current component ends
|
||||
// c3 == '/': the current component is a directory
|
||||
// c3 != '/': the current component is the last one
|
||||
// c3, c4 indicate how the current component ends
|
||||
// c3 == '/': the current component is a directory; c3 != '/': the current component is the last one
|
||||
c3 := c1
|
||||
for i := p; c3 != '/' && i < len(path1); i++ {
|
||||
c3 = path1[i]
|
||||
|
||||
// last1, last2 indicate whether the current component is the last component
|
||||
last1 := true
|
||||
for i := p; i < len(left); i++ {
|
||||
c3 = left[i]
|
||||
if c3 == '/' {
|
||||
last1 = i == len(left) - 1
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
c4 := c2
|
||||
for i := p; c4 != '/' && i < len(path2); i++ {
|
||||
c4 = path2[i]
|
||||
last2 := true
|
||||
for i := p; i < len(right); i++ {
|
||||
c4 = right[i]
|
||||
if c4 == '/' {
|
||||
last2 = i == len(right) - 1
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if last1 != last2 {
|
||||
if last1 {
|
||||
return -1
|
||||
} else {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
if c3 == '/' {
|
||||
if c4 == '/' {
|
||||
// We are comparing two directory components
|
||||
if c1 == '/' {
|
||||
// left is shorter
|
||||
// Note that c2 maybe smaller than c1 but c1 is '/' which is counted
|
||||
// as 0
|
||||
// left is shorter; note that c2 may be smaller than c1 but c1 should be treated as 0, therefore
|
||||
// this is a special case that must be handled separately
|
||||
return -1
|
||||
} else if c2 == '/' {
|
||||
// right is shorter
|
||||
@@ -397,6 +646,10 @@ func (left *Entry) Compare(right *Entry) int {
|
||||
}
|
||||
}
|
||||
|
||||
func (left *Entry) Compare(right *Entry) int {
|
||||
return ComparePaths(left.Path, right.Path)
|
||||
}
|
||||
|
||||
// This is used to sort entries by their names.
|
||||
type ByName []*Entry
|
||||
|
||||
@@ -443,7 +696,7 @@ func (files FileInfoCompare) Less(i, j int) bool {
|
||||
|
||||
// ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths
|
||||
// are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files.
|
||||
func ListEntries(top string, path string, fileList *[]*Entry, patterns []string, nobackupFile string, discardAttributes bool, excludeByAttribute bool) (directoryList []*Entry,
|
||||
func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool, listingChannel chan *Entry) (directoryList []*Entry,
|
||||
skippedFiles []string, err error) {
|
||||
|
||||
LOG_DEBUG("LIST_ENTRIES", "Listing %s", path)
|
||||
@@ -478,8 +731,6 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns []string,
|
||||
|
||||
sort.Sort(FileInfoCompare(files))
|
||||
|
||||
entries := make([]*Entry, 0, 4)
|
||||
|
||||
for _, f := range files {
|
||||
if f.Name() == DUPLICACY_DIRECTORY {
|
||||
continue
|
||||
@@ -520,11 +771,9 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns []string,
|
||||
}
|
||||
}
|
||||
|
||||
if !discardAttributes {
|
||||
entry.ReadAttributes(top)
|
||||
}
|
||||
entry.ReadAttributes(top)
|
||||
|
||||
if excludeByAttribute && excludedByAttribute(entry.Attributes) {
|
||||
if excludeByAttribute && entry.Attributes != nil && excludedByAttribute(*entry.Attributes) {
|
||||
LOG_DEBUG("LIST_EXCLUDE", "%s is excluded by attribute", entry.Path)
|
||||
continue
|
||||
}
|
||||
@@ -535,20 +784,20 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns []string,
|
||||
continue
|
||||
}
|
||||
|
||||
entries = append(entries, entry)
|
||||
if entry.IsDir() {
|
||||
directoryList = append(directoryList, entry)
|
||||
} else {
|
||||
listingChannel <- entry
|
||||
}
|
||||
}
|
||||
|
||||
// For top level directory we need to sort again because symlinks may have been changed
|
||||
if path == "" {
|
||||
sort.Sort(ByName(entries))
|
||||
sort.Sort(ByName(directoryList))
|
||||
}
|
||||
|
||||
for _, entry := range entries {
|
||||
if entry.IsDir() {
|
||||
directoryList = append(directoryList, entry)
|
||||
} else {
|
||||
*fileList = append(*fileList, entry)
|
||||
}
|
||||
for _, entry := range directoryList {
|
||||
listingChannel <- entry
|
||||
}
|
||||
|
||||
for i, j := 0, len(directoryList)-1; i < j; i, j = i+1, j-1 {
|
||||
@@ -597,3 +846,100 @@ func (entry *Entry) Diff(chunkHashes []string, chunkLengths []int,
|
||||
|
||||
return modifiedLength
|
||||
}
|
||||
|
||||
func (entry *Entry) EncodeWithHash(encoder *msgpack.Encoder) error {
|
||||
entryBytes, err := msgpack.Marshal(entry)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hash := sha256.Sum256(entryBytes)
|
||||
err = encoder.EncodeBytes(entryBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = encoder.EncodeBytes(hash[:])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func DecodeEntryWithHash(decoder *msgpack.Decoder) (*Entry, error) {
|
||||
entryBytes, err := decoder.DecodeBytes()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
hashBytes, err := decoder.DecodeBytes()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
expectedHash := sha256.Sum256(entryBytes)
|
||||
if bytes.Compare(expectedHash[:], hashBytes) != 0 {
|
||||
return nil, fmt.Errorf("corrupted file metadata")
|
||||
}
|
||||
|
||||
var entry Entry
|
||||
err = msgpack.Unmarshal(entryBytes, &entry)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &entry, nil
|
||||
}
|
||||
|
||||
func (entry *Entry) check(chunkLengths []int) error {
|
||||
|
||||
if entry.Size < 0 {
|
||||
return fmt.Errorf("The file %s hash an invalid size (%d)", entry.Path, entry.Size)
|
||||
}
|
||||
|
||||
if !entry.IsFile() || entry.Size == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if entry.StartChunk < 0 {
|
||||
return fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk)
|
||||
}
|
||||
|
||||
if entry.EndChunk >= len(chunkLengths) {
|
||||
return fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d",
|
||||
entry.Path, entry.EndChunk, len(chunkLengths))
|
||||
}
|
||||
|
||||
if entry.EndChunk < entry.StartChunk {
|
||||
return fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d",
|
||||
entry.Path, entry.StartChunk, entry.EndChunk)
|
||||
}
|
||||
|
||||
if entry.StartOffset >= chunkLengths[entry.StartChunk] {
|
||||
return fmt.Errorf("The file %s starts at offset %d of chunk %d of length %d",
|
||||
entry.Path, entry.StartOffset, entry.StartChunk, chunkLengths[entry.StartChunk])
|
||||
}
|
||||
|
||||
if entry.EndOffset > chunkLengths[entry.EndChunk] {
|
||||
return fmt.Errorf("The file %s ends at offset %d of chunk %d of length %d",
|
||||
entry.Path, entry.EndOffset, entry.EndChunk, chunkLengths[entry.EndChunk])
|
||||
}
|
||||
|
||||
fileSize := int64(0)
|
||||
|
||||
for i := entry.StartChunk; i <= entry.EndChunk; i++ {
|
||||
|
||||
start := 0
|
||||
if i == entry.StartChunk {
|
||||
start = entry.StartOffset
|
||||
}
|
||||
end := chunkLengths[i]
|
||||
if i == entry.EndChunk {
|
||||
end = entry.EndOffset
|
||||
}
|
||||
|
||||
fileSize += int64(end - start)
|
||||
}
|
||||
|
||||
if entry.Size != fileSize {
|
||||
return fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d",
|
||||
entry.Path, entry.Size, fileSize)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -13,8 +13,11 @@ import (
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
|
||||
"github.com/gilbertchen/xattr"
|
||||
"github.com/vmihailenco/msgpack"
|
||||
)
|
||||
|
||||
func TestEntrySort(t *testing.T) {
|
||||
@@ -27,19 +30,19 @@ func TestEntrySort(t *testing.T) {
|
||||
"\xBB\xDDfile",
|
||||
"\xFF\xDDfile",
|
||||
"ab/",
|
||||
"ab-/",
|
||||
"ab0/",
|
||||
"ab1/",
|
||||
"ab/c",
|
||||
"ab+/c-",
|
||||
"ab+/c0",
|
||||
"ab+/c/",
|
||||
"ab+/c/d",
|
||||
"ab+/c+/",
|
||||
"ab+/c+/d",
|
||||
"ab+/c0/",
|
||||
"ab+/c/d",
|
||||
"ab+/c+/d",
|
||||
"ab+/c0/d",
|
||||
"ab-/",
|
||||
"ab-/c",
|
||||
"ab0/",
|
||||
"ab1/",
|
||||
"ab1/c",
|
||||
"ab1/\xBB\xDDfile",
|
||||
"ab1/\xFF\xDDfile",
|
||||
@@ -86,7 +89,7 @@ func TestEntrySort(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestEntryList(t *testing.T) {
|
||||
func TestEntryOrder(t *testing.T) {
|
||||
|
||||
testDir := filepath.Join(os.TempDir(), "duplicacy_test")
|
||||
os.RemoveAll(testDir)
|
||||
@@ -98,16 +101,16 @@ func TestEntryList(t *testing.T) {
|
||||
"ab0",
|
||||
"ab1",
|
||||
"ab+/",
|
||||
"ab2/",
|
||||
"ab3/",
|
||||
"ab+/c",
|
||||
"ab+/c+",
|
||||
"ab+/c1",
|
||||
"ab+/c-/",
|
||||
"ab+/c-/d",
|
||||
"ab+/c0/",
|
||||
"ab+/c-/d",
|
||||
"ab+/c0/d",
|
||||
"ab2/",
|
||||
"ab2/c",
|
||||
"ab3/",
|
||||
"ab3/c",
|
||||
}
|
||||
|
||||
@@ -172,18 +175,24 @@ func TestEntryList(t *testing.T) {
|
||||
directories = append(directories, CreateEntry("", 0, 0, 0))
|
||||
|
||||
entries := make([]*Entry, 0, 4)
|
||||
entryChannel := make(chan *Entry, 1024)
|
||||
entries = append(entries, CreateEntry("", 0, 0, 0))
|
||||
|
||||
for len(directories) > 0 {
|
||||
directory := directories[len(directories)-1]
|
||||
directories = directories[:len(directories)-1]
|
||||
entries = append(entries, directory)
|
||||
subdirectories, _, err := ListEntries(testDir, directory.Path, &entries, nil, "", false, false)
|
||||
subdirectories, _, err := ListEntries(testDir, directory.Path, nil, "", false, entryChannel)
|
||||
if err != nil {
|
||||
t.Errorf("ListEntries(%s, %s) returned an error: %s", testDir, directory.Path, err)
|
||||
}
|
||||
directories = append(directories, subdirectories...)
|
||||
}
|
||||
|
||||
close(entryChannel)
|
||||
for entry := range entryChannel {
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
|
||||
entries = entries[1:]
|
||||
|
||||
for _, entry := range entries {
|
||||
@@ -274,18 +283,25 @@ func TestEntryExcludeByAttribute(t *testing.T) {
|
||||
directories = append(directories, CreateEntry("", 0, 0, 0))
|
||||
|
||||
entries := make([]*Entry, 0, 4)
|
||||
entryChannel := make(chan *Entry, 1024)
|
||||
entries = append(entries, CreateEntry("", 0, 0, 0))
|
||||
|
||||
for len(directories) > 0 {
|
||||
directory := directories[len(directories)-1]
|
||||
directories = directories[:len(directories)-1]
|
||||
entries = append(entries, directory)
|
||||
subdirectories, _, err := ListEntries(testDir, directory.Path, &entries, nil, "", false, excludeByAttribute)
|
||||
subdirectories, _, err := ListEntries(testDir, directory.Path, nil, "", excludeByAttribute, entryChannel)
|
||||
if err != nil {
|
||||
t.Errorf("ListEntries(%s, %s) returned an error: %s", testDir, directory.Path, err)
|
||||
}
|
||||
directories = append(directories, subdirectories...)
|
||||
}
|
||||
|
||||
close(entryChannel)
|
||||
|
||||
for entry := range entryChannel {
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
|
||||
entries = entries[1:]
|
||||
|
||||
for _, entry := range entries {
|
||||
@@ -327,3 +343,33 @@ func TestEntryExcludeByAttribute(t *testing.T) {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestEntryEncoding(t *testing.T) {
|
||||
buffer := new(bytes.Buffer)
|
||||
encoder := msgpack.NewEncoder(buffer)
|
||||
|
||||
entry1 := CreateEntry("abcd", 1, 2, 0700)
|
||||
err := encoder.Encode(entry1)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to encode the entry: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
t.Logf("msgpack size: %d\n", len(buffer.Bytes()))
|
||||
decoder := msgpack.NewDecoder(buffer)
|
||||
|
||||
description, _ := json.Marshal(entry1)
|
||||
t.Logf("json size: %d\n", len(description))
|
||||
|
||||
var entry2 Entry
|
||||
err = decoder.Decode(&entry2)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to decode the entry: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if entry1.Path != entry2.Path || entry1.Size != entry2.Size || entry1.Time != entry2.Time {
|
||||
t.Error("Decoded entry is different than the original one")
|
||||
}
|
||||
|
||||
}
|
||||
574
src/duplicacy_entrylist.go
Normal file
574
src/duplicacy_entrylist.go
Normal file
@@ -0,0 +1,574 @@
|
||||
// Copyright (c) Acrosync LLC. All rights reserved.
|
||||
// Free for personal use and commercial trial
|
||||
// Commercial use requires per-user licenses available from https://duplicacy.com
|
||||
|
||||
package duplicacy
|
||||
|
||||
import (
|
||||
"encoding/hex"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"os"
|
||||
"io"
|
||||
"path"
|
||||
"crypto/sha256"
|
||||
"crypto/rand"
|
||||
"sync"
|
||||
|
||||
"github.com/vmihailenco/msgpack"
|
||||
)
|
||||
|
||||
// ModifiedEntry stores the path, size and hash of a file entry that has been modified.
type ModifiedEntry struct {
	Path string
	Size int64
	Hash string
}
|
||||
|
||||
// EntryList is basically a list of entries, which can be kept in the memory, or serialized to a disk file,
// depending on whether maximumInMemoryEntries has been exceeded.
//
// The idea behind the on-disk entry list is that entries are written to a disk file as they are coming in.
// Entries that have been modified and thus need to be uploaded are incomplete (i.e., they have a size
// of -1). Once the limit is exceeded, entries are written to a disk file, but ModifiedEntries and the
// UploadedChunk* slices are still kept in memory. When entries are later read from the entry list,
// incomplete entries are back-annotated with info from ModifiedEntries and UploadedChunk* before being
// sent out.
type EntryList struct {
	onDiskFile *os.File         // the file to store entries; nil while the list is only in memory
	encoder    *msgpack.Encoder // msgpack encoder for entry serialization
	entries    []*Entry         // in-memory entry list

	SnapshotID      string          // the snapshot id
	Token           string          // this unique random token makes sure we read/write the same entry list
	ModifiedEntries []ModifiedEntry // entries that will be uploaded

	UploadedChunkHashes  []string   // chunks from entries that have been uploaded
	UploadedChunkLengths []int      // chunk lengths from entries that have been uploaded
	uploadedChunkLock    sync.Mutex // lock for UploadedChunkHashes and UploadedChunkLengths

	PreservedChunkHashes  []string // chunks from entries not changed
	PreservedChunkLengths []int    // chunk lengths from entries not changed

	Checksum string // checksum over the chunk lists and ModifiedEntries (see CalculateChecksum) to detect disk corruption

	maximumInMemoryEntries int    // max in-memory entries; a negative value disables the on-disk file in AddEntry
	NumberOfEntries        int64  // number of entries (not including directories and links)
	cachePath              string // the directory for the on-disk file

	// These 3 variables are used in entry information back-annotation
	modifiedEntryIndex  int // points to the current modified entry
	uploadedChunkIndex  int // counter for upload chunks
	uploadedChunkOffset int // the start offset for the current modified entry
}
|
||||
|
||||
// Create a new entry list
|
||||
func CreateEntryList(snapshotID string, cachePath string, maximumInMemoryEntries int) (*EntryList, error) {
|
||||
|
||||
token := make([]byte, 16)
|
||||
_, err := rand.Read(token)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Failed to create a random token: %v", err)
|
||||
}
|
||||
|
||||
entryList := &EntryList {
|
||||
SnapshotID: snapshotID,
|
||||
maximumInMemoryEntries: maximumInMemoryEntries,
|
||||
cachePath: cachePath,
|
||||
Token: string(token),
|
||||
}
|
||||
|
||||
return entryList, nil
|
||||
|
||||
}
|
||||
|
||||
// Create the on-disk entry list file
|
||||
func (entryList *EntryList)createOnDiskFile() error {
|
||||
file, err := os.OpenFile(path.Join(entryList.cachePath, "incomplete_files"), os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to create on disk entry list: %v", err)
|
||||
}
|
||||
|
||||
entryList.onDiskFile = file
|
||||
entryList.encoder = msgpack.NewEncoder(file)
|
||||
|
||||
err = entryList.encoder.EncodeString(entryList.Token)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to create on disk entry list: %v", err)
|
||||
}
|
||||
|
||||
for _, entry := range entryList.entries {
|
||||
err = entry.EncodeWithHash(entryList.encoder)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Add an entry to the entry list
|
||||
func (entryList *EntryList)AddEntry(entry *Entry) error {
|
||||
|
||||
if !entry.IsDir() && !entry.IsLink() {
|
||||
entryList.NumberOfEntries++
|
||||
}
|
||||
|
||||
if !entry.IsComplete() {
|
||||
if entry.IsDir() || entry.IsLink() {
|
||||
entry.Size = 0
|
||||
} else {
|
||||
modifiedEntry := ModifiedEntry {
|
||||
Path: entry.Path,
|
||||
Size: -1,
|
||||
}
|
||||
|
||||
entryList.ModifiedEntries = append(entryList.ModifiedEntries, modifiedEntry)
|
||||
}
|
||||
}
|
||||
|
||||
if entryList.onDiskFile != nil {
|
||||
return entry.EncodeWithHash(entryList.encoder)
|
||||
} else {
|
||||
entryList.entries = append(entryList.entries, entry)
|
||||
if entryList.maximumInMemoryEntries >= 0 && len(entryList.entries) > entryList.maximumInMemoryEntries {
|
||||
err := entryList.createOnDiskFile()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Add a preserved chunk that belongs to files that have not been modified
|
||||
func (entryList *EntryList)AddPreservedChunk(chunkHash string, chunkSize int) {
|
||||
entryList.PreservedChunkHashes = append(entryList.PreservedChunkHashes, chunkHash)
|
||||
entryList.PreservedChunkLengths = append(entryList.PreservedChunkLengths, chunkSize)
|
||||
}
|
||||
|
||||
// Add a chunk just uploaded (that belongs to files that have been modified)
|
||||
func (entryList *EntryList)AddUploadedChunk(chunkIndex int, chunkHash string, chunkSize int) {
|
||||
entryList.uploadedChunkLock.Lock()
|
||||
|
||||
for len(entryList.UploadedChunkHashes) <= chunkIndex {
|
||||
entryList.UploadedChunkHashes = append(entryList.UploadedChunkHashes, "")
|
||||
}
|
||||
|
||||
for len(entryList.UploadedChunkLengths) <= chunkIndex {
|
||||
entryList.UploadedChunkLengths = append(entryList.UploadedChunkLengths, 0)
|
||||
}
|
||||
|
||||
entryList.UploadedChunkHashes[chunkIndex] = chunkHash
|
||||
entryList.UploadedChunkLengths[chunkIndex] = chunkSize
|
||||
entryList.uploadedChunkLock.Unlock()
|
||||
}
|
||||
|
||||
// Close the on-disk file
|
||||
func (entryList *EntryList) CloseOnDiskFile() error {
|
||||
|
||||
if entryList.onDiskFile == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := entryList.onDiskFile.Sync()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = entryList.onDiskFile.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
entryList.onDiskFile = nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// Return the length of the `index`th chunk
|
||||
func (entryList *EntryList) getChunkLength(index int) int {
|
||||
if index < len(entryList.PreservedChunkLengths) {
|
||||
return entryList.PreservedChunkLengths[index]
|
||||
} else {
|
||||
return entryList.UploadedChunkLengths[index - len(entryList.PreservedChunkLengths)]
|
||||
}
|
||||
}
|
||||
|
||||
// Sanity check for each entry
|
||||
func (entryList *EntryList) checkEntry(entry *Entry) error {
|
||||
|
||||
if entry.Size < 0 {
|
||||
return fmt.Errorf("the file %s hash an invalid size (%d)", entry.Path, entry.Size)
|
||||
}
|
||||
|
||||
if !entry.IsFile() || entry.Size == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
numberOfChunks := len(entryList.PreservedChunkLengths) + len(entryList.UploadedChunkLengths)
|
||||
|
||||
if entry.StartChunk < 0 {
|
||||
return fmt.Errorf("the file %s starts at chunk %d", entry.Path, entry.StartChunk)
|
||||
}
|
||||
|
||||
if entry.EndChunk >= numberOfChunks {
|
||||
return fmt.Errorf("the file %s ends at chunk %d while the number of chunks is %d",
|
||||
entry.Path, entry.EndChunk, numberOfChunks)
|
||||
}
|
||||
|
||||
if entry.EndChunk < entry.StartChunk {
|
||||
return fmt.Errorf("the file %s starts at chunk %d and ends at chunk %d",
|
||||
entry.Path, entry.StartChunk, entry.EndChunk)
|
||||
}
|
||||
|
||||
if entry.StartOffset >= entryList.getChunkLength(entry.StartChunk) {
|
||||
return fmt.Errorf("the file %s starts at offset %d of chunk %d with a length of %d",
|
||||
entry.Path, entry.StartOffset, entry.StartChunk, entryList.getChunkLength(entry.StartChunk))
|
||||
}
|
||||
|
||||
if entry.EndOffset > entryList.getChunkLength(entry.EndChunk) {
|
||||
return fmt.Errorf("the file %s ends at offset %d of chunk %d with a length of %d",
|
||||
entry.Path, entry.EndOffset, entry.EndChunk, entryList.getChunkLength(entry.EndChunk))
|
||||
}
|
||||
|
||||
fileSize := int64(0)
|
||||
|
||||
for i := entry.StartChunk; i <= entry.EndChunk; i++ {
|
||||
|
||||
start := 0
|
||||
if i == entry.StartChunk {
|
||||
start = entry.StartOffset
|
||||
}
|
||||
end := entryList.getChunkLength(i)
|
||||
if i == entry.EndChunk {
|
||||
end = entry.EndOffset
|
||||
}
|
||||
|
||||
fileSize += int64(end - start)
|
||||
}
|
||||
|
||||
if entry.Size != fileSize {
|
||||
return fmt.Errorf("the file %s has a size of %d but the total size of chunks is %d",
|
||||
entry.Path, entry.Size, fileSize)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// An incomplete entry (with a size of -1) does not have 'startChunk', 'startOffset', 'endChunk', and 'endOffset'. This function
|
||||
// is to fill in these information before sending the entry out.
|
||||
func (entryList *EntryList) fillAndSendEntry(entry *Entry, entryOut func(*Entry)error) (skipped bool, err error) {
|
||||
|
||||
if entry.IsComplete() {
|
||||
err := entryList.checkEntry(entry)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return false, entryOut(entry)
|
||||
}
|
||||
|
||||
if entryList.modifiedEntryIndex >= len(entryList.ModifiedEntries) {
|
||||
return false, fmt.Errorf("Unexpected file index %d (%d modified files)", entryList.modifiedEntryIndex, len(entryList.ModifiedEntries))
|
||||
}
|
||||
|
||||
modifiedEntry := &entryList.ModifiedEntries[entryList.modifiedEntryIndex]
|
||||
entryList.modifiedEntryIndex++
|
||||
|
||||
if modifiedEntry.Path != entry.Path {
|
||||
return false, fmt.Errorf("Unexpected file path %s when expecting %s", modifiedEntry.Path, entry.Path)
|
||||
}
|
||||
|
||||
if modifiedEntry.Size <= 0 {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
entry.Size = modifiedEntry.Size
|
||||
entry.Hash = modifiedEntry.Hash
|
||||
|
||||
entry.StartChunk = entryList.uploadedChunkIndex + len(entryList.PreservedChunkHashes)
|
||||
entry.StartOffset = entryList.uploadedChunkOffset
|
||||
entry.EndChunk = entry.StartChunk
|
||||
endOffset := int64(entry.StartOffset) + entry.Size
|
||||
|
||||
for entryList.uploadedChunkIndex < len(entryList.UploadedChunkLengths) && endOffset > int64(entryList.UploadedChunkLengths[entryList.uploadedChunkIndex]) {
|
||||
endOffset -= int64(entryList.UploadedChunkLengths[entryList.uploadedChunkIndex])
|
||||
entry.EndChunk++
|
||||
entryList.uploadedChunkIndex++
|
||||
}
|
||||
|
||||
if entryList.uploadedChunkIndex >= len(entryList.UploadedChunkLengths) {
|
||||
return false, fmt.Errorf("File %s has not been completely uploaded", entry.Path)
|
||||
}
|
||||
|
||||
entry.EndOffset = int(endOffset)
|
||||
entryList.uploadedChunkOffset = entry.EndOffset
|
||||
if entry.EndOffset == entryList.UploadedChunkLengths[entryList.uploadedChunkIndex] {
|
||||
entryList.uploadedChunkIndex++
|
||||
entryList.uploadedChunkOffset = 0
|
||||
}
|
||||
|
||||
err = entryList.checkEntry(entry)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return false, entryOut(entry)
|
||||
}
|
||||
|
||||
// Iterate through the entries in this entry list
|
||||
func (entryList *EntryList) ReadEntries(entryOut func(*Entry)error) (error) {
|
||||
|
||||
entryList.modifiedEntryIndex = 0
|
||||
entryList.uploadedChunkIndex = 0
|
||||
entryList.uploadedChunkOffset = 0
|
||||
|
||||
if entryList.onDiskFile == nil {
|
||||
for _, entry := range entryList.entries {
|
||||
skipped, err := entryList.fillAndSendEntry(entry.Copy(), entryOut)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skipped {
|
||||
continue
|
||||
}
|
||||
}
|
||||
} else {
|
||||
_, err := entryList.onDiskFile.Seek(0, os.SEEK_SET)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
decoder := msgpack.NewDecoder(entryList.onDiskFile)
|
||||
|
||||
_, err = decoder.DecodeString()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, err = decoder.PeekCode(); err == nil; _, err = decoder.PeekCode() {
|
||||
entry, err := DecodeEntryWithHash(decoder)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
skipped, err := entryList.fillAndSendEntry(entry, entryOut)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skipped {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if err != io.EOF {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// When saving an incomplete snapshot, the on-disk entry list ('incomplete_files') is renamed to
|
||||
// 'incomplete_snapshot', and this EntryList struct is saved as 'incomplete_chunks'.
|
||||
func (entryList *EntryList) SaveIncompleteSnapshot() {
|
||||
entryList.uploadedChunkLock.Lock()
|
||||
defer entryList.uploadedChunkLock.Unlock()
|
||||
|
||||
if entryList.onDiskFile == nil {
|
||||
err := entryList.createOnDiskFile()
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_SAVE", "Failed to create the incomplete snapshot file: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, entry := range entryList.entries {
|
||||
|
||||
err = entry.EncodeWithHash(entryList.encoder)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_SAVE", "Failed to save the entry %s: %v", entry.Path, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
err := entryList.onDiskFile.Close()
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_SAVE", "Failed to close the on-disk file: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
filePath := path.Join(entryList.cachePath, "incomplete_snapshot")
|
||||
if _, err := os.Stat(filePath); err == nil {
|
||||
err = os.Remove(filePath)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_REMOVE", "Failed to remove previous incomplete snapshot: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
err = os.Rename(path.Join(entryList.cachePath, "incomplete_files"), filePath)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_SAVE", "Failed to rename the incomplete snapshot file: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
chunkFile := path.Join(entryList.cachePath, "incomplete_chunks")
|
||||
file, err := os.OpenFile(chunkFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_SAVE", "Failed to create the incomplete chunk file: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
defer file.Close()
|
||||
encoder := msgpack.NewEncoder(file)
|
||||
|
||||
entryList.Checksum = entryList.CalculateChecksum()
|
||||
|
||||
err = encoder.Encode(entryList)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_SAVE", "Failed to save the incomplete snapshot: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
LOG_INFO("INCOMPLETE_SAVE", "Incomplete snapshot saved to %s", filePath)
|
||||
}
|
||||
|
||||
// Calculate a checksum for this entry list
|
||||
func (entryList *EntryList) CalculateChecksum() string{
|
||||
|
||||
hasher := sha256.New()
|
||||
for _, s := range entryList.UploadedChunkHashes {
|
||||
hasher.Write([]byte(s))
|
||||
}
|
||||
|
||||
buffer := make([]byte, 8)
|
||||
for _, i := range entryList.UploadedChunkLengths {
|
||||
binary.LittleEndian.PutUint64(buffer, uint64(i))
|
||||
hasher.Write(buffer)
|
||||
}
|
||||
|
||||
for _, s := range entryList.PreservedChunkHashes {
|
||||
hasher.Write([]byte(s))
|
||||
}
|
||||
|
||||
for _, i := range entryList.PreservedChunkLengths {
|
||||
binary.LittleEndian.PutUint64(buffer, uint64(i))
|
||||
hasher.Write(buffer)
|
||||
}
|
||||
|
||||
for _, entry := range entryList.ModifiedEntries {
|
||||
binary.LittleEndian.PutUint64(buffer, uint64(entry.Size))
|
||||
hasher.Write(buffer)
|
||||
hasher.Write([]byte(entry.Hash))
|
||||
}
|
||||
|
||||
return hex.EncodeToString(hasher.Sum(nil))
|
||||
}
|
||||
|
||||
// Check if all chunks exist in 'chunkCache'
|
||||
func (entryList *EntryList) CheckChunks(config *Config, chunkCache map[string]bool) bool {
|
||||
for _, chunkHash := range entryList.UploadedChunkHashes {
|
||||
chunkID := config.GetChunkIDFromHash(chunkHash)
|
||||
if _, ok := chunkCache[chunkID]; !ok {
|
||||
return false
|
||||
}
|
||||
}
|
||||
for _, chunkHash := range entryList.PreservedChunkHashes {
|
||||
chunkID := config.GetChunkIDFromHash(chunkHash)
|
||||
if _, ok := chunkCache[chunkID]; !ok {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
|
||||
}
|
||||
|
||||
// Recover the on disk file from 'incomplete_snapshot', and restore the EntryList struct
|
||||
// from 'incomplete_chunks'
|
||||
func loadIncompleteSnapshot(snapshotID string, cachePath string) *EntryList {
|
||||
|
||||
onDiskFilePath := path.Join(cachePath, "incomplete_snapshot")
|
||||
entryListFilePath := path.Join(cachePath, "incomplete_chunks")
|
||||
|
||||
if _, err := os.Stat(onDiskFilePath); os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if _, err := os.Stat(entryListFilePath); os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
|
||||
entryList := &EntryList {}
|
||||
entryListFile, err := os.OpenFile(entryListFilePath, os.O_RDONLY, 0600)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_LOAD", "Failed to open the incomplete snapshot: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
defer entryListFile.Close()
|
||||
decoder := msgpack.NewDecoder(entryListFile)
|
||||
err = decoder.Decode(&entryList)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_LOAD", "Failed to load the incomplete snapshot: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
checksum := entryList.CalculateChecksum()
|
||||
if checksum != entryList.Checksum {
|
||||
LOG_WARN("INCOMPLETE_LOAD", "Failed to load the incomplete snapshot: checksum mismatched")
|
||||
return nil
|
||||
}
|
||||
|
||||
onDiskFile, err := os.OpenFile(onDiskFilePath, os.O_RDONLY, 0600)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_LOAD", "Failed to open the on disk file for the incomplete snapshot: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
decoder = msgpack.NewDecoder(onDiskFile)
|
||||
token, err := decoder.DecodeString()
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_LOAD", "Failed to read the token for the incomplete snapshot: %v", err)
|
||||
onDiskFile.Close()
|
||||
return nil
|
||||
}
|
||||
|
||||
if token != entryList.Token {
|
||||
LOG_WARN("INCOMPLETE_LOAD", "Mismatched tokens in the incomplete snapshot")
|
||||
onDiskFile.Close()
|
||||
return nil
|
||||
}
|
||||
|
||||
entryList.onDiskFile = onDiskFile
|
||||
|
||||
for i, hash := range entryList.UploadedChunkHashes {
|
||||
if len(hash) == 0 {
|
||||
// An empty hash means the chunk has not been uploaded in previous run
|
||||
entryList.UploadedChunkHashes = entryList.UploadedChunkHashes[0:i]
|
||||
entryList.UploadedChunkLengths = entryList.UploadedChunkLengths[0:i]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
LOG_INFO("INCOMPLETE_LOAD", "Previous incomlete backup contains %d files and %d chunks",
|
||||
entryList.NumberOfEntries, len(entryList.PreservedChunkLengths) + len(entryList.UploadedChunkHashes))
|
||||
|
||||
return entryList
|
||||
}
|
||||
|
||||
// Delete the two incomplete files.
|
||||
func deleteIncompleteSnapshot(cachePath string) {
|
||||
|
||||
for _, file := range []string{"incomplete_snapshot", "incomplete_chunks"} {
|
||||
filePath := path.Join(cachePath, file)
|
||||
if _, err := os.Stat(filePath); err == nil {
|
||||
err = os.Remove(filePath)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_REMOVE", "Failed to remove the incomplete snapshot: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
179
src/duplicacy_entrylist_test.go
Normal file
179
src/duplicacy_entrylist_test.go
Normal file
@@ -0,0 +1,179 @@
|
||||
// Copyright (c) Acrosync LLC. All rights reserved.
|
||||
// Free for personal use and commercial trial
|
||||
// Commercial use requires per-user licenses available from https://duplicacy.com
|
||||
|
||||
package duplicacy
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path"
|
||||
"time"
|
||||
"testing"
|
||||
"math/rand"
|
||||
)
|
||||
|
||||
|
||||
// generateRandomString returns a string of the given length made of random lowercase
// ASCII letters, drawn from the global math/rand source.
func generateRandomString(length int) string {
	const alphabet = "abcdefghijklmnopqrstuvwxyz"

	buffer := make([]byte, length)
	for i := 0; i < length; i++ {
		buffer[i] = alphabet[rand.Intn(len(alphabet))]
	}
	return string(buffer)
}
|
||||
|
||||
// fileSizeGenerator produces Zipf-distributed values (s = 1.2, v = 1.0) in [0, 1024],
// seeded from the current time.
var fileSizeGenerator = func() *rand.Zipf {
	source := rand.NewSource(time.Now().UnixNano())
	return rand.NewZipf(rand.New(source), 1.2, 1.0, 1024)
}()

// generateRandomFileSize returns a random, strictly positive file size, which is one more
// than the next value from fileSizeGenerator.
func generateRandomFileSize() int64 {
	size := fileSizeGenerator.Uint64() + 1
	return int64(size)
}
|
||||
|
||||
func generateRandomChunks(totalFileSize int64) (chunks []string, lengths []int) {
|
||||
|
||||
totalChunkSize := int64(0)
|
||||
|
||||
for totalChunkSize < totalFileSize {
|
||||
chunks = append(chunks, generateRandomString(64))
|
||||
chunkSize := int64(1 + (rand.Int() % 64))
|
||||
if chunkSize + totalChunkSize > totalFileSize {
|
||||
chunkSize = totalFileSize - totalChunkSize
|
||||
}
|
||||
lengths = append(lengths, int(chunkSize))
|
||||
totalChunkSize += chunkSize
|
||||
}
|
||||
return chunks, lengths
|
||||
|
||||
}
|
||||
|
||||
func getPreservedChunks(entries []*Entry, chunks []string, lengths []int) (preservedChunks []string, preservedChunkLengths []int) {
|
||||
lastPreservedChunk := -1
|
||||
for i := range entries {
|
||||
if entries[i].Size < 0 {
|
||||
continue
|
||||
}
|
||||
delta := entries[i].StartChunk - len(chunks)
|
||||
if lastPreservedChunk != entries[i].StartChunk {
|
||||
lastPreservedChunk = entries[i].StartChunk
|
||||
preservedChunks = append(preservedChunks, chunks[entries[i].StartChunk])
|
||||
preservedChunkLengths = append(preservedChunkLengths, lengths[entries[i].StartChunk])
|
||||
delta++
|
||||
}
|
||||
for j := entries[i].StartChunk + 1; i <= entries[i].EndChunk; i++ {
|
||||
preservedChunks = append(preservedChunks, chunks[j])
|
||||
preservedChunkLengths = append(preservedChunkLengths, lengths[j])
|
||||
lastPreservedChunk = j
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func testEntryList(t *testing.T, numberOfEntries int, maximumInMemoryEntries int) {
|
||||
|
||||
entries := make([]*Entry, 0, numberOfEntries)
|
||||
entrySizes := make([]int64, 0)
|
||||
|
||||
for i := 0; i < numberOfEntries; i++ {
|
||||
entry:= CreateEntry(generateRandomString(16), -1, 0, 0700)
|
||||
entries = append(entries, entry)
|
||||
entrySizes = append(entrySizes, generateRandomFileSize())
|
||||
}
|
||||
|
||||
totalFileSize := int64(0)
|
||||
for _, size := range entrySizes {
|
||||
totalFileSize += size
|
||||
}
|
||||
|
||||
testDir := path.Join(os.TempDir(), "duplicacy_test")
|
||||
os.RemoveAll(testDir)
|
||||
os.MkdirAll(testDir, 0700)
|
||||
|
||||
os.MkdirAll(testDir + "/list1", 0700)
|
||||
os.MkdirAll(testDir + "/list2", 0700)
|
||||
os.MkdirAll(testDir + "/list3", 0700)
|
||||
os.MkdirAll(testDir + "/list1", 0700)
|
||||
|
||||
// For the first entry list, all entries are new
|
||||
entryList, _ := CreateEntryList("test", testDir + "/list1", maximumInMemoryEntries)
|
||||
for _, entry := range entries {
|
||||
entryList.AddEntry(entry)
|
||||
}
|
||||
uploadedChunks, uploadedChunksLengths := generateRandomChunks(totalFileSize)
|
||||
for i, chunk := range uploadedChunks {
|
||||
entryList.AddUploadedChunk(i, chunk, uploadedChunksLengths[i])
|
||||
}
|
||||
|
||||
for i := range entryList.ModifiedEntries {
|
||||
entryList.ModifiedEntries[i].Size = entrySizes[i]
|
||||
}
|
||||
|
||||
totalEntries := 0
|
||||
err := entryList.ReadEntries(func(entry *Entry) error {
|
||||
totalEntries++
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("ReadEntries returned an error: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
if totalEntries != numberOfEntries {
|
||||
t.Errorf("EntryList contains %d entries instead of %d", totalEntries, numberOfEntries)
|
||||
return
|
||||
}
|
||||
|
||||
// For the second entry list, half of the entries are new
|
||||
for i := range entries {
|
||||
if rand.Int() % 1 == 0 {
|
||||
entries[i].Size = -1
|
||||
} else {
|
||||
entries[i].Size = entrySizes[i]
|
||||
}
|
||||
}
|
||||
|
||||
preservedChunks, preservedChunkLengths := getPreservedChunks(entries, uploadedChunks, uploadedChunksLengths)
|
||||
entryList, _ = CreateEntryList("test", testDir + "/list2", maximumInMemoryEntries)
|
||||
for _, entry := range entries {
|
||||
entryList.AddEntry(entry)
|
||||
}
|
||||
for i, chunk := range preservedChunks {
|
||||
entryList.AddPreservedChunk(chunk, preservedChunkLengths[i])
|
||||
}
|
||||
|
||||
totalFileSize = 0
|
||||
for i := range entryList.ModifiedEntries {
|
||||
fileSize := generateRandomFileSize()
|
||||
entryList.ModifiedEntries[i].Size = fileSize
|
||||
totalFileSize += fileSize
|
||||
}
|
||||
|
||||
uploadedChunks, uploadedChunksLengths = generateRandomChunks(totalFileSize)
|
||||
for i, chunk := range uploadedChunks {
|
||||
entryList.AddUploadedChunk(i, chunk, uploadedChunksLengths[i])
|
||||
}
|
||||
|
||||
totalEntries = 0
|
||||
err = entryList.ReadEntries(func(entry *Entry) error {
|
||||
totalEntries++
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("ReadEntries returned an error: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
if totalEntries != numberOfEntries {
|
||||
t.Errorf("EntryList contains %d entries instead of %d", totalEntries, numberOfEntries)
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
func TestEntryList(t *testing.T) {
|
||||
testEntryList(t, 1024, 1024)
|
||||
testEntryList(t, 1024, 512)
|
||||
testEntryList(t, 1024, 0)
|
||||
}
|
||||
@@ -8,17 +8,22 @@ import (
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"sort"
|
||||
"bytes"
|
||||
|
||||
"github.com/vmihailenco/msgpack"
|
||||
|
||||
)
|
||||
|
||||
// Snapshot represents a backup of the repository.
|
||||
type Snapshot struct {
|
||||
Version int
|
||||
ID string // the snapshot id; must be different for different repositories
|
||||
Revision int // the revision number
|
||||
Options string // options used to create this snapshot (some not included)
|
||||
@@ -37,14 +42,11 @@ type Snapshot struct {
|
||||
// A sequence of chunks whose aggregated content is the json representation of 'ChunkLengths'.
|
||||
LengthSequence []string
|
||||
|
||||
Files []*Entry // list of files and subdirectories
|
||||
|
||||
ChunkHashes []string // a sequence of chunks representing the file content
|
||||
ChunkLengths []int // the length of each chunk
|
||||
|
||||
Flag bool // used to mark certain snapshots for deletion or copy
|
||||
|
||||
discardAttributes bool
|
||||
}
|
||||
|
||||
// CreateEmptySnapshot creates an empty snapshot.
|
||||
@@ -56,16 +58,14 @@ func CreateEmptySnapshot(id string) (snapshto *Snapshot) {
|
||||
}
|
||||
}
|
||||
|
||||
// CreateSnapshotFromDirectory creates a snapshot from the local directory 'top'. Only 'Files'
|
||||
// will be constructed, while 'ChunkHashes' and 'ChunkLengths' can only be populated after uploading.
|
||||
func CreateSnapshotFromDirectory(id string, top string, nobackupFile string, filtersFile string, excludeByAttribute bool) (snapshot *Snapshot, skippedDirectories []string,
|
||||
skippedFiles []string, err error) {
|
||||
type DirectoryListing struct {
|
||||
directory string
|
||||
files *[]Entry
|
||||
}
|
||||
|
||||
snapshot = &Snapshot{
|
||||
ID: id,
|
||||
Revision: 0,
|
||||
StartTime: time.Now().Unix(),
|
||||
}
|
||||
func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string,
|
||||
filtersFile string, excludeByAttribute bool, listingChannel chan *Entry,
|
||||
skippedDirectories *[]string, skippedFiles *[]string) {
|
||||
|
||||
var patterns []string
|
||||
|
||||
@@ -77,45 +77,128 @@ func CreateSnapshotFromDirectory(id string, top string, nobackupFile string, fil
|
||||
directories := make([]*Entry, 0, 256)
|
||||
directories = append(directories, CreateEntry("", 0, 0, 0))
|
||||
|
||||
snapshot.Files = make([]*Entry, 0, 256)
|
||||
|
||||
attributeThreshold := 1024 * 1024
|
||||
if attributeThresholdValue, found := os.LookupEnv("DUPLICACY_ATTRIBUTE_THRESHOLD"); found && attributeThresholdValue != "" {
|
||||
attributeThreshold, _ = strconv.Atoi(attributeThresholdValue)
|
||||
}
|
||||
|
||||
for len(directories) > 0 {
|
||||
|
||||
directory := directories[len(directories)-1]
|
||||
directories = directories[:len(directories)-1]
|
||||
snapshot.Files = append(snapshot.Files, directory)
|
||||
subdirectories, skipped, err := ListEntries(top, directory.Path, &snapshot.Files, patterns, nobackupFile, snapshot.discardAttributes, excludeByAttribute)
|
||||
subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingChannel)
|
||||
if err != nil {
|
||||
if directory.Path == "" {
|
||||
LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err)
|
||||
return nil, nil, nil, err
|
||||
return
|
||||
}
|
||||
LOG_WARN("LIST_FAILURE", "Failed to list subdirectory %s: %v", directory.Path, err)
|
||||
skippedDirectories = append(skippedDirectories, directory.Path)
|
||||
if skippedDirectories != nil {
|
||||
*skippedDirectories = append(*skippedDirectories, directory.Path)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
directories = append(directories, subdirectories...)
|
||||
skippedFiles = append(skippedFiles, skipped...)
|
||||
|
||||
if !snapshot.discardAttributes && len(snapshot.Files) > attributeThreshold {
|
||||
LOG_INFO("LIST_ATTRIBUTES", "Discarding file attributes")
|
||||
snapshot.discardAttributes = true
|
||||
for _, file := range snapshot.Files {
|
||||
file.Attributes = nil
|
||||
}
|
||||
if skippedFiles != nil {
|
||||
*skippedFiles = append(*skippedFiles, skipped...)
|
||||
}
|
||||
|
||||
}
|
||||
close(listingChannel)
|
||||
}
|
||||
|
||||
func (snapshot *Snapshot)ListRemoteFiles(config *Config, chunkOperator *ChunkOperator, entryOut func(*Entry) bool) {
|
||||
|
||||
var chunks []string
|
||||
for _, chunkHash := range snapshot.FileSequence {
|
||||
chunks = append(chunks, chunkOperator.config.GetChunkIDFromHash(chunkHash))
|
||||
}
|
||||
|
||||
// Remove the root entry
|
||||
snapshot.Files = snapshot.Files[1:]
|
||||
var chunk *Chunk
|
||||
reader := sequenceReader{
|
||||
sequence: snapshot.FileSequence,
|
||||
buffer: new(bytes.Buffer),
|
||||
refillFunc: func(chunkHash string) []byte {
|
||||
if chunk != nil {
|
||||
config.PutChunk(chunk)
|
||||
}
|
||||
chunk = chunkOperator.Download(chunkHash, 0, true)
|
||||
return chunk.GetBytes()
|
||||
},
|
||||
}
|
||||
|
||||
if snapshot.Version == 0 {
|
||||
LOG_INFO("SNAPSHOT_VERSION", "snapshot %s at revision %d is encoded in an old version format", snapshot.ID, snapshot.Revision)
|
||||
files := make([]*Entry, 0)
|
||||
decoder := json.NewDecoder(&reader)
|
||||
|
||||
// read open bracket
|
||||
_, err := decoder.Token()
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_PARSE", "Failed to open the snapshot %s at revision %d: not a list of entries",
|
||||
snapshot.ID, snapshot.Revision)
|
||||
return
|
||||
}
|
||||
|
||||
for decoder.More() {
|
||||
var entry Entry
|
||||
err = decoder.Decode(&entry)
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: %v",
|
||||
snapshot.ID, snapshot.Revision, err)
|
||||
return
|
||||
}
|
||||
files = append(files, &entry)
|
||||
}
|
||||
|
||||
sort.Sort(ByName(files))
|
||||
|
||||
for _, file := range files {
|
||||
if !entryOut(file) {
|
||||
return
|
||||
}
|
||||
}
|
||||
} else if snapshot.Version == 1 {
|
||||
decoder := msgpack.NewDecoder(&reader)
|
||||
|
||||
lastEndChunk := 0
|
||||
|
||||
// while the array contains values
|
||||
for _, err := decoder.PeekCode(); err != io.EOF; _, err = decoder.PeekCode() {
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_PARSE", "Failed to parse the snapshot %s at revision %d: %v",
|
||||
snapshot.ID, snapshot.Revision, err)
|
||||
return
|
||||
}
|
||||
var entry Entry
|
||||
err = decoder.Decode(&entry)
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load the snapshot %s at revision %d: %v",
|
||||
snapshot.ID, snapshot.Revision, err)
|
||||
return
|
||||
}
|
||||
|
||||
if entry.IsFile() {
|
||||
entry.StartChunk += lastEndChunk
|
||||
entry.EndChunk += entry.StartChunk
|
||||
lastEndChunk = entry.EndChunk
|
||||
}
|
||||
|
||||
err = entry.check(snapshot.ChunkLengths)
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_ENTRY", "Failed to load the snapshot %s at revision %d: %v",
|
||||
snapshot.ID, snapshot.Revision, err)
|
||||
return
|
||||
}
|
||||
|
||||
if !entryOut(&entry) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
LOG_ERROR("SNAPSHOT_VERSION", "snapshot %s at revision %d is encoded in unsupported version %d format",
|
||||
snapshot.ID, snapshot.Revision, snapshot.Version)
|
||||
return
|
||||
}
|
||||
|
||||
return snapshot, skippedDirectories, skippedFiles, nil
|
||||
}
|
||||
|
||||
func AppendPattern(patterns []string, new_pattern string) (new_patterns []string) {
|
||||
@@ -215,100 +298,6 @@ func ProcessFilterLines(patternFileLines []string, includedFiles []string) (patt
|
||||
return patterns
|
||||
}
|
||||
|
||||
// IncompleteSnapshot is the struct used to save/load incomplete snapshots. It is
// serialized as JSON to the "incomplete" file under the preference directory by
// SaveIncompleteSnapshot and read back by LoadIncompleteSnapshot.
|
||||
type IncompleteSnapshot struct {
|
||||
// Files holds the leading entries of the snapshot whose Size is non-negative at save time.
Files []*Entry
|
||||
// ChunkHashes holds the chunk hashes in hex-encoded form (decoded again on load).
ChunkHashes []string
|
||||
// ChunkLengths is copied as-is from the snapshot's ChunkLengths.
ChunkLengths []int
|
||||
}
|
||||
|
||||
// LoadIncompleteSnapshot loads the incomplete snapshot if it exists
|
||||
func LoadIncompleteSnapshot() (snapshot *Snapshot) {
|
||||
snapshotFile := path.Join(GetDuplicacyPreferencePath(), "incomplete")
|
||||
description, err := ioutil.ReadFile(snapshotFile)
|
||||
if err != nil {
|
||||
LOG_DEBUG("INCOMPLETE_LOCATE", "Failed to locate incomplete snapshot: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
var incompleteSnapshot IncompleteSnapshot
|
||||
|
||||
err = json.Unmarshal(description, &incompleteSnapshot)
|
||||
if err != nil {
|
||||
LOG_DEBUG("INCOMPLETE_PARSE", "Failed to parse incomplete snapshot: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
var chunkHashes []string
|
||||
for _, chunkHash := range incompleteSnapshot.ChunkHashes {
|
||||
hash, err := hex.DecodeString(chunkHash)
|
||||
if err != nil {
|
||||
LOG_DEBUG("INCOMPLETE_DECODE", "Failed to decode incomplete snapshot: %v", err)
|
||||
return nil
|
||||
}
|
||||
chunkHashes = append(chunkHashes, string(hash))
|
||||
}
|
||||
|
||||
snapshot = &Snapshot{
|
||||
Files: incompleteSnapshot.Files,
|
||||
ChunkHashes: chunkHashes,
|
||||
ChunkLengths: incompleteSnapshot.ChunkLengths,
|
||||
}
|
||||
LOG_INFO("INCOMPLETE_LOAD", "Incomplete snapshot loaded from %s", snapshotFile)
|
||||
return snapshot
|
||||
}
|
||||
|
||||
// SaveIncompleteSnapshot saves the incomplete snapshot under the preference directory
|
||||
func SaveIncompleteSnapshot(snapshot *Snapshot) {
|
||||
var files []*Entry
|
||||
for _, file := range snapshot.Files {
|
||||
// All unprocessed files will have a size of -1
|
||||
if file.Size >= 0 {
|
||||
file.Attributes = nil
|
||||
files = append(files, file)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
var chunkHashes []string
|
||||
for _, chunkHash := range snapshot.ChunkHashes {
|
||||
chunkHashes = append(chunkHashes, hex.EncodeToString([]byte(chunkHash)))
|
||||
}
|
||||
|
||||
incompleteSnapshot := IncompleteSnapshot{
|
||||
Files: files,
|
||||
ChunkHashes: chunkHashes,
|
||||
ChunkLengths: snapshot.ChunkLengths,
|
||||
}
|
||||
|
||||
description, err := json.MarshalIndent(incompleteSnapshot, "", " ")
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_ENCODE", "Failed to encode the incomplete snapshot: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
snapshotFile := path.Join(GetDuplicacyPreferencePath(), "incomplete")
|
||||
err = ioutil.WriteFile(snapshotFile, description, 0644)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_WRITE", "Failed to save the incomplete snapshot: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
LOG_INFO("INCOMPLETE_SAVE", "Incomplete snapshot saved to %s", snapshotFile)
|
||||
}
|
||||
|
||||
func RemoveIncompleteSnapshot() {
|
||||
snapshotFile := path.Join(GetDuplicacyPreferencePath(), "incomplete")
|
||||
if stat, err := os.Stat(snapshotFile); err == nil && !stat.IsDir() {
|
||||
err = os.Remove(snapshotFile)
|
||||
if err != nil {
|
||||
LOG_INFO("INCOMPLETE_SAVE", "Failed to remove ncomplete snapshot: %v", err)
|
||||
} else {
|
||||
LOG_INFO("INCOMPLETE_SAVE", "Removed incomplete snapshot %s", snapshotFile)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// CreateSnapshotFromDescription creates a snapshot from its JSON description.
|
||||
func CreateSnapshotFromDescription(description []byte) (snapshot *Snapshot, err error) {
|
||||
|
||||
@@ -321,6 +310,14 @@ func CreateSnapshotFromDescription(description []byte) (snapshot *Snapshot, err
|
||||
|
||||
snapshot = &Snapshot{}
|
||||
|
||||
if value, ok := root["version"]; !ok {
|
||||
snapshot.Version = 0
|
||||
} else if version, ok := value.(float64); !ok {
|
||||
return nil, fmt.Errorf("Invalid version is specified in the snapshot")
|
||||
} else {
|
||||
snapshot.Version = int(version)
|
||||
}
|
||||
|
||||
if value, ok := root["id"]; !ok {
|
||||
return nil, fmt.Errorf("No id is specified in the snapshot")
|
||||
} else if snapshot.ID, ok = value.(string); !ok {
|
||||
@@ -437,6 +434,7 @@ func (snapshot *Snapshot) MarshalJSON() ([]byte, error) {
|
||||
|
||||
object := make(map[string]interface{})
|
||||
|
||||
object["version"] = 1
|
||||
object["id"] = snapshot.ID
|
||||
object["revision"] = snapshot.Revision
|
||||
object["options"] = snapshot.Options
|
||||
@@ -458,9 +456,7 @@ func (snapshot *Snapshot) MarshalJSON() ([]byte, error) {
|
||||
// MarshalSequence creates a JSON representation for the specified chunk sequence.
|
||||
func (snapshot *Snapshot) MarshalSequence(sequenceType string) ([]byte, error) {
|
||||
|
||||
if sequenceType == "files" {
|
||||
return json.Marshal(snapshot.Files)
|
||||
} else if sequenceType == "chunks" {
|
||||
if sequenceType == "chunks" {
|
||||
return json.Marshal(encodeSequence(snapshot.ChunkHashes))
|
||||
} else {
|
||||
return json.Marshal(snapshot.ChunkLengths)
|
||||
@@ -489,3 +485,4 @@ func encodeSequence(sequence []string) []string {
|
||||
|
||||
return sequenceInHex
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,8 @@ import (
|
||||
"strings"
|
||||
"text/tabwriter"
|
||||
"time"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/aryann/difflib"
|
||||
)
|
||||
@@ -189,7 +191,6 @@ type SnapshotManager struct {
|
||||
fileChunk *Chunk
|
||||
snapshotCache *FileStorage
|
||||
|
||||
chunkDownloader *ChunkDownloader
|
||||
chunkOperator *ChunkOperator
|
||||
}
|
||||
|
||||
@@ -268,72 +269,26 @@ func (reader *sequenceReader) Read(data []byte) (n int, err error) {
|
||||
return reader.buffer.Read(data)
|
||||
}
|
||||
|
||||
func (manager *SnapshotManager) CreateChunkDownloader() {
|
||||
if manager.chunkDownloader == nil {
|
||||
manager.chunkDownloader = CreateChunkDownloader(manager.config, manager.storage, manager.snapshotCache, false, 1, false)
|
||||
func (manager *SnapshotManager) CreateChunkOperator(resurrect bool, threads int, allowFailures bool) {
|
||||
if manager.chunkOperator == nil {
|
||||
manager.chunkOperator = CreateChunkOperator(manager.config, manager.storage, manager.snapshotCache, resurrect, threads, allowFailures)
|
||||
}
|
||||
}
|
||||
|
||||
// DownloadSequence returns the content represented by a sequence of chunks.
|
||||
func (manager *SnapshotManager) DownloadSequence(sequence []string) (content []byte) {
|
||||
manager.CreateChunkDownloader()
|
||||
manager.CreateChunkOperator(false, 1, false)
|
||||
for _, chunkHash := range sequence {
|
||||
i := manager.chunkDownloader.AddChunk(chunkHash)
|
||||
chunk := manager.chunkDownloader.WaitForChunk(i)
|
||||
chunk := manager.chunkOperator.Download(chunkHash, 0, true)
|
||||
content = append(content, chunk.GetBytes()...)
|
||||
manager.config.PutChunk(chunk)
|
||||
}
|
||||
|
||||
return content
|
||||
}
|
||||
|
||||
func (manager *SnapshotManager) DownloadSnapshotFileSequence(snapshot *Snapshot, patterns []string, attributesNeeded bool) bool {
|
||||
|
||||
manager.CreateChunkDownloader()
|
||||
|
||||
reader := sequenceReader{
|
||||
sequence: snapshot.FileSequence,
|
||||
buffer: new(bytes.Buffer),
|
||||
refillFunc: func(chunkHash string) []byte {
|
||||
i := manager.chunkDownloader.AddChunk(chunkHash)
|
||||
chunk := manager.chunkDownloader.WaitForChunk(i)
|
||||
return chunk.GetBytes()
|
||||
},
|
||||
}
|
||||
|
||||
files := make([]*Entry, 0)
|
||||
decoder := json.NewDecoder(&reader)
|
||||
|
||||
// read open bracket
|
||||
_, err := decoder.Token()
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: not a list of entries",
|
||||
snapshot.ID, snapshot.Revision)
|
||||
return false
|
||||
}
|
||||
|
||||
// while the array contains values
|
||||
for decoder.More() {
|
||||
var entry Entry
|
||||
err = decoder.Decode(&entry)
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: %v",
|
||||
snapshot.ID, snapshot.Revision, err)
|
||||
return false
|
||||
}
|
||||
|
||||
// If we don't need the attributes or the file isn't included we clear the attributes to save memory
|
||||
if !attributesNeeded || (len(patterns) != 0 && !MatchPath(entry.Path, patterns)) {
|
||||
entry.Attributes = nil
|
||||
}
|
||||
|
||||
files = append(files, &entry)
|
||||
}
|
||||
snapshot.Files = files
|
||||
return true
|
||||
}
|
||||
|
||||
// DownloadSnapshotSequence downloads the content represented by a sequence of chunks, and then unmarshal the content
|
||||
// using the specified 'loadFunction'. It purpose is to decode the chunk sequences representing chunk hashes or chunk lengths
|
||||
// using the specified 'loadFunction'. Its purpose is to decode the chunk sequences representing chunk hashes or chunk lengths
|
||||
// in a snapshot.
|
||||
func (manager *SnapshotManager) DownloadSnapshotSequence(snapshot *Snapshot, sequenceType string) bool {
|
||||
|
||||
@@ -362,30 +317,21 @@ func (manager *SnapshotManager) DownloadSnapshotSequence(snapshot *Snapshot, seq
|
||||
return true
|
||||
}
|
||||
|
||||
// DownloadSnapshotContents loads all chunk sequences in a snapshot. A snapshot, when just created, only contains
|
||||
// some metadata and theree sequence representing files, chunk hashes, and chunk lengths. This function must be called
|
||||
// for the actual content of the snapshot to be usable.
|
||||
func (manager *SnapshotManager) DownloadSnapshotContents(snapshot *Snapshot, patterns []string, attributesNeeded bool) bool {
|
||||
// DownloadSnapshotSequences loads all chunk sequences in a snapshot. A snapshot, when just created, only contains
|
||||
// some metadata and three sequences representing files, chunk hashes, and chunk lengths. This function must be called
|
||||
// for the chunk hash sequence and chunk length sequence to be usable.
|
||||
func (manager *SnapshotManager) DownloadSnapshotSequences(snapshot *Snapshot) bool {
|
||||
|
||||
manager.DownloadSnapshotFileSequence(snapshot, patterns, attributesNeeded)
|
||||
manager.DownloadSnapshotSequence(snapshot, "chunks")
|
||||
manager.DownloadSnapshotSequence(snapshot, "lengths")
|
||||
|
||||
err := manager.CheckSnapshot(snapshot)
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_CHECK", "The snapshot %s at revision %d contains an error: %v",
|
||||
snapshot.ID, snapshot.Revision, err)
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// ClearSnapshotContents removes contents loaded by DownloadSnapshotContents
|
||||
func (manager *SnapshotManager) ClearSnapshotContents(snapshot *Snapshot) {
|
||||
// ClearSnapshotSequences removes sequences loaded by DownloadSnapshotSequences
|
||||
func (manager *SnapshotManager) ClearSnapshotSequences(snapshot *Snapshot) {
|
||||
snapshot.ChunkHashes = nil
|
||||
snapshot.ChunkLengths = nil
|
||||
snapshot.Files = nil
|
||||
}
|
||||
|
||||
// CleanSnapshotCache removes all files not referenced by the specified 'snapshot' in the snapshot cache.
|
||||
@@ -577,10 +523,6 @@ func (manager *SnapshotManager) downloadLatestSnapshot(snapshotID string) (remot
|
||||
remote = manager.DownloadSnapshot(snapshotID, latest)
|
||||
}
|
||||
|
||||
if remote != nil {
|
||||
manager.DownloadSnapshotContents(remote, nil, false)
|
||||
}
|
||||
|
||||
return remote
|
||||
}
|
||||
|
||||
@@ -712,6 +654,12 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
|
||||
LOG_DEBUG("LIST_PARAMETERS", "id: %s, revisions: %v, tag: %s, showFiles: %t, showChunks: %t",
|
||||
snapshotID, revisionsToList, tag, showFiles, showChunks)
|
||||
|
||||
manager.CreateChunkOperator(false, 1, false)
|
||||
defer func() {
|
||||
manager.chunkOperator.Stop()
|
||||
manager.chunkOperator = nil
|
||||
}()
|
||||
|
||||
var snapshotIDs []string
|
||||
var err error
|
||||
|
||||
@@ -749,14 +697,16 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
|
||||
if len(snapshot.Tag) > 0 {
|
||||
tagWithSpace = snapshot.Tag + " "
|
||||
}
|
||||
LOG_INFO("SNAPSHOT_INFO", "Snapshot %s revision %d created at %s %s%s",
|
||||
snapshotID, revision, creationTime, tagWithSpace, snapshot.Options)
|
||||
|
||||
if showFiles {
|
||||
manager.DownloadSnapshotFileSequence(snapshot, nil, false)
|
||||
options := snapshot.Options
|
||||
if snapshot.Version == 0 {
|
||||
options += " (0)"
|
||||
}
|
||||
LOG_INFO("SNAPSHOT_INFO", "Snapshot %s revision %d created at %s %s%s",
|
||||
snapshotID, revision, creationTime, tagWithSpace, options)
|
||||
|
||||
if showFiles {
|
||||
// We need to fill in ChunkHashes and ChunkLengths to verify that each entry is valid
|
||||
manager.DownloadSnapshotSequences(snapshot)
|
||||
|
||||
if snapshot.NumberOfFiles > 0 {
|
||||
LOG_INFO("SNAPSHOT_STATS", "Files: %d", snapshot.NumberOfFiles)
|
||||
@@ -768,7 +718,7 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
|
||||
totalFileSize := int64(0)
|
||||
lastChunk := 0
|
||||
|
||||
for _, file := range snapshot.Files {
|
||||
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(file *Entry)bool {
|
||||
if file.IsFile() {
|
||||
totalFiles++
|
||||
totalFileSize += file.Size
|
||||
@@ -780,17 +730,18 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
|
||||
lastChunk = file.EndChunk
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
for _, file := range snapshot.Files {
|
||||
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(file *Entry)bool {
|
||||
if file.IsFile() {
|
||||
LOG_INFO("SNAPSHOT_FILE", "%s", file.String(maxSizeDigits))
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
metaChunks := len(snapshot.FileSequence) + len(snapshot.ChunkSequence) + len(snapshot.LengthSequence)
|
||||
LOG_INFO("SNAPSHOT_STATS", "Files: %d, total size: %d, file chunks: %d, metadata chunks: %d",
|
||||
totalFiles, totalFileSize, lastChunk+1, metaChunks)
|
||||
LOG_INFO("SNAPSHOT_STATS", "Total size: %d, file chunks: %d, metadata chunks: %d", totalFileSize, lastChunk+1, metaChunks)
|
||||
}
|
||||
|
||||
if showChunks {
|
||||
@@ -807,11 +758,15 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
|
||||
|
||||
}
|
||||
|
||||
// ListSnapshots shows the information about a snapshot.
|
||||
// CheckSnapshots checks if there is any problem with a snapshot.
|
||||
func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToCheck []int, tag string, showStatistics bool, showTabular bool,
|
||||
checkFiles bool, checkChunks, searchFossils bool, resurrect bool, threads int, allowFailures bool) bool {
|
||||
|
||||
manager.chunkDownloader = CreateChunkDownloader(manager.config, manager.storage, manager.snapshotCache, false, threads, allowFailures)
|
||||
manager.CreateChunkOperator(resurrect, threads, allowFailures)
|
||||
defer func() {
|
||||
manager.chunkOperator.Stop()
|
||||
manager.chunkOperator = nil
|
||||
}()
|
||||
|
||||
LOG_DEBUG("LIST_PARAMETERS", "id: %s, revisions: %v, tag: %s, showStatistics: %t, showTabular: %t, checkFiles: %t, searchFossils: %t, resurrect: %t",
|
||||
snapshotID, revisionsToCheck, tag, showStatistics, showTabular, checkFiles, searchFossils, resurrect)
|
||||
@@ -911,9 +866,9 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
|
||||
for _, snapshot := range snapshotMap[snapshotID] {
|
||||
|
||||
if checkFiles {
|
||||
manager.DownloadSnapshotContents(snapshot, nil, false)
|
||||
manager.DownloadSnapshotSequences(snapshot)
|
||||
manager.VerifySnapshot(snapshot)
|
||||
manager.ClearSnapshotContents(snapshot)
|
||||
manager.ClearSnapshotSequences(snapshot)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -1026,6 +981,7 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
|
||||
// .duplicacy/cache/storage/verified_chunks. Note that it contains the chunk ids not chunk
|
||||
// hashes.
|
||||
verifiedChunks := make(map[string]int64)
|
||||
var verifiedChunksLock sync.Mutex
|
||||
verifiedChunksFile := "verified_chunks"
|
||||
|
||||
manager.fileChunk.Reset(false)
|
||||
@@ -1061,16 +1017,11 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
|
||||
defer saveVerifiedChunks()
|
||||
RunAtError = saveVerifiedChunks
|
||||
|
||||
manager.chunkDownloader.snapshotCache = nil
|
||||
LOG_INFO("SNAPSHOT_VERIFY", "Verifying %d chunks", len(*allChunkHashes))
|
||||
|
||||
startTime := time.Now()
|
||||
var chunkHashes []string
|
||||
|
||||
// The index of the first chunk to add to the downloader, which may have already downloaded
|
||||
// some metadata chunks so the index doesn't start with 0.
|
||||
chunkIndex := -1
|
||||
|
||||
skippedChunks := 0
|
||||
for chunkHash := range *allChunkHashes {
|
||||
if len(verifiedChunks) > 0 {
|
||||
@@ -1081,38 +1032,65 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
|
||||
}
|
||||
}
|
||||
chunkHashes = append(chunkHashes, chunkHash)
|
||||
if chunkIndex == -1 {
|
||||
chunkIndex = manager.chunkDownloader.AddChunk(chunkHash)
|
||||
} else {
|
||||
manager.chunkDownloader.AddChunk(chunkHash)
|
||||
}
|
||||
}
|
||||
|
||||
if skippedChunks > 0 {
|
||||
LOG_INFO("SNAPSHOT_VERIFY", "Skipped %d chunks that have already been verified before", skippedChunks)
|
||||
}
|
||||
|
||||
var downloadedChunkSize int64
|
||||
totalChunks := len(chunkHashes)
|
||||
for i := 0; i < totalChunks; i++ {
|
||||
chunk := manager.chunkDownloader.WaitForChunk(i + chunkIndex)
|
||||
chunkID := manager.config.GetChunkIDFromHash(chunkHashes[i])
|
||||
if chunk.isBroken {
|
||||
continue
|
||||
}
|
||||
verifiedChunks[chunkID] = startTime.Unix()
|
||||
downloadedChunkSize += int64(chunk.GetLength())
|
||||
var totalDownloadedChunkSize int64
|
||||
var totalDownloadedChunks int64
|
||||
totalChunks := int64(len(chunkHashes))
|
||||
|
||||
elapsedTime := time.Now().Sub(startTime).Seconds()
|
||||
speed := int64(float64(downloadedChunkSize) / elapsedTime)
|
||||
remainingTime := int64(float64(totalChunks - i - 1) / float64(i + 1) * elapsedTime)
|
||||
percentage := float64(i + 1) / float64(totalChunks) * 100.0
|
||||
LOG_INFO("VERIFY_PROGRESS", "Verified chunk %s (%d/%d), %sB/s %s %.1f%%",
|
||||
chunkID, i + 1, totalChunks, PrettySize(speed), PrettyTime(remainingTime), percentage)
|
||||
chunkChannel := make(chan int, threads)
|
||||
var wg sync.WaitGroup
|
||||
|
||||
wg.Add(threads)
|
||||
for i := 0; i < threads; i++ {
|
||||
go func() {
|
||||
defer CatchLogException()
|
||||
|
||||
for {
|
||||
chunkIndex, ok := <- chunkChannel
|
||||
if !ok {
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
|
||||
chunk := manager.chunkOperator.Download(chunkHashes[chunkIndex], chunkIndex, false)
|
||||
if chunk == nil {
|
||||
continue
|
||||
}
|
||||
chunkID := manager.config.GetChunkIDFromHash(chunkHashes[chunkIndex])
|
||||
verifiedChunksLock.Lock()
|
||||
verifiedChunks[chunkID] = startTime.Unix()
|
||||
verifiedChunksLock.Unlock()
|
||||
|
||||
downloadedChunkSize := atomic.AddInt64(&totalDownloadedChunkSize, int64(chunk.GetLength()))
|
||||
downloadedChunks := atomic.AddInt64(&totalDownloadedChunks, 1)
|
||||
|
||||
elapsedTime := time.Now().Sub(startTime).Seconds()
|
||||
speed := int64(float64(downloadedChunkSize) / elapsedTime)
|
||||
remainingTime := int64(float64(totalChunks - downloadedChunks) / float64(downloadedChunks) * elapsedTime)
|
||||
percentage := float64(downloadedChunks) / float64(totalChunks) * 100.0
|
||||
LOG_INFO("VERIFY_PROGRESS", "Verified chunk %s (%d/%d), %sB/s %s %.1f%%",
|
||||
chunkID, downloadedChunks, totalChunks, PrettySize(speed), PrettyTime(remainingTime), percentage)
|
||||
|
||||
manager.config.PutChunk(chunk)
|
||||
}
|
||||
} ()
|
||||
}
|
||||
|
||||
if manager.chunkDownloader.NumberOfFailedChunks > 0 {
|
||||
LOG_ERROR("SNAPSHOT_VERIFY", "%d out of %d chunks are corrupted", manager.chunkDownloader.NumberOfFailedChunks, len(*allChunkHashes))
|
||||
for chunkIndex := range chunkHashes {
|
||||
chunkChannel <- chunkIndex
|
||||
}
|
||||
|
||||
close(chunkChannel)
|
||||
wg.Wait()
|
||||
manager.chunkOperator.WaitForCompletion()
|
||||
|
||||
if manager.chunkOperator.NumberOfFailedChunks > 0 {
|
||||
LOG_ERROR("SNAPSHOT_VERIFY", "%d out of %d chunks are corrupted", manager.chunkOperator.NumberOfFailedChunks, len(*allChunkHashes))
|
||||
} else {
|
||||
LOG_INFO("SNAPSHOT_VERIFY", "All %d chunks have been successfully verified", len(*allChunkHashes))
|
||||
}
|
||||
@@ -1280,14 +1258,6 @@ func (manager *SnapshotManager) PrintSnapshot(snapshot *Snapshot) bool {
|
||||
object["chunks"] = manager.ConvertSequence(snapshot.ChunkHashes)
|
||||
object["lengths"] = snapshot.ChunkLengths
|
||||
|
||||
// By default the json serialization of a file entry contains the path in base64 format. This is
|
||||
// to convert every file entry into an object which include the path in a more readable format.
|
||||
var files []map[string]interface{}
|
||||
for _, file := range snapshot.Files {
|
||||
files = append(files, file.convertToObject(false))
|
||||
}
|
||||
object["files"] = files
|
||||
|
||||
description, err := json.MarshalIndent(object, "", " ")
|
||||
|
||||
if err != nil {
|
||||
@@ -1296,8 +1266,24 @@ func (manager *SnapshotManager) PrintSnapshot(snapshot *Snapshot) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
fmt.Printf("%s\n", string(description))
|
||||
// Don't print the ending bracket
|
||||
fmt.Printf("%s", string(description[:len(description) - 2]))
|
||||
fmt.Printf(",\n \"files\": [\n")
|
||||
isFirstFile := true
|
||||
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (file *Entry) bool {
|
||||
|
||||
fileDescription, _ := json.MarshalIndent(file.convertToObject(false), "", " ")
|
||||
|
||||
if isFirstFile {
|
||||
fmt.Printf("%s", fileDescription)
|
||||
isFirstFile = false
|
||||
} else {
|
||||
fmt.Printf(",\n%s", fileDescription)
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
fmt.Printf(" ]\n}\n")
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -1313,17 +1299,20 @@ func (manager *SnapshotManager) VerifySnapshot(snapshot *Snapshot) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
files := make([]*Entry, 0, len(snapshot.Files)/2)
|
||||
for _, file := range snapshot.Files {
|
||||
files := make([]*Entry, 0)
|
||||
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (file *Entry) bool {
|
||||
if file.IsFile() && file.Size != 0 {
|
||||
file.Attributes = nil
|
||||
files = append(files, file)
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
sort.Sort(ByChunk(files))
|
||||
corruptedFiles := 0
|
||||
var lastChunk *Chunk
|
||||
for _, file := range files {
|
||||
if !manager.RetrieveFile(snapshot, file, func([]byte) {}) {
|
||||
if !manager.RetrieveFile(snapshot, file, &lastChunk, func([]byte) {}) {
|
||||
corruptedFiles++
|
||||
}
|
||||
LOG_TRACE("SNAPSHOT_VERIFY", "%s", file.Path)
|
||||
@@ -1341,21 +1330,13 @@ func (manager *SnapshotManager) VerifySnapshot(snapshot *Snapshot) bool {
|
||||
}
|
||||
|
||||
// RetrieveFile retrieves the file in the specified snapshot.
|
||||
func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, output func([]byte)) bool {
|
||||
func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, lastChunk **Chunk, output func([]byte)) bool {
|
||||
|
||||
if file.Size == 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
manager.CreateChunkDownloader()
|
||||
|
||||
// Temporarily disable the snapshot cache of the download so that downloaded file chunks won't be saved
|
||||
// to the cache.
|
||||
snapshotCache := manager.chunkDownloader.snapshotCache
|
||||
manager.chunkDownloader.snapshotCache = nil
|
||||
defer func() {
|
||||
manager.chunkDownloader.snapshotCache = snapshotCache
|
||||
}()
|
||||
manager.CreateChunkOperator(false, 1, false)
|
||||
|
||||
fileHasher := manager.config.NewFileHasher()
|
||||
alternateHash := false
|
||||
@@ -1376,12 +1357,19 @@ func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, ou
|
||||
}
|
||||
|
||||
hash := snapshot.ChunkHashes[i]
|
||||
lastChunk, lastChunkHash := manager.chunkDownloader.GetLastDownloadedChunk()
|
||||
if lastChunkHash != hash {
|
||||
i := manager.chunkDownloader.AddChunk(hash)
|
||||
chunk = manager.chunkDownloader.WaitForChunk(i)
|
||||
if lastChunk == nil {
|
||||
chunk = manager.chunkOperator.Download(hash, 0, false)
|
||||
} else if *lastChunk == nil {
|
||||
chunk = manager.chunkOperator.Download(hash, 0, false)
|
||||
*lastChunk = chunk
|
||||
} else {
|
||||
chunk = lastChunk
|
||||
if (*lastChunk).GetHash() == hash {
|
||||
chunk = *lastChunk
|
||||
} else {
|
||||
manager.config.PutChunk(*lastChunk)
|
||||
chunk = manager.chunkOperator.Download(hash, 0, false)
|
||||
*lastChunk = chunk
|
||||
}
|
||||
}
|
||||
|
||||
output(chunk.GetBytes()[start:end])
|
||||
@@ -1405,10 +1393,18 @@ func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, ou
|
||||
|
||||
// FindFile returns the file entry that has the given file name.
|
||||
func (manager *SnapshotManager) FindFile(snapshot *Snapshot, filePath string, suppressError bool) *Entry {
|
||||
for _, entry := range snapshot.Files {
|
||||
|
||||
var found *Entry
|
||||
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (entry *Entry) bool {
|
||||
if entry.Path == filePath {
|
||||
return entry
|
||||
found = entry
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
if found != nil {
|
||||
return found
|
||||
}
|
||||
|
||||
if !suppressError {
|
||||
@@ -1440,13 +1436,8 @@ func (manager *SnapshotManager) PrintFile(snapshotID string, revision int, path
|
||||
return false
|
||||
}
|
||||
|
||||
patterns := []string{}
|
||||
if path != "" {
|
||||
patterns = []string{path}
|
||||
}
|
||||
|
||||
// If no path is specified, we're printing the snapshot so we need all attributes
|
||||
if !manager.DownloadSnapshotContents(snapshot, patterns, path == "") {
|
||||
// If no path is specified, we're printing the snapshot
|
||||
if !manager.DownloadSnapshotSequences(snapshot) {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -1456,7 +1447,7 @@ func (manager *SnapshotManager) PrintFile(snapshotID string, revision int, path
|
||||
}
|
||||
|
||||
file := manager.FindFile(snapshot, path, false)
|
||||
if !manager.RetrieveFile(snapshot, file, func(chunk []byte) {
|
||||
if !manager.RetrieveFile(snapshot, file, nil, func(chunk []byte) {
|
||||
fmt.Printf("%s", chunk)
|
||||
}) {
|
||||
LOG_ERROR("SNAPSHOT_RETRIEVE", "File %s is corrupted in snapshot %s at revision %d",
|
||||
@@ -1474,22 +1465,38 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
|
||||
LOG_DEBUG("DIFF_PARAMETERS", "top: %s, id: %s, revision: %v, path: %s, compareByHash: %t",
|
||||
top, snapshotID, revisions, filePath, compareByHash)
|
||||
|
||||
manager.CreateChunkOperator(false, 1, false)
|
||||
defer func() {
|
||||
manager.chunkOperator.Stop()
|
||||
manager.chunkOperator = nil
|
||||
} ()
|
||||
|
||||
var leftSnapshot *Snapshot
|
||||
var rightSnapshot *Snapshot
|
||||
var err error
|
||||
|
||||
leftSnapshotFiles := make([]*Entry, 0, 1024)
|
||||
rightSnapshotFiles := make([]*Entry, 0, 1024)
|
||||
|
||||
// If no or only one revision is specified, use the on-disk version for the right-hand side.
|
||||
if len(revisions) <= 1 {
|
||||
// Only scan the repository if filePath is not provided
|
||||
if len(filePath) == 0 {
|
||||
rightSnapshot, _, _, err = CreateSnapshotFromDirectory(snapshotID, top, nobackupFile, filtersFile, excludeByAttribute)
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_LIST", "Failed to list the directory %s: %v", top, err)
|
||||
return false
|
||||
rightSnapshot = CreateEmptySnapshot(snapshotID)
|
||||
localListingChannel := make(chan *Entry)
|
||||
go func() {
|
||||
defer CatchLogException()
|
||||
rightSnapshot.ListLocalFiles(top, nobackupFile, filtersFile, excludeByAttribute, localListingChannel, nil, nil)
|
||||
} ()
|
||||
|
||||
for entry := range localListingChannel {
|
||||
entry.Attributes = nil // attributes are not compared
|
||||
rightSnapshotFiles = append(rightSnapshotFiles, entry)
|
||||
}
|
||||
|
||||
}
|
||||
} else {
|
||||
rightSnapshot = manager.DownloadSnapshot(snapshotID, revisions[1])
|
||||
manager.DownloadSnapshotSequences(rightSnapshot)
|
||||
}
|
||||
|
||||
// If no revision is specified, use the latest revision as the left-hand side.
|
||||
@@ -1503,15 +1510,11 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
|
||||
leftSnapshot = manager.DownloadSnapshot(snapshotID, revisions[0])
|
||||
}
|
||||
|
||||
manager.DownloadSnapshotSequences(leftSnapshot)
|
||||
if len(filePath) > 0 {
|
||||
|
||||
manager.DownloadSnapshotContents(leftSnapshot, nil, false)
|
||||
if rightSnapshot != nil && rightSnapshot.Revision != 0 {
|
||||
manager.DownloadSnapshotContents(rightSnapshot, nil, false)
|
||||
}
|
||||
|
||||
var leftFile []byte
|
||||
if !manager.RetrieveFile(leftSnapshot, manager.FindFile(leftSnapshot, filePath, false), func(content []byte) {
|
||||
if !manager.RetrieveFile(leftSnapshot, manager.FindFile(leftSnapshot, filePath, false), nil, func(content []byte) {
|
||||
leftFile = append(leftFile, content...)
|
||||
}) {
|
||||
LOG_ERROR("SNAPSHOT_DIFF", "File %s is corrupted in snapshot %s at revision %d",
|
||||
@@ -1521,7 +1524,7 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
|
||||
|
||||
var rightFile []byte
|
||||
if rightSnapshot != nil {
|
||||
if !manager.RetrieveFile(rightSnapshot, manager.FindFile(rightSnapshot, filePath, false), func(content []byte) {
|
||||
if !manager.RetrieveFile(rightSnapshot, manager.FindFile(rightSnapshot, filePath, false), nil, func(content []byte) {
|
||||
rightFile = append(rightFile, content...)
|
||||
}) {
|
||||
LOG_ERROR("SNAPSHOT_DIFF", "File %s is corrupted in snapshot %s at revision %d",
|
||||
@@ -1582,24 +1585,32 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
|
||||
return true
|
||||
}
|
||||
|
||||
// We only need to decode the 'files' sequence, not 'chunkhashes' or 'chunklengthes'
|
||||
manager.DownloadSnapshotFileSequence(leftSnapshot, nil, false)
|
||||
if rightSnapshot != nil && rightSnapshot.Revision != 0 {
|
||||
manager.DownloadSnapshotFileSequence(rightSnapshot, nil, false)
|
||||
leftSnapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(entry *Entry) bool {
|
||||
entry.Attributes = nil
|
||||
leftSnapshotFiles = append(leftSnapshotFiles, entry)
|
||||
return true
|
||||
})
|
||||
|
||||
if rightSnapshot.Revision != 0 {
|
||||
rightSnapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(entry *Entry) bool {
|
||||
entry.Attributes = nil
|
||||
rightSnapshotFiles = append(rightSnapshotFiles, entry)
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
maxSize := int64(9)
|
||||
maxSizeDigits := 1
|
||||
|
||||
// Find the max Size value in order for pretty alignment.
|
||||
for _, file := range leftSnapshot.Files {
|
||||
for _, file := range leftSnapshotFiles {
|
||||
for !file.IsDir() && file.Size > maxSize {
|
||||
maxSize = maxSize*10 + 9
|
||||
maxSizeDigits += 1
|
||||
}
|
||||
}
|
||||
|
||||
for _, file := range rightSnapshot.Files {
|
||||
for _, file := range rightSnapshotFiles {
|
||||
for !file.IsDir() && file.Size > maxSize {
|
||||
maxSize = maxSize*10 + 9
|
||||
maxSizeDigits += 1
|
||||
@@ -1609,22 +1620,22 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
|
||||
buffer := make([]byte, 32*1024)
|
||||
|
||||
var i, j int
|
||||
for i < len(leftSnapshot.Files) || j < len(rightSnapshot.Files) {
|
||||
for i < len(leftSnapshotFiles) || j < len(rightSnapshotFiles) {
|
||||
|
||||
if i >= len(leftSnapshot.Files) {
|
||||
if rightSnapshot.Files[j].IsFile() {
|
||||
LOG_INFO("SNAPSHOT_DIFF", "+ %s", rightSnapshot.Files[j].String(maxSizeDigits))
|
||||
if i >= len(leftSnapshotFiles) {
|
||||
if rightSnapshotFiles[j].IsFile() {
|
||||
LOG_INFO("SNAPSHOT_DIFF", "+ %s", rightSnapshotFiles[j].String(maxSizeDigits))
|
||||
}
|
||||
j++
|
||||
} else if j >= len(rightSnapshot.Files) {
|
||||
if leftSnapshot.Files[i].IsFile() {
|
||||
LOG_INFO("SNAPSHOT_DIFF", "- %s", leftSnapshot.Files[i].String(maxSizeDigits))
|
||||
} else if j >= len(rightSnapshotFiles) {
|
||||
if leftSnapshotFiles[i].IsFile() {
|
||||
LOG_INFO("SNAPSHOT_DIFF", "- %s", leftSnapshotFiles[i].String(maxSizeDigits))
|
||||
}
|
||||
i++
|
||||
} else {
|
||||
|
||||
left := leftSnapshot.Files[i]
|
||||
right := rightSnapshot.Files[j]
|
||||
left := leftSnapshotFiles[i]
|
||||
right := rightSnapshotFiles[j]
|
||||
|
||||
if !left.IsFile() {
|
||||
i++
|
||||
@@ -1679,6 +1690,12 @@ func (manager *SnapshotManager) ShowHistory(top string, snapshotID string, revis
|
||||
LOG_DEBUG("HISTORY_PARAMETERS", "top: %s, id: %s, revisions: %v, path: %s, showLocalHash: %t",
|
||||
top, snapshotID, revisions, filePath, showLocalHash)
|
||||
|
||||
manager.CreateChunkOperator(false, 1, false)
|
||||
defer func() {
|
||||
manager.chunkOperator.Stop()
|
||||
manager.chunkOperator = nil
|
||||
} ()
|
||||
|
||||
var err error
|
||||
|
||||
if len(revisions) == 0 {
|
||||
@@ -1693,7 +1710,7 @@ func (manager *SnapshotManager) ShowHistory(top string, snapshotID string, revis
|
||||
sort.Ints(revisions)
|
||||
for _, revision := range revisions {
|
||||
snapshot := manager.DownloadSnapshot(snapshotID, revision)
|
||||
manager.DownloadSnapshotFileSequence(snapshot, nil, false)
|
||||
manager.DownloadSnapshotSequences(snapshot)
|
||||
file := manager.FindFile(snapshot, filePath, true)
|
||||
|
||||
if file != nil {
|
||||
@@ -1801,8 +1818,11 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string,
|
||||
LOG_WARN("DELETE_OPTIONS", "Tags or retention policy will be ignored if at least one revision is specified")
|
||||
}
|
||||
|
||||
manager.chunkOperator = CreateChunkOperator(manager.storage, threads)
|
||||
defer manager.chunkOperator.Stop()
|
||||
manager.CreateChunkOperator(false, threads, false)
|
||||
defer func() {
|
||||
manager.chunkOperator.Stop()
|
||||
manager.chunkOperator = nil
|
||||
} ()
|
||||
|
||||
prefPath := GetDuplicacyPreferencePath()
|
||||
logDir := path.Join(prefPath, "logs")
|
||||
@@ -2184,7 +2204,7 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string,
|
||||
return false
|
||||
}
|
||||
|
||||
manager.chunkOperator.Stop()
|
||||
manager.chunkOperator.WaitForCompletion()
|
||||
for _, fossil := range manager.chunkOperator.fossils {
|
||||
collection.AddFossil(fossil)
|
||||
}
|
||||
@@ -2265,6 +2285,7 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string,
|
||||
} else {
|
||||
manager.CleanSnapshotCache(nil, allSnapshots)
|
||||
}
|
||||
manager.chunkOperator.WaitForCompletion()
|
||||
|
||||
return true
|
||||
}
|
||||
@@ -2477,8 +2498,6 @@ func (manager *SnapshotManager) pruneSnapshotsExhaustive(referencedFossils map[s
|
||||
// CheckSnapshot performs sanity checks on the given snapshot.
|
||||
func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) {
|
||||
|
||||
lastChunk := 0
|
||||
lastOffset := 0
|
||||
var lastEntry *Entry
|
||||
|
||||
numberOfChunks := len(snapshot.ChunkHashes)
|
||||
@@ -2488,57 +2507,39 @@ func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) {
|
||||
numberOfChunks, len(snapshot.ChunkLengths))
|
||||
}
|
||||
|
||||
entries := make([]*Entry, len(snapshot.Files))
|
||||
copy(entries, snapshot.Files)
|
||||
sort.Sort(ByChunk(entries))
|
||||
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (entry *Entry) bool {
|
||||
|
||||
for _, entry := range snapshot.Files {
|
||||
if lastEntry != nil && lastEntry.Compare(entry) >= 0 && !strings.Contains(lastEntry.Path, "\ufffd") {
|
||||
return fmt.Errorf("The entry %s appears before the entry %s", lastEntry.Path, entry.Path)
|
||||
err = fmt.Errorf("The entry %s appears before the entry %s", lastEntry.Path, entry.Path)
|
||||
return false
|
||||
}
|
||||
lastEntry = entry
|
||||
}
|
||||
|
||||
for _, entry := range entries {
|
||||
|
||||
if !entry.IsFile() || entry.Size == 0 {
|
||||
continue
|
||||
return true
|
||||
}
|
||||
|
||||
if entry.StartChunk < 0 {
|
||||
return fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk)
|
||||
err = fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk)
|
||||
return false
|
||||
}
|
||||
|
||||
if entry.EndChunk >= numberOfChunks {
|
||||
return fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d",
|
||||
err = fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d",
|
||||
entry.Path, entry.EndChunk, numberOfChunks)
|
||||
return false
|
||||
}
|
||||
|
||||
if entry.EndChunk < entry.StartChunk {
|
||||
return fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d",
|
||||
fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d",
|
||||
entry.Path, entry.StartChunk, entry.EndChunk)
|
||||
return false
|
||||
}
|
||||
|
||||
if entry.StartOffset > 0 {
|
||||
if entry.StartChunk < lastChunk {
|
||||
return fmt.Errorf("The file %s starts at chunk %d while the last chunk is %d",
|
||||
entry.Path, entry.StartChunk, lastChunk)
|
||||
}
|
||||
|
||||
if entry.StartChunk > lastChunk+1 {
|
||||
return fmt.Errorf("The file %s starts at chunk %d while the last chunk is %d",
|
||||
entry.Path, entry.StartChunk, lastChunk)
|
||||
}
|
||||
|
||||
if entry.StartChunk == lastChunk && entry.StartOffset < lastOffset {
|
||||
return fmt.Errorf("The file %s starts at offset %d of chunk %d while the last file ends at offset %d",
|
||||
entry.Path, entry.StartOffset, entry.StartChunk, lastOffset)
|
||||
}
|
||||
|
||||
if entry.StartChunk == entry.EndChunk && entry.StartOffset > entry.EndOffset {
|
||||
return fmt.Errorf("The file %s starts at offset %d and ends at offset %d of the same chunk %d",
|
||||
entry.Path, entry.StartOffset, entry.EndOffset, entry.StartChunk)
|
||||
}
|
||||
if entry.StartChunk == entry.EndChunk && entry.StartOffset > entry.EndOffset {
|
||||
err = fmt.Errorf("The file %s starts at offset %d and ends at offset %d of the same chunk %d",
|
||||
entry.Path, entry.StartOffset, entry.EndOffset, entry.StartChunk)
|
||||
return false
|
||||
}
|
||||
|
||||
fileSize := int64(0)
|
||||
@@ -2558,22 +2559,13 @@ func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) {
|
||||
}
|
||||
|
||||
if entry.Size != fileSize {
|
||||
return fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d",
|
||||
err = fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d",
|
||||
entry.Path, entry.Size, fileSize)
|
||||
return false
|
||||
}
|
||||
|
||||
lastChunk = entry.EndChunk
|
||||
lastOffset = entry.EndOffset
|
||||
}
|
||||
|
||||
if len(entries) > 0 && entries[0].StartChunk != 0 {
|
||||
return fmt.Errorf("The first file starts at chunk %d", entries[0].StartChunk)
|
||||
}
|
||||
|
||||
// There may be a last chunk whose size is 0 so we allow this to happen
|
||||
if lastChunk < numberOfChunks-2 {
|
||||
return fmt.Errorf("The last file ends at chunk %d but the number of chunks is %d", lastChunk, numberOfChunks)
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -116,19 +116,18 @@ func createTestSnapshotManager(testDir string) *SnapshotManager {
|
||||
|
||||
func uploadTestChunk(manager *SnapshotManager, content []byte) string {
|
||||
|
||||
completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
|
||||
chunkOperator := CreateChunkOperator(manager.config, manager.storage, nil, false, testThreads, false)
|
||||
chunkOperator.UploadCompletionFunc = func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
|
||||
LOG_INFO("UPLOAD_CHUNK", "Chunk %s size %d uploaded", chunk.GetID(), chunkSize)
|
||||
}
|
||||
|
||||
chunkUploader := CreateChunkUploader(manager.config, manager.storage, nil, *testThreads, nil)
|
||||
chunkUploader.completionFunc = completionFunc
|
||||
chunkUploader.Start()
|
||||
|
||||
chunk := CreateChunk(manager.config, true)
|
||||
chunk.Reset(true)
|
||||
chunk.Write(content)
|
||||
chunkUploader.StartChunk(chunk, 0)
|
||||
chunkUploader.Stop()
|
||||
|
||||
chunkOperator.Upload(chunk, 0, false)
|
||||
chunkOperator.WaitForCompletion()
|
||||
chunkOperator.Stop()
|
||||
|
||||
return chunk.GetHash()
|
||||
}
|
||||
@@ -180,6 +179,12 @@ func createTestSnapshot(manager *SnapshotManager, snapshotID string, revision in
|
||||
|
||||
func checkTestSnapshots(manager *SnapshotManager, expectedSnapshots int, expectedFossils int) {
|
||||
|
||||
manager.CreateChunkOperator(false, 1, false)
|
||||
defer func() {
|
||||
manager.chunkOperator.Stop()
|
||||
manager.chunkOperator = nil
|
||||
}()
|
||||
|
||||
var snapshotIDs []string
|
||||
var err error
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"runtime"
|
||||
"runtime"
|
||||
|
||||
"github.com/gilbertchen/gopass"
|
||||
"golang.org/x/crypto/pbkdf2"
|
||||
@@ -473,4 +473,4 @@ func PrintMemoryUsage() {
|
||||
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -52,11 +52,11 @@ func (entry *Entry) ReadAttributes(top string) {
|
||||
fullPath := filepath.Join(top, entry.Path)
|
||||
attributes, _ := xattr.List(fullPath)
|
||||
if len(attributes) > 0 {
|
||||
entry.Attributes = make(map[string][]byte)
|
||||
entry.Attributes = &map[string][]byte{}
|
||||
for _, name := range attributes {
|
||||
attribute, err := xattr.Get(fullPath, name)
|
||||
if err == nil {
|
||||
entry.Attributes[name] = attribute
|
||||
(*entry.Attributes)[name] = attribute
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -68,19 +68,19 @@ func (entry *Entry) SetAttributesToFile(fullPath string) {
|
||||
for _, name := range names {
|
||||
|
||||
|
||||
newAttribute, found := entry.Attributes[name]
|
||||
newAttribute, found := (*entry.Attributes)[name]
|
||||
if found {
|
||||
oldAttribute, _ := xattr.Get(fullPath, name)
|
||||
if !bytes.Equal(oldAttribute, newAttribute) {
|
||||
xattr.Set(fullPath, name, newAttribute)
|
||||
}
|
||||
delete(entry.Attributes, name)
|
||||
delete(*entry.Attributes, name)
|
||||
} else {
|
||||
xattr.Remove(fullPath, name)
|
||||
}
|
||||
}
|
||||
|
||||
for name, attribute := range entry.Attributes {
|
||||
for name, attribute := range *entry.Attributes {
|
||||
xattr.Set(fullPath, name, attribute)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user