mirror of
https://github.com/gilbertchen/duplicacy
synced 2025-12-06 00:03:38 +00:00
Rewrite the backup procedure to reduce memory usage
Main changes: * Change the listing order of files/directories so that the local and remote snapshots can be compared on-the-fly. * Introduce a new struct called EntryList that maintains a list of files/directories, which are kept in memory when the number is lower, and serialized into a file when there are too many. * EntryList can also be turned into an on-disk incomplete snapshot quickly, to support fast-resume on next run. * ChunkOperator can now download and upload chunks, thus replacing original ChunkDownloader and ChunkUploader. The new ChunkDownloader is only used to prefetch chunks during the restore operation.
This commit is contained in:
@@ -147,6 +147,10 @@ func setGlobalOptions(context *cli.Context) {
|
||||
duplicacy.SetLoggingLevel(duplicacy.DEBUG)
|
||||
}
|
||||
|
||||
if context.GlobalBool("print-memory-usage") {
|
||||
go duplicacy.PrintMemoryUsage()
|
||||
}
|
||||
|
||||
ScriptEnabled = true
|
||||
if context.GlobalBool("no-script") {
|
||||
ScriptEnabled = false
|
||||
@@ -781,7 +785,10 @@ func backupRepository(context *cli.Context) {
|
||||
|
||||
backupManager.SetupSnapshotCache(preference.Name)
|
||||
backupManager.SetDryRun(dryRun)
|
||||
backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS, vssTimeout, enumOnly)
|
||||
|
||||
metadataChunkSize := context.Int("metadata-chunk-size")
|
||||
maximumInMemoryEntries := context.Int("max-in-memory-entries")
|
||||
backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS, vssTimeout, enumOnly, metadataChunkSize, maximumInMemoryEntries)
|
||||
|
||||
runScript(context, preference.Name, "post")
|
||||
}
|
||||
@@ -1506,6 +1513,19 @@ func main() {
|
||||
Name: "enum-only",
|
||||
Usage: "enumerate the repository recursively and then exit",
|
||||
},
|
||||
cli.IntFlag{
|
||||
Name: "metadata-chunk-size",
|
||||
Value: 1024 * 1024,
|
||||
Usage: "the average size of metadata chunks (defaults to 1M)",
|
||||
Argument: "<size>",
|
||||
},
|
||||
cli.IntFlag{
|
||||
Name: "max-in-memory-entries",
|
||||
Value: 1024 * 1024,
|
||||
Usage: "the maximum number of entries kept in memory (defaults to 1M)",
|
||||
Argument: "<number>",
|
||||
},
|
||||
|
||||
},
|
||||
Usage: "Save a snapshot of the repository to the storage",
|
||||
ArgsUsage: " ",
|
||||
@@ -2180,6 +2200,10 @@ func main() {
|
||||
Usage: "suppress logs with the specified id",
|
||||
Argument: "<id>",
|
||||
},
|
||||
cli.BoolFlag{
|
||||
Name: "print-memory-usage",
|
||||
Usage: "print memory usage every second",
|
||||
},
|
||||
}
|
||||
|
||||
app.HideVersion = true
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -257,7 +257,7 @@ func TestBackupManager(t *testing.T) {
|
||||
backupManager.SetupSnapshotCache("default")
|
||||
|
||||
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false)
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false, 1024, 1024)
|
||||
time.Sleep(time.Duration(delay) * time.Second)
|
||||
SetDuplicacyPreferencePath(testDir + "/repository2/.duplicacy")
|
||||
failedFiles := backupManager.Restore(testDir+"/repository2", threads /*inPlace=*/, false /*quickMode=*/, false, threads /*overwrite=*/, true,
|
||||
@@ -282,7 +282,7 @@ func TestBackupManager(t *testing.T) {
|
||||
modifyFile(testDir+"/repository1/dir1/file3", 0.3)
|
||||
|
||||
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "second", false, false, 0, false)
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "second", false, false, 0, false, 1024, 1024)
|
||||
time.Sleep(time.Duration(delay) * time.Second)
|
||||
SetDuplicacyPreferencePath(testDir + "/repository2/.duplicacy")
|
||||
failedFiles = backupManager.Restore(testDir+"/repository2", 2 /*inPlace=*/, true /*quickMode=*/, true, threads /*overwrite=*/, true,
|
||||
@@ -303,7 +303,7 @@ func TestBackupManager(t *testing.T) {
|
||||
os.Mkdir(testDir+"/repository1/dir2/dir3", 0700)
|
||||
os.Mkdir(testDir+"/repository1/dir4", 0700)
|
||||
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "third", false, false, 0, false)
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "third", false, false, 0, false, 1024, 1024)
|
||||
time.Sleep(time.Duration(delay) * time.Second)
|
||||
|
||||
// Create some directories and files under repository2 that will be deleted during restore
|
||||
@@ -368,7 +368,7 @@ func TestBackupManager(t *testing.T) {
|
||||
}
|
||||
backupManager.SnapshotManager.CheckSnapshots( /*snapshotID*/ "host1" /*revisions*/, []int{2, 3} /*tag*/, "",
|
||||
/*showStatistics*/ false /*showTabular*/, false /*checkFiles*/, false /*checkChunks*/, false /*searchFossils*/, false /*resurrect*/, false, 1 /*allowFailures*/, false)
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "fourth", false, false, 0, false)
|
||||
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "fourth", false, false, 0, false, 1024, 1024)
|
||||
backupManager.SnapshotManager.PruneSnapshots("host1", "host1" /*revisions*/, nil /*tags*/, nil /*retentions*/, nil,
|
||||
/*exhaustive*/ false /*exclusive=*/, true /*ignoredIDs*/, nil /*dryRun*/, false /*deleteOnly*/, false /*collectOnly*/, false, 1)
|
||||
numberOfSnapshots = backupManager.SnapshotManager.ListSnapshots( /*snapshotID*/ "host1" /*revisionsToList*/, nil /*tag*/, "" /*showFiles*/, false /*showChunks*/, false)
|
||||
@@ -533,7 +533,7 @@ func TestPersistRestore(t *testing.T) {
|
||||
unencBackupManager.SetupSnapshotCache("default")
|
||||
|
||||
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
|
||||
unencBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false)
|
||||
unencBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false, 1024, 1024)
|
||||
time.Sleep(time.Duration(delay) * time.Second)
|
||||
|
||||
|
||||
@@ -543,7 +543,7 @@ func TestPersistRestore(t *testing.T) {
|
||||
encBackupManager.SetupSnapshotCache("default")
|
||||
|
||||
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
|
||||
encBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false)
|
||||
encBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false, 1024, 1024)
|
||||
time.Sleep(time.Duration(delay) * time.Second)
|
||||
|
||||
|
||||
|
||||
@@ -29,29 +29,29 @@ func benchmarkSplit(reader *bytes.Reader, fileSize int64, chunkSize int, compres
|
||||
config.HashKey = DEFAULT_KEY
|
||||
config.IDKey = DEFAULT_KEY
|
||||
|
||||
maker := CreateChunkMaker(config, false)
|
||||
maker := CreateFileChunkMaker(config, false)
|
||||
|
||||
startTime := float64(time.Now().UnixNano()) / 1e9
|
||||
numberOfChunks := 0
|
||||
reader.Seek(0, os.SEEK_SET)
|
||||
maker.ForEachChunk(reader,
|
||||
func(chunk *Chunk, final bool) {
|
||||
if compression {
|
||||
key := ""
|
||||
if encryption {
|
||||
key = "0123456789abcdef0123456789abcdef"
|
||||
}
|
||||
err := chunk.Encrypt([]byte(key), "", false)
|
||||
if err != nil {
|
||||
LOG_ERROR("BENCHMARK_ENCRYPT", "Failed to encrypt the chunk: %v", err)
|
||||
}
|
||||
|
||||
chunkFunc := func(chunk *Chunk) {
|
||||
if compression {
|
||||
key := ""
|
||||
if encryption {
|
||||
key = "0123456789abcdef0123456789abcdef"
|
||||
}
|
||||
config.PutChunk(chunk)
|
||||
numberOfChunks++
|
||||
},
|
||||
func(size int64, hash string) (io.Reader, bool) {
|
||||
return nil, false
|
||||
})
|
||||
err := chunk.Encrypt([]byte(key), "", false)
|
||||
if err != nil {
|
||||
LOG_ERROR("BENCHMARK_ENCRYPT", "Failed to encrypt the chunk: %v", err)
|
||||
}
|
||||
}
|
||||
config.PutChunk(chunk)
|
||||
numberOfChunks++
|
||||
}
|
||||
|
||||
maker.AddData(reader, chunkFunc)
|
||||
maker.AddData(nil, chunkFunc)
|
||||
|
||||
runningTime := float64(time.Now().UnixNano())/1e9 - startTime
|
||||
speed := int64(float64(fileSize) / runningTime)
|
||||
|
||||
@@ -65,8 +65,8 @@ type Chunk struct {
|
||||
config *Config // Every chunk is associated with a Config object. Which hashing algorithm to use is determined
|
||||
// by the config
|
||||
|
||||
isSnapshot bool // Indicates if the chunk is a snapshot chunk (instead of a file chunk). This is only used by RSA
|
||||
// encryption, where a snapshot chunk is not encrypted by RSA
|
||||
isMetadata bool // Indicates if the chunk is a metadata chunk (instead of a file chunk). This is primarily used by RSA
|
||||
// encryption, where a metadata chunk is not encrypted by RSA
|
||||
|
||||
isBroken bool // Indicates the chunk did not download correctly. This is only used for -persist (allowFailures) mode
|
||||
}
|
||||
@@ -127,7 +127,7 @@ func (chunk *Chunk) Reset(hashNeeded bool) {
|
||||
chunk.hash = nil
|
||||
chunk.id = ""
|
||||
chunk.size = 0
|
||||
chunk.isSnapshot = false
|
||||
chunk.isMetadata = false
|
||||
chunk.isBroken = false
|
||||
}
|
||||
|
||||
@@ -186,7 +186,7 @@ func (chunk *Chunk) VerifyID() {
|
||||
|
||||
// Encrypt encrypts the plain data stored in the chunk buffer. If derivationKey is not nil, the actual
|
||||
// encryption key will be HMAC-SHA256(encryptionKey, derivationKey).
|
||||
func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isSnapshot bool) (err error) {
|
||||
func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isMetadata bool) (err error) {
|
||||
|
||||
var aesBlock cipher.Block
|
||||
var gcm cipher.AEAD
|
||||
@@ -203,8 +203,8 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isSnapsh
|
||||
|
||||
key := encryptionKey
|
||||
usingRSA := false
|
||||
// Enable RSA encryption only when the chunk is not a snapshot chunk
|
||||
if chunk.config.rsaPublicKey != nil && !isSnapshot && !chunk.isSnapshot {
|
||||
// Enable RSA encryption only when the chunk is not a metadata chunk
|
||||
if chunk.config.rsaPublicKey != nil && !isMetadata && !chunk.isMetadata {
|
||||
randomKey := make([]byte, 32)
|
||||
_, err := rand.Read(randomKey)
|
||||
if err != nil {
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
package duplicacy
|
||||
|
||||
import (
|
||||
"io"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
@@ -20,78 +19,47 @@ type ChunkDownloadTask struct {
|
||||
isDownloading bool // 'true' means the chunk has been downloaded or is being downloaded
|
||||
}
|
||||
|
||||
// ChunkDownloadCompletion represents the nofication when a chunk has been downloaded.
|
||||
type ChunkDownloadCompletion struct {
|
||||
chunkIndex int // The index of this chunk in the chunk list
|
||||
chunk *Chunk // The chunk that has been downloaded
|
||||
chunk *Chunk
|
||||
chunkIndex int
|
||||
}
|
||||
|
||||
// ChunkDownloader is capable of performing multi-threaded downloading. Chunks to be downloaded are first organized
|
||||
// ChunkDownloader is a wrapper of ChunkOperator and is only used by the restore procedure.capable of performing multi-threaded downloading. Chunks to be downloaded are first organized
|
||||
// as a list of ChunkDownloadTasks, with only the chunkHash field initialized. When a chunk is needed, the
|
||||
// corresponding ChunkDownloadTask is sent to the dowloading goroutine. Once a chunk is downloaded, it will be
|
||||
// inserted in the completed task list.
|
||||
type ChunkDownloader struct {
|
||||
|
||||
operator *ChunkOperator
|
||||
|
||||
totalChunkSize int64 // Total chunk size
|
||||
downloadedChunkSize int64 // Downloaded chunk size
|
||||
|
||||
config *Config // Associated config
|
||||
storage Storage // Download from this storage
|
||||
snapshotCache *FileStorage // Used as cache if not nil; usually for downloading snapshot chunks
|
||||
showStatistics bool // Show a stats log for each chunk if true
|
||||
threads int // Number of threads
|
||||
allowFailures bool // Whether to failfast on download error, or continue
|
||||
|
||||
taskList []ChunkDownloadTask // The list of chunks to be downloaded
|
||||
completedTasks map[int]bool // Store downloaded chunks
|
||||
lastChunkIndex int // a monotonically increasing number indicating the last chunk to be downloaded
|
||||
|
||||
taskQueue chan ChunkDownloadTask // Downloading goroutines are waiting on this channel for input
|
||||
stopChannel chan bool // Used to stop the dowloading goroutines
|
||||
completionChannel chan ChunkDownloadCompletion // A downloading goroutine sends back the chunk via this channel after downloading
|
||||
|
||||
startTime int64 // The time it starts downloading
|
||||
numberOfDownloadedChunks int // The number of chunks that have been downloaded
|
||||
numberOfDownloadingChunks int // The number of chunks still being downloaded
|
||||
numberOfActiveChunks int // The number of chunks that is being downloaded or has been downloaded but not reclaimed
|
||||
|
||||
NumberOfFailedChunks int // The number of chunks that can't be downloaded
|
||||
}
|
||||
|
||||
func CreateChunkDownloader(config *Config, storage Storage, snapshotCache *FileStorage, showStatistics bool, threads int, allowFailures bool) *ChunkDownloader {
|
||||
func CreateChunkDownloader(operator *ChunkOperator) *ChunkDownloader {
|
||||
downloader := &ChunkDownloader{
|
||||
config: config,
|
||||
storage: storage,
|
||||
snapshotCache: snapshotCache,
|
||||
showStatistics: showStatistics,
|
||||
threads: threads,
|
||||
allowFailures: allowFailures,
|
||||
operator: operator,
|
||||
|
||||
taskList: nil,
|
||||
completedTasks: make(map[int]bool),
|
||||
lastChunkIndex: 0,
|
||||
|
||||
taskQueue: make(chan ChunkDownloadTask, threads),
|
||||
stopChannel: make(chan bool),
|
||||
completionChannel: make(chan ChunkDownloadCompletion),
|
||||
|
||||
startTime: time.Now().Unix(),
|
||||
}
|
||||
|
||||
// Start the downloading goroutines
|
||||
for i := 0; i < downloader.threads; i++ {
|
||||
go func(threadIndex int) {
|
||||
defer CatchLogException()
|
||||
for {
|
||||
select {
|
||||
case task := <-downloader.taskQueue:
|
||||
downloader.Download(threadIndex, task)
|
||||
case <-downloader.stopChannel:
|
||||
return
|
||||
}
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
|
||||
return downloader
|
||||
}
|
||||
|
||||
@@ -129,26 +97,6 @@ func (downloader *ChunkDownloader) AddFiles(snapshot *Snapshot, files []*Entry)
|
||||
}
|
||||
}
|
||||
|
||||
// AddChunk adds a single chunk the download list.
|
||||
func (downloader *ChunkDownloader) AddChunk(chunkHash string) int {
|
||||
|
||||
task := ChunkDownloadTask{
|
||||
chunkIndex: len(downloader.taskList),
|
||||
chunkHash: chunkHash,
|
||||
chunkLength: 0,
|
||||
needed: true,
|
||||
isDownloading: false,
|
||||
}
|
||||
downloader.taskList = append(downloader.taskList, task)
|
||||
if downloader.numberOfActiveChunks < downloader.threads {
|
||||
downloader.taskQueue <- task
|
||||
downloader.numberOfDownloadingChunks++
|
||||
downloader.numberOfActiveChunks++
|
||||
downloader.taskList[len(downloader.taskList)-1].isDownloading = true
|
||||
}
|
||||
return len(downloader.taskList) - 1
|
||||
}
|
||||
|
||||
// Prefetch adds up to 'threads' chunks needed by a file to the download list
|
||||
func (downloader *ChunkDownloader) Prefetch(file *Entry) {
|
||||
|
||||
@@ -159,20 +107,22 @@ func (downloader *ChunkDownloader) Prefetch(file *Entry) {
|
||||
task := &downloader.taskList[i]
|
||||
if task.needed {
|
||||
if !task.isDownloading {
|
||||
if downloader.numberOfActiveChunks >= downloader.threads {
|
||||
if downloader.numberOfActiveChunks >= downloader.operator.threads {
|
||||
return
|
||||
}
|
||||
|
||||
LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching %s chunk %s", file.Path,
|
||||
downloader.config.GetChunkIDFromHash(task.chunkHash))
|
||||
downloader.taskQueue <- *task
|
||||
downloader.operator.config.GetChunkIDFromHash(task.chunkHash))
|
||||
downloader.operator.DownloadAsync(task.chunkHash, i, false, func (chunk *Chunk, chunkIndex int) {
|
||||
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
|
||||
})
|
||||
task.isDownloading = true
|
||||
downloader.numberOfDownloadingChunks++
|
||||
downloader.numberOfActiveChunks++
|
||||
}
|
||||
} else {
|
||||
LOG_DEBUG("DOWNLOAD_PREFETCH", "%s chunk %s is not needed", file.Path,
|
||||
downloader.config.GetChunkIDFromHash(task.chunkHash))
|
||||
downloader.operator.config.GetChunkIDFromHash(task.chunkHash))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -186,7 +136,7 @@ func (downloader *ChunkDownloader) Reclaim(chunkIndex int) {
|
||||
|
||||
for i := range downloader.completedTasks {
|
||||
if i < chunkIndex && downloader.taskList[i].chunk != nil {
|
||||
downloader.config.PutChunk(downloader.taskList[i].chunk)
|
||||
downloader.operator.config.PutChunk(downloader.taskList[i].chunk)
|
||||
downloader.taskList[i].chunk = nil
|
||||
delete(downloader.completedTasks, i)
|
||||
downloader.numberOfActiveChunks--
|
||||
@@ -222,8 +172,10 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
|
||||
// If we haven't started download the specified chunk, download it now
|
||||
if !downloader.taskList[chunkIndex].isDownloading {
|
||||
LOG_DEBUG("DOWNLOAD_FETCH", "Fetching chunk %s",
|
||||
downloader.config.GetChunkIDFromHash(downloader.taskList[chunkIndex].chunkHash))
|
||||
downloader.taskQueue <- downloader.taskList[chunkIndex]
|
||||
downloader.operator.config.GetChunkIDFromHash(downloader.taskList[chunkIndex].chunkHash))
|
||||
downloader.operator.DownloadAsync(downloader.taskList[chunkIndex].chunkHash, chunkIndex, false, func (chunk *Chunk, chunkIndex int) {
|
||||
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
|
||||
})
|
||||
downloader.taskList[chunkIndex].isDownloading = true
|
||||
downloader.numberOfDownloadingChunks++
|
||||
downloader.numberOfActiveChunks++
|
||||
@@ -231,7 +183,7 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
|
||||
|
||||
// We also need to look ahead and prefetch other chunks as many as permitted by the number of threads
|
||||
for i := chunkIndex + 1; i < len(downloader.taskList); i++ {
|
||||
if downloader.numberOfActiveChunks >= downloader.threads {
|
||||
if downloader.numberOfActiveChunks >= downloader.operator.threads {
|
||||
break
|
||||
}
|
||||
task := &downloader.taskList[i]
|
||||
@@ -240,8 +192,10 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
|
||||
}
|
||||
|
||||
if !task.isDownloading {
|
||||
LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching chunk %s", downloader.config.GetChunkIDFromHash(task.chunkHash))
|
||||
downloader.taskQueue <- *task
|
||||
LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching chunk %s", downloader.operator.config.GetChunkIDFromHash(task.chunkHash))
|
||||
downloader.operator.DownloadAsync(task.chunkHash, task.chunkIndex, false, func (chunk *Chunk, chunkIndex int) {
|
||||
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
|
||||
})
|
||||
task.isDownloading = true
|
||||
downloader.numberOfDownloadingChunks++
|
||||
downloader.numberOfActiveChunks++
|
||||
@@ -255,9 +209,6 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
|
||||
downloader.taskList[completion.chunkIndex].chunk = completion.chunk
|
||||
downloader.numberOfDownloadedChunks++
|
||||
downloader.numberOfDownloadingChunks--
|
||||
if completion.chunk.isBroken {
|
||||
downloader.NumberOfFailedChunks++
|
||||
}
|
||||
}
|
||||
return downloader.taskList[chunkIndex].chunk
|
||||
}
|
||||
@@ -281,13 +232,10 @@ func (downloader *ChunkDownloader) WaitForCompletion() {
|
||||
// Wait for a completion event first
|
||||
if downloader.numberOfActiveChunks > 0 {
|
||||
completion := <-downloader.completionChannel
|
||||
downloader.config.PutChunk(completion.chunk)
|
||||
downloader.operator.config.PutChunk(completion.chunk)
|
||||
downloader.numberOfActiveChunks--
|
||||
downloader.numberOfDownloadedChunks++
|
||||
downloader.numberOfDownloadingChunks--
|
||||
if completion.chunk.isBroken {
|
||||
downloader.NumberOfFailedChunks++
|
||||
}
|
||||
}
|
||||
|
||||
// Pass the tasks one by one to the download queue
|
||||
@@ -297,7 +245,9 @@ func (downloader *ChunkDownloader) WaitForCompletion() {
|
||||
downloader.lastChunkIndex++
|
||||
continue
|
||||
}
|
||||
downloader.taskQueue <- *task
|
||||
downloader.operator.DownloadAsync(task.chunkHash, task.chunkIndex, false, func (chunk *Chunk, chunkIndex int) {
|
||||
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
|
||||
})
|
||||
task.isDownloading = true
|
||||
downloader.numberOfDownloadingChunks++
|
||||
downloader.numberOfActiveChunks++
|
||||
@@ -306,213 +256,3 @@ func (downloader *ChunkDownloader) WaitForCompletion() {
|
||||
}
|
||||
}
|
||||
|
||||
// Stop terminates all downloading goroutines
|
||||
func (downloader *ChunkDownloader) Stop() {
|
||||
for downloader.numberOfDownloadingChunks > 0 {
|
||||
completion := <-downloader.completionChannel
|
||||
downloader.completedTasks[completion.chunkIndex] = true
|
||||
downloader.taskList[completion.chunkIndex].chunk = completion.chunk
|
||||
downloader.numberOfDownloadedChunks++
|
||||
downloader.numberOfDownloadingChunks--
|
||||
if completion.chunk.isBroken {
|
||||
downloader.NumberOfFailedChunks++
|
||||
}
|
||||
}
|
||||
|
||||
for i := range downloader.completedTasks {
|
||||
downloader.config.PutChunk(downloader.taskList[i].chunk)
|
||||
downloader.taskList[i].chunk = nil
|
||||
downloader.numberOfActiveChunks--
|
||||
}
|
||||
|
||||
for i := 0; i < downloader.threads; i++ {
|
||||
downloader.stopChannel <- true
|
||||
}
|
||||
}
|
||||
|
||||
// Download downloads a chunk from the storage.
|
||||
func (downloader *ChunkDownloader) Download(threadIndex int, task ChunkDownloadTask) bool {
|
||||
|
||||
cachedPath := ""
|
||||
chunk := downloader.config.GetChunk()
|
||||
chunkID := downloader.config.GetChunkIDFromHash(task.chunkHash)
|
||||
|
||||
if downloader.snapshotCache != nil && downloader.storage.IsCacheNeeded() {
|
||||
|
||||
var exist bool
|
||||
var err error
|
||||
|
||||
// Reset the chunk with a hasher -- we're reading from the cache where chunk are not encrypted or compressed
|
||||
chunk.Reset(true)
|
||||
|
||||
cachedPath, exist, _, err = downloader.snapshotCache.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
LOG_WARN("DOWNLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
|
||||
} else if exist {
|
||||
err = downloader.snapshotCache.DownloadFile(0, cachedPath, chunk)
|
||||
if err != nil {
|
||||
LOG_WARN("DOWNLOAD_CACHE", "Failed to load the chunk %s from the snapshot cache: %v", chunkID, err)
|
||||
} else {
|
||||
actualChunkID := chunk.GetID()
|
||||
if actualChunkID != chunkID {
|
||||
LOG_WARN("DOWNLOAD_CACHE_CORRUPTED",
|
||||
"The chunk %s load from the snapshot cache has a hash id of %s", chunkID, actualChunkID)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been loaded from the snapshot cache", chunkID)
|
||||
|
||||
downloader.completionChannel <- ChunkDownloadCompletion{chunk: chunk, chunkIndex: task.chunkIndex}
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reset the chunk without a hasher -- the downloaded content will be encrypted and/or compressed and the hasher
|
||||
// will be set up before the encryption
|
||||
chunk.Reset(false)
|
||||
|
||||
// If failures are allowed, complete the task properly
|
||||
completeFailedChunk := func(chunk *Chunk) {
|
||||
if downloader.allowFailures {
|
||||
chunk.isBroken = true
|
||||
downloader.completionChannel <- ChunkDownloadCompletion{chunk: chunk, chunkIndex: task.chunkIndex}
|
||||
}
|
||||
}
|
||||
|
||||
const MaxDownloadAttempts = 3
|
||||
for downloadAttempt := 0; ; downloadAttempt++ {
|
||||
|
||||
// Find the chunk by ID first.
|
||||
chunkPath, exist, _, err := downloader.storage.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
completeFailedChunk(chunk)
|
||||
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
|
||||
if !exist {
|
||||
// No chunk is found. Have to find it in the fossil pool again.
|
||||
fossilPath, exist, _, err := downloader.storage.FindChunk(threadIndex, chunkID, true)
|
||||
if err != nil {
|
||||
completeFailedChunk(chunk)
|
||||
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
|
||||
if !exist {
|
||||
|
||||
retry := false
|
||||
|
||||
// Retry for Hubic or WebDAV as it may return 404 even when the chunk exists
|
||||
if _, ok := downloader.storage.(*HubicStorage); ok {
|
||||
retry = true
|
||||
}
|
||||
|
||||
if _, ok := downloader.storage.(*WebDAVStorage); ok {
|
||||
retry = true
|
||||
}
|
||||
|
||||
if retry && downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "Failed to find the chunk %s; retrying", chunkID)
|
||||
continue
|
||||
}
|
||||
|
||||
completeFailedChunk(chunk)
|
||||
// A chunk is not found. This is a serious error and hopefully it will never happen.
|
||||
if err != nil {
|
||||
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found: %v", chunkID, err)
|
||||
} else {
|
||||
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found", chunkID)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// We can't download the fossil directly. We have to turn it back into a regular chunk and try
|
||||
// downloading again.
|
||||
err = downloader.storage.MoveFile(threadIndex, fossilPath, chunkPath)
|
||||
if err != nil {
|
||||
completeFailedChunk(chunk)
|
||||
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to resurrect chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
|
||||
LOG_WARN("DOWNLOAD_RESURRECT", "Fossil %s has been resurrected", chunkID)
|
||||
continue
|
||||
}
|
||||
|
||||
err = downloader.storage.DownloadFile(threadIndex, chunkPath, chunk)
|
||||
if err != nil {
|
||||
_, isHubic := downloader.storage.(*HubicStorage)
|
||||
// Retry on EOF or if it is a Hubic backend as it may return 404 even when the chunk exists
|
||||
if (err == io.ErrUnexpectedEOF || isHubic) && downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "Failed to download the chunk %s: %v; retrying", chunkID, err)
|
||||
chunk.Reset(false)
|
||||
continue
|
||||
} else {
|
||||
completeFailedChunk(chunk)
|
||||
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to download the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
err = chunk.Decrypt(downloader.config.ChunkKey, task.chunkHash)
|
||||
if err != nil {
|
||||
if downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "Failed to decrypt the chunk %s: %v; retrying", chunkID, err)
|
||||
chunk.Reset(false)
|
||||
continue
|
||||
} else {
|
||||
completeFailedChunk(chunk)
|
||||
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_DECRYPT", "Failed to decrypt the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
actualChunkID := chunk.GetID()
|
||||
if actualChunkID != chunkID {
|
||||
if downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "The chunk %s has a hash id of %s; retrying", chunkID, actualChunkID)
|
||||
chunk.Reset(false)
|
||||
continue
|
||||
} else {
|
||||
completeFailedChunk(chunk)
|
||||
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CORRUPTED", "The chunk %s has a hash id of %s", chunkID, actualChunkID)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
if len(cachedPath) > 0 {
|
||||
// Save a copy to the local snapshot cache
|
||||
err := downloader.snapshotCache.UploadFile(threadIndex, cachedPath, chunk.GetBytes())
|
||||
if err != nil {
|
||||
LOG_WARN("DOWNLOAD_CACHE", "Failed to add the chunk %s to the snapshot cache: %v", chunkID, err)
|
||||
}
|
||||
}
|
||||
|
||||
downloadedChunkSize := atomic.AddInt64(&downloader.downloadedChunkSize, int64(chunk.GetLength()))
|
||||
|
||||
if (downloader.showStatistics || IsTracing()) && downloader.totalChunkSize > 0 {
|
||||
|
||||
now := time.Now().Unix()
|
||||
if now <= downloader.startTime {
|
||||
now = downloader.startTime + 1
|
||||
}
|
||||
speed := downloadedChunkSize / (now - downloader.startTime)
|
||||
remainingTime := int64(0)
|
||||
if speed > 0 {
|
||||
remainingTime = (downloader.totalChunkSize-downloadedChunkSize)/speed + 1
|
||||
}
|
||||
percentage := float32(downloadedChunkSize * 1000 / downloader.totalChunkSize)
|
||||
LOG_INFO("DOWNLOAD_PROGRESS", "Downloaded chunk %d size %d, %sB/s %s %.1f%%",
|
||||
task.chunkIndex+1, chunk.GetLength(),
|
||||
PrettySize(speed), PrettyTime(remainingTime), percentage/10)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_DOWNLOAD", "Chunk %s has been downloaded", chunkID)
|
||||
}
|
||||
|
||||
downloader.completionChannel <- ChunkDownloadCompletion{chunk: chunk, chunkIndex: task.chunkIndex}
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -25,15 +25,20 @@ type ChunkMaker struct {
|
||||
bufferSize int
|
||||
bufferStart int
|
||||
|
||||
minimumReached bool
|
||||
hashSum uint64
|
||||
chunk *Chunk
|
||||
|
||||
config *Config
|
||||
|
||||
hashOnly bool
|
||||
hashOnlyChunk *Chunk
|
||||
|
||||
}
|
||||
|
||||
// CreateChunkMaker creates a chunk maker. 'randomSeed' is used to generate the character-to-integer table needed by
|
||||
// buzhash.
|
||||
func CreateChunkMaker(config *Config, hashOnly bool) *ChunkMaker {
|
||||
func CreateFileChunkMaker(config *Config, hashOnly bool) *ChunkMaker {
|
||||
size := 1
|
||||
for size*2 <= config.AverageChunkSize {
|
||||
size *= 2
|
||||
@@ -67,6 +72,33 @@ func CreateChunkMaker(config *Config, hashOnly bool) *ChunkMaker {
|
||||
}
|
||||
|
||||
maker.buffer = make([]byte, 2*config.MinimumChunkSize)
|
||||
maker.bufferStart = 0
|
||||
maker.bufferSize = 0
|
||||
|
||||
maker.startNewChunk()
|
||||
|
||||
return maker
|
||||
}
|
||||
|
||||
// CreateMetaDataChunkMaker creates a chunk maker that always uses the variable-sized chunking algorithm
|
||||
func CreateMetaDataChunkMaker(config *Config, chunkSize int) *ChunkMaker {
|
||||
|
||||
size := 1
|
||||
for size*2 <= chunkSize {
|
||||
size *= 2
|
||||
}
|
||||
|
||||
if size != chunkSize {
|
||||
LOG_FATAL("CHUNK_SIZE", "Invalid metadata chunk size: %d is not a power of 2", chunkSize)
|
||||
return nil
|
||||
}
|
||||
|
||||
maker := CreateFileChunkMaker(config, false)
|
||||
maker.hashMask = uint64(chunkSize - 1)
|
||||
maker.maximumChunkSize = chunkSize * 4
|
||||
maker.minimumChunkSize = chunkSize / 4
|
||||
maker.bufferCapacity = 2 * maker.minimumChunkSize
|
||||
maker.buffer = make([]byte, maker.bufferCapacity)
|
||||
|
||||
return maker
|
||||
}
|
||||
@@ -90,62 +122,50 @@ func (maker *ChunkMaker) buzhashUpdate(sum uint64, out byte, in byte, length int
|
||||
return rotateLeftByOne(sum) ^ rotateLeft(maker.randomTable[out], uint(length)) ^ maker.randomTable[in]
|
||||
}
|
||||
|
||||
// ForEachChunk reads data from 'reader'. If EOF is encountered, it will call 'nextReader' to ask for next file. If
|
||||
// 'nextReader' returns false, it will process remaining data in the buffer and then quit. When a chunk is identified,
|
||||
// it will call 'endOfChunk' to return the chunk size and a boolean flag indicating if it is the last chunk.
|
||||
func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *Chunk, final bool),
|
||||
nextReader func(size int64, hash string) (io.Reader, bool)) {
|
||||
func (maker *ChunkMaker) startNewChunk() (chunk *Chunk) {
|
||||
maker.hashSum = 0
|
||||
maker.minimumReached = false
|
||||
if maker.hashOnly {
|
||||
maker.chunk = maker.hashOnlyChunk
|
||||
maker.chunk.Reset(true)
|
||||
} else {
|
||||
maker.chunk = maker.config.GetChunk()
|
||||
maker.chunk.Reset(true)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
maker.bufferStart = 0
|
||||
maker.bufferSize = 0
|
||||
|
||||
var minimumReached bool
|
||||
var hashSum uint64
|
||||
var chunk *Chunk
|
||||
func (maker *ChunkMaker) AddData(reader io.Reader, sendChunk func(*Chunk)) (int64, string) {
|
||||
|
||||
isEOF := false
|
||||
fileSize := int64(0)
|
||||
fileHasher := maker.config.NewFileHasher()
|
||||
|
||||
// Start a new chunk.
|
||||
startNewChunk := func() {
|
||||
hashSum = 0
|
||||
minimumReached = false
|
||||
if maker.hashOnly {
|
||||
chunk = maker.hashOnlyChunk
|
||||
chunk.Reset(true)
|
||||
} else {
|
||||
chunk = maker.config.GetChunk()
|
||||
chunk.Reset(true)
|
||||
}
|
||||
}
|
||||
|
||||
// Move data from the buffer to the chunk.
|
||||
fill := func(count int) {
|
||||
|
||||
if maker.bufferStart+count < maker.bufferCapacity {
|
||||
chunk.Write(maker.buffer[maker.bufferStart : maker.bufferStart+count])
|
||||
maker.chunk.Write(maker.buffer[maker.bufferStart : maker.bufferStart+count])
|
||||
maker.bufferStart += count
|
||||
maker.bufferSize -= count
|
||||
} else {
|
||||
chunk.Write(maker.buffer[maker.bufferStart:])
|
||||
chunk.Write(maker.buffer[:count-(maker.bufferCapacity-maker.bufferStart)])
|
||||
maker.chunk.Write(maker.buffer[maker.bufferStart:])
|
||||
maker.chunk.Write(maker.buffer[:count-(maker.bufferCapacity-maker.bufferStart)])
|
||||
maker.bufferStart = count - (maker.bufferCapacity - maker.bufferStart)
|
||||
maker.bufferSize -= count
|
||||
}
|
||||
}
|
||||
|
||||
startNewChunk()
|
||||
|
||||
var err error
|
||||
|
||||
isEOF := false
|
||||
|
||||
if maker.minimumChunkSize == maker.maximumChunkSize {
|
||||
|
||||
if maker.bufferCapacity < maker.minimumChunkSize {
|
||||
maker.buffer = make([]byte, maker.minimumChunkSize)
|
||||
if reader == nil {
|
||||
return 0, ""
|
||||
}
|
||||
|
||||
for {
|
||||
maker.startNewChunk()
|
||||
maker.bufferStart = 0
|
||||
for maker.bufferStart < maker.minimumChunkSize && !isEOF {
|
||||
count, err := reader.Read(maker.buffer[maker.bufferStart:maker.minimumChunkSize])
|
||||
@@ -153,7 +173,7 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
|
||||
if err != nil {
|
||||
if err != io.EOF {
|
||||
LOG_ERROR("CHUNK_MAKER", "Failed to read %d bytes: %s", count, err.Error())
|
||||
return
|
||||
return 0, ""
|
||||
} else {
|
||||
isEOF = true
|
||||
}
|
||||
@@ -161,26 +181,15 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
|
||||
maker.bufferStart += count
|
||||
}
|
||||
|
||||
fileHasher.Write(maker.buffer[:maker.bufferStart])
|
||||
fileSize += int64(maker.bufferStart)
|
||||
chunk.Write(maker.buffer[:maker.bufferStart])
|
||||
if maker.bufferStart > 0 {
|
||||
fileHasher.Write(maker.buffer[:maker.bufferStart])
|
||||
fileSize += int64(maker.bufferStart)
|
||||
maker.chunk.Write(maker.buffer[:maker.bufferStart])
|
||||
sendChunk(maker.chunk)
|
||||
}
|
||||
|
||||
if isEOF {
|
||||
var ok bool
|
||||
reader, ok = nextReader(fileSize, hex.EncodeToString(fileHasher.Sum(nil)))
|
||||
if !ok {
|
||||
endOfChunk(chunk, true)
|
||||
return
|
||||
} else {
|
||||
endOfChunk(chunk, false)
|
||||
startNewChunk()
|
||||
fileSize = 0
|
||||
fileHasher = maker.config.NewFileHasher()
|
||||
isEOF = false
|
||||
}
|
||||
} else {
|
||||
endOfChunk(chunk, false)
|
||||
startNewChunk()
|
||||
return fileSize, hex.EncodeToString(fileHasher.Sum(nil))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -189,7 +198,7 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
|
||||
for {
|
||||
|
||||
// If the buffer still has some space left and EOF is not seen, read more data.
|
||||
for maker.bufferSize < maker.bufferCapacity && !isEOF {
|
||||
for maker.bufferSize < maker.bufferCapacity && !isEOF && reader != nil {
|
||||
start := maker.bufferStart + maker.bufferSize
|
||||
count := maker.bufferCapacity - start
|
||||
if start >= maker.bufferCapacity {
|
||||
@@ -201,7 +210,7 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
|
||||
|
||||
if err != nil && err != io.EOF {
|
||||
LOG_ERROR("CHUNK_MAKER", "Failed to read %d bytes: %s", count, err.Error())
|
||||
return
|
||||
return 0, ""
|
||||
}
|
||||
|
||||
maker.bufferSize += count
|
||||
@@ -210,54 +219,55 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
|
||||
|
||||
// if EOF is seen, try to switch to next file and continue
|
||||
if err == io.EOF {
|
||||
var ok bool
|
||||
reader, ok = nextReader(fileSize, hex.EncodeToString(fileHasher.Sum(nil)))
|
||||
if !ok {
|
||||
isEOF = true
|
||||
} else {
|
||||
fileSize = 0
|
||||
fileHasher = maker.config.NewFileHasher()
|
||||
isEOF = false
|
||||
}
|
||||
isEOF = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// No eough data to meet the minimum chunk size requirement, so just return as a chunk.
|
||||
if maker.bufferSize < maker.minimumChunkSize {
|
||||
fill(maker.bufferSize)
|
||||
endOfChunk(chunk, true)
|
||||
return
|
||||
if reader == nil {
|
||||
fill(maker.bufferSize)
|
||||
if maker.chunk.GetLength() > 0 {
|
||||
sendChunk(maker.chunk)
|
||||
}
|
||||
return 0, ""
|
||||
} else if isEOF {
|
||||
return fileSize, hex.EncodeToString(fileHasher.Sum(nil))
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Minimum chunk size has been reached. Calculate the buzhash for the minimum size chunk.
|
||||
if !minimumReached {
|
||||
if !maker.minimumReached {
|
||||
|
||||
bytes := maker.minimumChunkSize
|
||||
|
||||
if maker.bufferStart+bytes < maker.bufferCapacity {
|
||||
hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:maker.bufferStart+bytes])
|
||||
maker.hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:maker.bufferStart+bytes])
|
||||
} else {
|
||||
hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:])
|
||||
hashSum = maker.buzhashSum(hashSum,
|
||||
maker.hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:])
|
||||
maker.hashSum = maker.buzhashSum(maker.hashSum,
|
||||
maker.buffer[:bytes-(maker.bufferCapacity-maker.bufferStart)])
|
||||
}
|
||||
|
||||
if (hashSum & maker.hashMask) == 0 {
|
||||
if (maker.hashSum & maker.hashMask) == 0 {
|
||||
// This is a minimum size chunk
|
||||
fill(bytes)
|
||||
endOfChunk(chunk, false)
|
||||
startNewChunk()
|
||||
sendChunk(maker.chunk)
|
||||
maker.startNewChunk()
|
||||
continue
|
||||
}
|
||||
|
||||
minimumReached = true
|
||||
maker.minimumReached = true
|
||||
}
|
||||
|
||||
// Now check the buzhash of the data in the buffer, shifting one byte at a time.
|
||||
bytes := maker.bufferSize - maker.minimumChunkSize
|
||||
isEOC := false
|
||||
maxSize := maker.maximumChunkSize - chunk.GetLength()
|
||||
for i := 0; i < maker.bufferSize-maker.minimumChunkSize; i++ {
|
||||
isEOC := false // chunk boundary found
|
||||
maxSize := maker.maximumChunkSize - maker.chunk.GetLength()
|
||||
for i := 0; i < bytes; i++ {
|
||||
out := maker.bufferStart + i
|
||||
if out >= maker.bufferCapacity {
|
||||
out -= maker.bufferCapacity
|
||||
@@ -267,8 +277,8 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
|
||||
in -= maker.bufferCapacity
|
||||
}
|
||||
|
||||
hashSum = maker.buzhashUpdate(hashSum, maker.buffer[out], maker.buffer[in], maker.minimumChunkSize)
|
||||
if (hashSum&maker.hashMask) == 0 || i == maxSize-maker.minimumChunkSize-1 {
|
||||
maker.hashSum = maker.buzhashUpdate(maker.hashSum, maker.buffer[out], maker.buffer[in], maker.minimumChunkSize)
|
||||
if (maker.hashSum&maker.hashMask) == 0 || i == maxSize-maker.minimumChunkSize-1 {
|
||||
// A chunk is completed.
|
||||
bytes = i + 1 + maker.minimumChunkSize
|
||||
isEOC = true
|
||||
@@ -277,21 +287,20 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
|
||||
}
|
||||
|
||||
fill(bytes)
|
||||
|
||||
if isEOC {
|
||||
if isEOF && maker.bufferSize == 0 {
|
||||
endOfChunk(chunk, true)
|
||||
return
|
||||
sendChunk(maker.chunk)
|
||||
maker.startNewChunk()
|
||||
} else {
|
||||
if reader == nil {
|
||||
fill(maker.minimumChunkSize)
|
||||
sendChunk(maker.chunk)
|
||||
maker.startNewChunk()
|
||||
return 0, ""
|
||||
}
|
||||
endOfChunk(chunk, false)
|
||||
startNewChunk()
|
||||
continue
|
||||
}
|
||||
|
||||
if isEOF {
|
||||
fill(maker.bufferSize)
|
||||
endOfChunk(chunk, true)
|
||||
return
|
||||
return fileSize, hex.EncodeToString(fileHasher.Sum(nil))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,14 +7,12 @@ package duplicacy
|
||||
import (
|
||||
"bytes"
|
||||
crypto_rand "crypto/rand"
|
||||
"io"
|
||||
"math/rand"
|
||||
"sort"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunkSize,
|
||||
bufferCapacity int) ([]string, int) {
|
||||
func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunkSize int) ([]string, int) {
|
||||
|
||||
config := CreateConfig()
|
||||
|
||||
@@ -27,14 +25,12 @@ func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunk
|
||||
config.HashKey = DEFAULT_KEY
|
||||
config.IDKey = DEFAULT_KEY
|
||||
|
||||
maker := CreateChunkMaker(config, false)
|
||||
maker := CreateFileChunkMaker(config, false)
|
||||
|
||||
var chunks []string
|
||||
totalChunkSize := 0
|
||||
totalFileSize := int64(0)
|
||||
|
||||
//LOG_INFO("CHUNK_SPLIT", "bufferCapacity: %d", bufferCapacity)
|
||||
|
||||
buffers := make([]*bytes.Buffer, n)
|
||||
sizes := make([]int, n)
|
||||
sizes[0] = 0
|
||||
@@ -42,7 +38,7 @@ func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunk
|
||||
same := true
|
||||
for same {
|
||||
same = false
|
||||
sizes[i] = rand.Int() % n
|
||||
sizes[i] = rand.Int() % len(content)
|
||||
for j := 0; j < i; j++ {
|
||||
if sizes[i] == sizes[j] {
|
||||
same = true
|
||||
@@ -59,22 +55,17 @@ func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunk
|
||||
}
|
||||
buffers[n-1] = bytes.NewBuffer(content[sizes[n-1]:])
|
||||
|
||||
i := 0
|
||||
chunkFunc := func(chunk *Chunk) {
|
||||
chunks = append(chunks, chunk.GetHash())
|
||||
totalChunkSize += chunk.GetLength()
|
||||
config.PutChunk(chunk)
|
||||
}
|
||||
|
||||
maker.ForEachChunk(buffers[0],
|
||||
func(chunk *Chunk, final bool) {
|
||||
//LOG_INFO("CHUNK_SPLIT", "i: %d, chunk: %s, size: %d", i, chunk.GetHash(), size)
|
||||
chunks = append(chunks, chunk.GetHash())
|
||||
totalChunkSize += chunk.GetLength()
|
||||
},
|
||||
func(size int64, hash string) (io.Reader, bool) {
|
||||
totalFileSize += size
|
||||
i++
|
||||
if i >= len(buffers) {
|
||||
return nil, false
|
||||
}
|
||||
return buffers[i], true
|
||||
})
|
||||
for _, buffer := range buffers {
|
||||
fileSize, _ := maker.AddData(buffer, chunkFunc)
|
||||
totalFileSize += fileSize
|
||||
}
|
||||
maker.AddData(nil, chunkFunc)
|
||||
|
||||
if totalFileSize != int64(totalChunkSize) {
|
||||
LOG_ERROR("CHUNK_SPLIT", "total chunk size: %d, total file size: %d", totalChunkSize, totalFileSize)
|
||||
@@ -96,35 +87,28 @@ func TestChunkMaker(t *testing.T) {
|
||||
continue
|
||||
}
|
||||
|
||||
chunkArray1, totalSize1 := splitIntoChunks(content, 10, 32, 64, 16, 32)
|
||||
chunkArray1, totalSize1 := splitIntoChunks(content, 10, 32, 64, 16)
|
||||
|
||||
capacities := [...]int{32, 33, 34, 61, 62, 63, 64, 65, 66, 126, 127, 128, 129, 130,
|
||||
255, 256, 257, 511, 512, 513, 1023, 1024, 1025,
|
||||
32, 48, 64, 128, 256, 512, 1024, 2048}
|
||||
|
||||
//capacities := [...]int { 32 }
|
||||
for _, n := range [...]int{6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} {
|
||||
chunkArray2, totalSize2 := splitIntoChunks(content, n, 32, 64, 16)
|
||||
|
||||
for _, capacity := range capacities {
|
||||
if totalSize1 != totalSize2 {
|
||||
t.Errorf("[size %d] total size is %d instead of %d",
|
||||
size, totalSize2, totalSize1)
|
||||
}
|
||||
|
||||
for _, n := range [...]int{6, 7, 8, 9, 10} {
|
||||
chunkArray2, totalSize2 := splitIntoChunks(content, n, 32, 64, 16, capacity)
|
||||
|
||||
if totalSize1 != totalSize2 {
|
||||
t.Errorf("[size %d, capacity %d] total size is %d instead of %d",
|
||||
size, capacity, totalSize2, totalSize1)
|
||||
}
|
||||
|
||||
if len(chunkArray1) != len(chunkArray2) {
|
||||
t.Errorf("[size %d, capacity %d] number of chunks is %d instead of %d",
|
||||
size, capacity, len(chunkArray2), len(chunkArray1))
|
||||
} else {
|
||||
for i := 0; i < len(chunkArray1); i++ {
|
||||
if chunkArray1[i] != chunkArray2[i] {
|
||||
t.Errorf("[size %d, capacity %d, chunk %d] chunk is different", size, capacity, i)
|
||||
}
|
||||
if len(chunkArray1) != len(chunkArray2) {
|
||||
t.Errorf("[size %d] number of chunks is %d instead of %d",
|
||||
size, len(chunkArray2), len(chunkArray1))
|
||||
} else {
|
||||
for i := 0; i < len(chunkArray1); i++ {
|
||||
if chunkArray1[i] != chunkArray2[i] {
|
||||
t.Errorf("[size %d, chunk %d] chunk is different", size, i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
package duplicacy
|
||||
|
||||
import (
|
||||
"io"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
@@ -12,42 +13,69 @@ import (
|
||||
|
||||
// These are operations that ChunkOperator will perform.
|
||||
const (
|
||||
ChunkOperationFind = 0
|
||||
ChunkOperationDelete = 1
|
||||
ChunkOperationFossilize = 2
|
||||
ChunkOperationResurrect = 3
|
||||
ChunkOperationDownload = 0
|
||||
ChunkOperationUpload = 1
|
||||
ChunkOperationDelete = 2
|
||||
ChunkOperationFossilize = 3
|
||||
ChunkOperationResurrect = 4
|
||||
ChunkOperationFind = 5
|
||||
)
|
||||
|
||||
// ChunkOperatorTask is used to pass parameters for different kinds of chunk operations.
|
||||
type ChunkOperatorTask struct {
|
||||
operation int // The type of operation
|
||||
chunkID string // The chunk id
|
||||
filePath string // The path of the chunk file; it may be empty
|
||||
// ChunkTask is used to pass parameters for different kinds of chunk operations.
|
||||
type ChunkTask struct {
|
||||
operation int // The type of operation
|
||||
chunkID string // The chunk id
|
||||
chunkHash string // The chunk hash
|
||||
chunkIndex int // The chunk index
|
||||
filePath string // The path of the chunk file; it may be empty
|
||||
|
||||
isMetadata bool
|
||||
chunk *Chunk
|
||||
|
||||
completionFunc func(chunk *Chunk, chunkIndex int)
|
||||
}
|
||||
|
||||
// ChunkOperator is capable of performing multi-threaded operations on chunks.
|
||||
type ChunkOperator struct {
|
||||
numberOfActiveTasks int64 // The number of chunks that are being operated on
|
||||
storage Storage // This storage
|
||||
threads int // Number of threads
|
||||
taskQueue chan ChunkOperatorTask // Operating goroutines are waiting on this channel for input
|
||||
stopChannel chan bool // Used to stop all the goroutines
|
||||
config *Config // Associated config
|
||||
storage Storage // This storage
|
||||
snapshotCache *FileStorage
|
||||
showStatistics bool
|
||||
threads int // Number of threads
|
||||
taskQueue chan ChunkTask // Operating goroutines are waiting on this channel for input
|
||||
stopChannel chan bool // Used to stop all the goroutines
|
||||
|
||||
fossils []string // For fossilize operation, the paths of the fossils are stored in this slice
|
||||
fossilsLock *sync.Mutex // The lock for 'fossils'
|
||||
numberOfActiveTasks int64 // The number of chunks that are being operated on
|
||||
|
||||
fossils []string // For fossilize operation, the paths of the fossils are stored in this slice
|
||||
collectionLock *sync.Mutex // The lock for accessing 'fossils'
|
||||
|
||||
startTime int64 // The time it starts downloading
|
||||
totalChunkSize int64 // Total chunk size
|
||||
downloadedChunkSize int64 // Downloaded chunk size
|
||||
|
||||
allowFailures bool // Whether to fail on download error, or continue
|
||||
NumberOfFailedChunks int64 // The number of chunks that can't be downloaded
|
||||
|
||||
UploadCompletionFunc func(chunk *Chunk, chunkIndex int, inCache bool, chunkSize int, uploadSize int)
|
||||
}
|
||||
|
||||
// CreateChunkOperator creates a new ChunkOperator.
|
||||
func CreateChunkOperator(storage Storage, threads int) *ChunkOperator {
|
||||
func CreateChunkOperator(config *Config, storage Storage, snapshotCache *FileStorage, showStatistics bool, threads int, allowFailures bool) *ChunkOperator {
|
||||
|
||||
operator := &ChunkOperator{
|
||||
config: config,
|
||||
storage: storage,
|
||||
snapshotCache: snapshotCache,
|
||||
showStatistics: showStatistics,
|
||||
threads: threads,
|
||||
|
||||
taskQueue: make(chan ChunkOperatorTask, threads*4),
|
||||
taskQueue: make(chan ChunkTask, threads),
|
||||
stopChannel: make(chan bool),
|
||||
|
||||
fossils: make([]string, 0),
|
||||
fossilsLock: &sync.Mutex{},
|
||||
collectionLock: &sync.Mutex{},
|
||||
|
||||
allowFailures: allowFailures,
|
||||
}
|
||||
|
||||
// Start the operator goroutines
|
||||
@@ -84,38 +112,78 @@ func (operator *ChunkOperator) Stop() {
|
||||
atomic.AddInt64(&operator.numberOfActiveTasks, int64(-1))
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) AddTask(operation int, chunkID string, filePath string) {
|
||||
func (operator *ChunkOperator) WaitForCompletion() {
|
||||
|
||||
task := ChunkOperatorTask{
|
||||
operation: operation,
|
||||
chunkID: chunkID,
|
||||
filePath: filePath,
|
||||
for atomic.LoadInt64(&operator.numberOfActiveTasks) > 0 {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
operator.taskQueue <- task
|
||||
atomic.AddInt64(&operator.numberOfActiveTasks, int64(1))
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) Find(chunkID string) {
|
||||
operator.AddTask(ChunkOperationFind, chunkID, "")
|
||||
func (operator *ChunkOperator) AddTask(operation int, chunkID string, chunkHash string, filePath string, chunkIndex int, chunk *Chunk, isMetadata bool, completionFunc func(*Chunk, int)) {
|
||||
|
||||
task := ChunkTask {
|
||||
operation: operation,
|
||||
chunkID: chunkID,
|
||||
chunkHash: chunkHash,
|
||||
chunkIndex: chunkIndex,
|
||||
filePath: filePath,
|
||||
chunk: chunk,
|
||||
isMetadata: isMetadata,
|
||||
completionFunc: completionFunc,
|
||||
}
|
||||
|
||||
operator.taskQueue <- task
|
||||
atomic.AddInt64(&operator.numberOfActiveTasks, int64(1))
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) Download(chunkHash string, chunkIndex int, isMetadata bool) *Chunk {
|
||||
chunkID := operator.config.GetChunkIDFromHash(chunkHash)
|
||||
completionChannel := make(chan *Chunk)
|
||||
completionFunc := func(chunk *Chunk, chunkIndex int) {
|
||||
completionChannel <- chunk
|
||||
}
|
||||
operator.AddTask(ChunkOperationDownload, chunkID, chunkHash, "", chunkIndex, nil, isMetadata, completionFunc)
|
||||
return <- completionChannel
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) DownloadAsync(chunkHash string, chunkIndex int, isMetadata bool, completionFunc func(*Chunk, int)) {
|
||||
chunkID := operator.config.GetChunkIDFromHash(chunkHash)
|
||||
operator.AddTask(ChunkOperationDownload, chunkID, chunkHash, "", chunkIndex, nil, isMetadata, completionFunc)
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) Upload(chunk *Chunk, chunkIndex int, isMetadata bool) {
|
||||
chunkHash := chunk.GetHash()
|
||||
chunkID := operator.config.GetChunkIDFromHash(chunkHash)
|
||||
operator.AddTask(ChunkOperationUpload, chunkID, chunkHash, "", chunkIndex, chunk, isMetadata, nil)
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) Delete(chunkID string, filePath string) {
|
||||
operator.AddTask(ChunkOperationDelete, chunkID, filePath)
|
||||
operator.AddTask(ChunkOperationDelete, chunkID, "", filePath, 0, nil, false, nil)
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) Fossilize(chunkID string, filePath string) {
|
||||
operator.AddTask(ChunkOperationFossilize, chunkID, filePath)
|
||||
operator.AddTask(ChunkOperationFossilize, chunkID, "", filePath, 0, nil, false, nil)
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) Resurrect(chunkID string, filePath string) {
|
||||
operator.AddTask(ChunkOperationResurrect, chunkID, filePath)
|
||||
operator.AddTask(ChunkOperationResurrect, chunkID, "", filePath, 0, nil, false, nil)
|
||||
}
|
||||
|
||||
func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
|
||||
func (operator *ChunkOperator) Run(threadIndex int, task ChunkTask) {
|
||||
defer func() {
|
||||
atomic.AddInt64(&operator.numberOfActiveTasks, int64(-1))
|
||||
}()
|
||||
|
||||
if task.operation == ChunkOperationDownload {
|
||||
operator.DownloadChunk(threadIndex, task)
|
||||
return
|
||||
} else if task.operation == ChunkOperationUpload {
|
||||
operator.UploadChunk(threadIndex, task)
|
||||
return
|
||||
}
|
||||
|
||||
// task.filePath may be empty. If so, find the chunk first.
|
||||
if task.operation == ChunkOperationDelete || task.operation == ChunkOperationFossilize {
|
||||
if task.filePath == "" {
|
||||
@@ -132,9 +200,9 @@ func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
|
||||
fossilPath, exist, _, _ := operator.storage.FindChunk(threadIndex, task.chunkID, true)
|
||||
if exist {
|
||||
LOG_WARN("CHUNK_FOSSILIZE", "Chunk %s is already a fossil", task.chunkID)
|
||||
operator.fossilsLock.Lock()
|
||||
operator.collectionLock.Lock()
|
||||
operator.fossils = append(operator.fossils, fossilPath)
|
||||
operator.fossilsLock.Unlock()
|
||||
operator.collectionLock.Unlock()
|
||||
} else {
|
||||
LOG_ERROR("CHUNK_FIND", "Chunk %s does not exist in the storage", task.chunkID)
|
||||
}
|
||||
@@ -175,17 +243,17 @@ func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
|
||||
if err == nil {
|
||||
LOG_TRACE("CHUNK_DELETE", "Deleted chunk file %s as the fossil already exists", task.chunkID)
|
||||
}
|
||||
operator.fossilsLock.Lock()
|
||||
operator.collectionLock.Lock()
|
||||
operator.fossils = append(operator.fossils, fossilPath)
|
||||
operator.fossilsLock.Unlock()
|
||||
operator.collectionLock.Unlock()
|
||||
} else {
|
||||
LOG_ERROR("CHUNK_DELETE", "Failed to fossilize the chunk %s: %v", task.chunkID, err)
|
||||
}
|
||||
} else {
|
||||
LOG_TRACE("CHUNK_FOSSILIZE", "The chunk %s has been marked as a fossil", task.chunkID)
|
||||
operator.fossilsLock.Lock()
|
||||
operator.collectionLock.Lock()
|
||||
operator.fossils = append(operator.fossils, fossilPath)
|
||||
operator.fossilsLock.Unlock()
|
||||
operator.collectionLock.Unlock()
|
||||
}
|
||||
} else if task.operation == ChunkOperationResurrect {
|
||||
chunkPath, exist, _, err := operator.storage.FindChunk(threadIndex, task.chunkID, false)
|
||||
@@ -207,3 +275,267 @@ func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Download downloads a chunk from the storage.
|
||||
func (operator *ChunkOperator) DownloadChunk(threadIndex int, task ChunkTask) {
|
||||
|
||||
cachedPath := ""
|
||||
chunk := operator.config.GetChunk()
|
||||
chunk.isMetadata = task.isMetadata
|
||||
chunkID := task.chunkID
|
||||
|
||||
defer func() {
|
||||
if chunk != nil {
|
||||
operator.config.PutChunk(chunk)
|
||||
}
|
||||
} ()
|
||||
|
||||
if task.isMetadata && operator.snapshotCache != nil {
|
||||
|
||||
var exist bool
|
||||
var err error
|
||||
|
||||
// Reset the chunk with a hasher -- we're reading from the cache where chunk are not encrypted or compressed
|
||||
chunk.Reset(true)
|
||||
|
||||
cachedPath, exist, _, err = operator.snapshotCache.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
LOG_WARN("DOWNLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
|
||||
} else if exist {
|
||||
err = operator.snapshotCache.DownloadFile(0, cachedPath, chunk)
|
||||
if err != nil {
|
||||
LOG_WARN("DOWNLOAD_CACHE", "Failed to load the chunk %s from the snapshot cache: %v", chunkID, err)
|
||||
} else {
|
||||
actualChunkID := chunk.GetID()
|
||||
if actualChunkID != chunkID {
|
||||
LOG_WARN("DOWNLOAD_CACHE_CORRUPTED",
|
||||
"The chunk %s load from the snapshot cache has a hash id of %s", chunkID, actualChunkID)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been loaded from the snapshot cache", chunkID)
|
||||
|
||||
task.completionFunc(chunk, task.chunkIndex)
|
||||
chunk = nil
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reset the chunk without a hasher -- the downloaded content will be encrypted and/or compressed and the hasher
|
||||
// will be set up before the encryption
|
||||
chunk.Reset(false)
|
||||
chunk.isMetadata = task.isMetadata
|
||||
|
||||
// If failures are allowed, complete the task properly
|
||||
completeFailedChunk := func() {
|
||||
|
||||
atomic.AddInt64(&operator.NumberOfFailedChunks, 1)
|
||||
if operator.allowFailures {
|
||||
task.completionFunc(chunk, task.chunkIndex)
|
||||
}
|
||||
}
|
||||
|
||||
const MaxDownloadAttempts = 3
|
||||
for downloadAttempt := 0; ; downloadAttempt++ {
|
||||
|
||||
// Find the chunk by ID first.
|
||||
chunkPath, exist, _, err := operator.storage.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
completeFailedChunk()
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
|
||||
return
|
||||
}
|
||||
|
||||
if !exist {
|
||||
// No chunk is found. Have to find it in the fossil pool again.
|
||||
fossilPath, exist, _, err := operator.storage.FindChunk(threadIndex, chunkID, true)
|
||||
if err != nil {
|
||||
completeFailedChunk()
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
|
||||
return
|
||||
}
|
||||
|
||||
if !exist {
|
||||
|
||||
retry := false
|
||||
|
||||
// Retry for Hubic or WebDAV as it may return 404 even when the chunk exists
|
||||
if _, ok := operator.storage.(*HubicStorage); ok {
|
||||
retry = true
|
||||
}
|
||||
|
||||
if _, ok := operator.storage.(*WebDAVStorage); ok {
|
||||
retry = true
|
||||
}
|
||||
|
||||
if retry && downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "Failed to find the chunk %s; retrying", chunkID)
|
||||
continue
|
||||
}
|
||||
|
||||
// A chunk is not found. This is a serious error and hopefully it will never happen.
|
||||
completeFailedChunk()
|
||||
if err != nil {
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found: %v", chunkID, err)
|
||||
} else {
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found", chunkID)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// We can't download the fossil directly. We have to turn it back into a regular chunk and try
|
||||
// downloading again.
|
||||
err = operator.storage.MoveFile(threadIndex, fossilPath, chunkPath)
|
||||
if err != nil {
|
||||
completeFailedChunk()
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to resurrect chunk %s: %v", chunkID, err)
|
||||
return
|
||||
}
|
||||
|
||||
LOG_WARN("DOWNLOAD_RESURRECT", "Fossil %s has been resurrected", chunkID)
|
||||
continue
|
||||
}
|
||||
|
||||
err = operator.storage.DownloadFile(threadIndex, chunkPath, chunk)
|
||||
if err != nil {
|
||||
_, isHubic := operator.storage.(*HubicStorage)
|
||||
// Retry on EOF or if it is a Hubic backend as it may return 404 even when the chunk exists
|
||||
if (err == io.ErrUnexpectedEOF || isHubic) && downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "Failed to download the chunk %s: %v; retrying", chunkID, err)
|
||||
chunk.Reset(false)
|
||||
chunk.isMetadata = task.isMetadata
|
||||
continue
|
||||
} else {
|
||||
completeFailedChunk()
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to download the chunk %s: %v", chunkID, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
err = chunk.Decrypt(operator.config.ChunkKey, task.chunkHash)
|
||||
if err != nil {
|
||||
if downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "Failed to decrypt the chunk %s: %v; retrying", chunkID, err)
|
||||
chunk.Reset(false)
|
||||
chunk.isMetadata = task.isMetadata
|
||||
continue
|
||||
} else {
|
||||
completeFailedChunk()
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_DECRYPT", "Failed to decrypt the chunk %s: %v", chunkID, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
actualChunkID := chunk.GetID()
|
||||
if actualChunkID != chunkID {
|
||||
if downloadAttempt < MaxDownloadAttempts {
|
||||
LOG_WARN("DOWNLOAD_RETRY", "The chunk %s has a hash id of %s; retrying", chunkID, actualChunkID)
|
||||
chunk.Reset(false)
|
||||
chunk.isMetadata = task.isMetadata
|
||||
continue
|
||||
} else {
|
||||
completeFailedChunk()
|
||||
LOG_WERROR(operator.allowFailures, "DOWNLOAD_CORRUPTED", "The chunk %s has a hash id of %s", chunkID, actualChunkID)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
if chunk.isMetadata && len(cachedPath) > 0 {
|
||||
// Save a copy to the local snapshot cache
|
||||
err := operator.snapshotCache.UploadFile(threadIndex, cachedPath, chunk.GetBytes())
|
||||
if err != nil {
|
||||
LOG_WARN("DOWNLOAD_CACHE", "Failed to add the chunk %s to the snapshot cache: %v", chunkID, err)
|
||||
}
|
||||
}
|
||||
|
||||
downloadedChunkSize := atomic.AddInt64(&operator.downloadedChunkSize, int64(chunk.GetLength()))
|
||||
|
||||
if (operator.showStatistics || IsTracing()) && operator.totalChunkSize > 0 {
|
||||
|
||||
now := time.Now().Unix()
|
||||
if now <= operator.startTime {
|
||||
now = operator.startTime + 1
|
||||
}
|
||||
speed := downloadedChunkSize / (now - operator.startTime)
|
||||
remainingTime := int64(0)
|
||||
if speed > 0 {
|
||||
remainingTime = (operator.totalChunkSize-downloadedChunkSize)/speed + 1
|
||||
}
|
||||
percentage := float32(downloadedChunkSize * 1000 / operator.totalChunkSize)
|
||||
LOG_INFO("DOWNLOAD_PROGRESS", "Downloaded chunk %d size %d, %sB/s %s %.1f%%",
|
||||
task.chunkIndex+1, chunk.GetLength(),
|
||||
PrettySize(speed), PrettyTime(remainingTime), percentage/10)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_DOWNLOAD", "Chunk %s has been downloaded", chunkID)
|
||||
}
|
||||
|
||||
task.completionFunc(chunk, task.chunkIndex)
|
||||
chunk = nil
|
||||
return
|
||||
}
|
||||
|
||||
// UploadChunk is called by the task goroutines to perform the actual uploading
|
||||
func (operator *ChunkOperator) UploadChunk(threadIndex int, task ChunkTask) bool {
|
||||
|
||||
chunk := task.chunk
|
||||
chunkID := task.chunkID
|
||||
chunkSize := chunk.GetLength()
|
||||
|
||||
// For a snapshot chunk, verify that its chunk id is correct
|
||||
if task.isMetadata {
|
||||
chunk.VerifyID()
|
||||
}
|
||||
|
||||
if task.isMetadata && operator.storage.IsCacheNeeded() {
|
||||
// Save a copy to the local snapshot.
|
||||
chunkPath, exist, _, err := operator.snapshotCache.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
LOG_WARN("UPLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
|
||||
} else if exist {
|
||||
LOG_DEBUG("CHUNK_CACHE", "Chunk %s already exists in the snapshot cache", chunkID)
|
||||
} else if err = operator.snapshotCache.UploadFile(threadIndex, chunkPath, chunk.GetBytes()); err != nil {
|
||||
LOG_WARN("UPLOAD_CACHE", "Failed to save the chunk %s to the snapshot cache: %v", chunkID, err)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been saved to the snapshot cache", chunkID)
|
||||
}
|
||||
}
|
||||
|
||||
// This returns the path the chunk file should be at.
|
||||
chunkPath, exist, _, err := operator.storage.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
LOG_ERROR("UPLOAD_CHUNK", "Failed to find the path for the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
|
||||
if exist {
|
||||
// Chunk deduplication by name in effect here.
|
||||
LOG_DEBUG("CHUNK_DUPLICATE", "Chunk %s already exists", chunkID)
|
||||
|
||||
operator.UploadCompletionFunc(chunk, task.chunkIndex, false, chunkSize, 0)
|
||||
return false
|
||||
}
|
||||
|
||||
// Encrypt the chunk only after we know that it must be uploaded.
|
||||
err = chunk.Encrypt(operator.config.ChunkKey, chunk.GetHash(), task.isMetadata)
|
||||
if err != nil {
|
||||
LOG_ERROR("UPLOAD_CHUNK", "Failed to encrypt the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
|
||||
if !operator.config.dryRun {
|
||||
err = operator.storage.UploadFile(threadIndex, chunkPath, chunk.GetBytes())
|
||||
if err != nil {
|
||||
LOG_ERROR("UPLOAD_CHUNK", "Failed to upload the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
LOG_DEBUG("CHUNK_UPLOAD", "Chunk %s has been uploaded", chunkID)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_UPLOAD", "Uploading was skipped for chunk %s", chunkID)
|
||||
}
|
||||
|
||||
operator.UploadCompletionFunc(chunk, task.chunkIndex, false, chunkSize, chunk.GetLength())
|
||||
return true
|
||||
}
|
||||
@@ -15,11 +15,11 @@ import (
|
||||
"math/rand"
|
||||
)
|
||||
|
||||
func TestUploaderAndDownloader(t *testing.T) {
|
||||
func TestChunkOperator(t *testing.T) {
|
||||
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
setTestingT(t)
|
||||
SetLoggingLevel(INFO)
|
||||
SetLoggingLevel(DEBUG)
|
||||
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
@@ -87,35 +87,25 @@ func TestUploaderAndDownloader(t *testing.T) {
|
||||
totalFileSize += chunk.GetLength()
|
||||
}
|
||||
|
||||
completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
|
||||
chunkOperator := CreateChunkOperator(config, storage, nil, false, testThreads, false)
|
||||
chunkOperator.UploadCompletionFunc = func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
|
||||
t.Logf("Chunk %s size %d (%d/%d) uploaded", chunk.GetID(), chunkSize, chunkIndex, len(chunks))
|
||||
}
|
||||
|
||||
chunkUploader := CreateChunkUploader(config, storage, nil, testThreads, nil)
|
||||
chunkUploader.completionFunc = completionFunc
|
||||
chunkUploader.Start()
|
||||
|
||||
for i, chunk := range chunks {
|
||||
chunkUploader.StartChunk(chunk, i)
|
||||
chunkOperator.Upload(chunk, i, false)
|
||||
}
|
||||
|
||||
chunkUploader.Stop()
|
||||
|
||||
chunkDownloader := CreateChunkDownloader(config, storage, nil, true, testThreads, false)
|
||||
chunkDownloader.totalChunkSize = int64(totalFileSize)
|
||||
|
||||
for _, chunk := range chunks {
|
||||
chunkDownloader.AddChunk(chunk.GetHash())
|
||||
}
|
||||
chunkOperator.WaitForCompletion()
|
||||
|
||||
for i, chunk := range chunks {
|
||||
downloaded := chunkDownloader.WaitForChunk(i)
|
||||
downloaded := chunkOperator.Download(chunk.GetHash(), i, false)
|
||||
if downloaded.GetID() != chunk.GetID() {
|
||||
t.Errorf("Uploaded: %s, downloaded: %s", chunk.GetID(), downloaded.GetID())
|
||||
}
|
||||
}
|
||||
|
||||
chunkDownloader.Stop()
|
||||
chunkOperator.Stop()
|
||||
|
||||
for _, file := range listChunks(storage) {
|
||||
err = storage.DeleteFile(0, "chunks/"+file)
|
||||
@@ -1,151 +0,0 @@
|
||||
// Copyright (c) Acrosync LLC. All rights reserved.
|
||||
// Free for personal use and commercial trial
|
||||
// Commercial use requires per-user licenses available from https://duplicacy.com
|
||||
|
||||
package duplicacy
|
||||
|
||||
import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ChunkUploadTask represents a chunk to be uploaded.
|
||||
type ChunkUploadTask struct {
|
||||
chunk *Chunk
|
||||
chunkIndex int
|
||||
}
|
||||
|
||||
// ChunkUploader uploads chunks to the storage using one or more uploading goroutines. Chunks are added
|
||||
// by the call to StartChunk(), and then passed to the uploading goroutines. The completion function is
|
||||
// called when the downloading is completed. Note that ChunkUploader does not release chunks to the
|
||||
// chunk pool; instead
|
||||
type ChunkUploader struct {
|
||||
config *Config // Associated config
|
||||
storage Storage // Download from this storage
|
||||
snapshotCache *FileStorage // Used as cache if not nil; usually for uploading snapshot chunks
|
||||
threads int // Number of uploading goroutines
|
||||
taskQueue chan ChunkUploadTask // Uploading goroutines are listening on this channel for upload jobs
|
||||
stopChannel chan bool // Used to terminate uploading goroutines
|
||||
|
||||
numberOfUploadingTasks int32 // The number of uploading tasks
|
||||
|
||||
// Uploading goroutines call this function after having downloaded chunks
|
||||
completionFunc func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int)
|
||||
}
|
||||
|
||||
// CreateChunkUploader creates a chunk uploader.
|
||||
func CreateChunkUploader(config *Config, storage Storage, snapshotCache *FileStorage, threads int,
|
||||
completionFunc func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int)) *ChunkUploader {
|
||||
uploader := &ChunkUploader{
|
||||
config: config,
|
||||
storage: storage,
|
||||
snapshotCache: snapshotCache,
|
||||
threads: threads,
|
||||
taskQueue: make(chan ChunkUploadTask, 1),
|
||||
stopChannel: make(chan bool),
|
||||
completionFunc: completionFunc,
|
||||
}
|
||||
|
||||
return uploader
|
||||
}
|
||||
|
||||
// Starts starts uploading goroutines.
|
||||
func (uploader *ChunkUploader) Start() {
|
||||
for i := 0; i < uploader.threads; i++ {
|
||||
go func(threadIndex int) {
|
||||
defer CatchLogException()
|
||||
for {
|
||||
select {
|
||||
case task := <-uploader.taskQueue:
|
||||
uploader.Upload(threadIndex, task)
|
||||
case <-uploader.stopChannel:
|
||||
return
|
||||
}
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
}
|
||||
|
||||
// StartChunk sends a chunk to be uploaded to a waiting uploading goroutine. It may block if all uploading goroutines are busy.
|
||||
func (uploader *ChunkUploader) StartChunk(chunk *Chunk, chunkIndex int) {
|
||||
atomic.AddInt32(&uploader.numberOfUploadingTasks, 1)
|
||||
uploader.taskQueue <- ChunkUploadTask{
|
||||
chunk: chunk,
|
||||
chunkIndex: chunkIndex,
|
||||
}
|
||||
}
|
||||
|
||||
// Stop stops all uploading goroutines.
|
||||
func (uploader *ChunkUploader) Stop() {
|
||||
for atomic.LoadInt32(&uploader.numberOfUploadingTasks) > 0 {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
for i := 0; i < uploader.threads; i++ {
|
||||
uploader.stopChannel <- false
|
||||
}
|
||||
}
|
||||
|
||||
// Upload is called by the uploading goroutines to perform the actual uploading
|
||||
func (uploader *ChunkUploader) Upload(threadIndex int, task ChunkUploadTask) bool {
|
||||
|
||||
chunk := task.chunk
|
||||
chunkSize := chunk.GetLength()
|
||||
chunkID := chunk.GetID()
|
||||
|
||||
// For a snapshot chunk, verify that its chunk id is correct
|
||||
if uploader.snapshotCache != nil {
|
||||
chunk.VerifyID()
|
||||
}
|
||||
|
||||
if uploader.snapshotCache != nil && uploader.storage.IsCacheNeeded() {
|
||||
// Save a copy to the local snapshot.
|
||||
chunkPath, exist, _, err := uploader.snapshotCache.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
LOG_WARN("UPLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
|
||||
} else if exist {
|
||||
LOG_DEBUG("CHUNK_CACHE", "Chunk %s already exists in the snapshot cache", chunkID)
|
||||
} else if err = uploader.snapshotCache.UploadFile(threadIndex, chunkPath, chunk.GetBytes()); err != nil {
|
||||
LOG_WARN("UPLOAD_CACHE", "Failed to save the chunk %s to the snapshot cache: %v", chunkID, err)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been saved to the snapshot cache", chunkID)
|
||||
}
|
||||
}
|
||||
|
||||
// This returns the path the chunk file should be at.
|
||||
chunkPath, exist, _, err := uploader.storage.FindChunk(threadIndex, chunkID, false)
|
||||
if err != nil {
|
||||
LOG_ERROR("UPLOAD_CHUNK", "Failed to find the path for the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
|
||||
if exist {
|
||||
// Chunk deduplication by name in effect here.
|
||||
LOG_DEBUG("CHUNK_DUPLICATE", "Chunk %s already exists", chunkID)
|
||||
|
||||
uploader.completionFunc(chunk, task.chunkIndex, true, chunkSize, 0)
|
||||
atomic.AddInt32(&uploader.numberOfUploadingTasks, -1)
|
||||
return false
|
||||
}
|
||||
|
||||
// Encrypt the chunk only after we know that it must be uploaded.
|
||||
err = chunk.Encrypt(uploader.config.ChunkKey, chunk.GetHash(), uploader.snapshotCache != nil)
|
||||
if err != nil {
|
||||
LOG_ERROR("UPLOAD_CHUNK", "Failed to encrypt the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
|
||||
if !uploader.config.dryRun {
|
||||
err = uploader.storage.UploadFile(threadIndex, chunkPath, chunk.GetBytes())
|
||||
if err != nil {
|
||||
LOG_ERROR("UPLOAD_CHUNK", "Failed to upload the chunk %s: %v", chunkID, err)
|
||||
return false
|
||||
}
|
||||
LOG_DEBUG("CHUNK_UPLOAD", "Chunk %s has been uploaded", chunkID)
|
||||
} else {
|
||||
LOG_DEBUG("CHUNK_UPLOAD", "Uploading was skipped for chunk %s", chunkID)
|
||||
}
|
||||
|
||||
uploader.completionFunc(chunk, task.chunkIndex, false, chunkSize, chunk.GetLength())
|
||||
atomic.AddInt32(&uploader.numberOfUploadingTasks, -1)
|
||||
return true
|
||||
}
|
||||
@@ -16,6 +16,11 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
|
||||
"github.com/vmihailenco/msgpack"
|
||||
|
||||
)
|
||||
|
||||
// This is the hidden directory in the repository for storing various files.
|
||||
@@ -45,7 +50,7 @@ type Entry struct {
|
||||
EndChunk int
|
||||
EndOffset int
|
||||
|
||||
Attributes map[string][]byte
|
||||
Attributes *map[string][]byte
|
||||
}
|
||||
|
||||
// CreateEntry creates an entry from file properties.
|
||||
@@ -93,6 +98,27 @@ func CreateEntryFromFileInfo(fileInfo os.FileInfo, directory string) *Entry {
|
||||
return entry
|
||||
}
|
||||
|
||||
func (entry *Entry) Copy() *Entry {
|
||||
return &Entry{
|
||||
Path: entry.Path,
|
||||
Size: entry.Size,
|
||||
Time: entry.Time,
|
||||
Mode: entry.Mode,
|
||||
Link: entry.Link,
|
||||
Hash: entry.Hash,
|
||||
|
||||
UID: entry.UID,
|
||||
GID: entry.GID,
|
||||
|
||||
StartChunk: entry.StartChunk,
|
||||
StartOffset: entry.StartOffset,
|
||||
EndChunk: entry.EndChunk,
|
||||
EndOffset: entry.EndOffset,
|
||||
|
||||
Attributes: entry.Attributes,
|
||||
}
|
||||
}
|
||||
|
||||
// CreateEntryFromJSON creates an entry from a json description.
|
||||
func (entry *Entry) UnmarshalJSON(description []byte) (err error) {
|
||||
|
||||
@@ -175,17 +201,17 @@ func (entry *Entry) UnmarshalJSON(description []byte) (err error) {
|
||||
if attributes, ok := value.(map[string]interface{}); !ok {
|
||||
return fmt.Errorf("Attributes are invalid for file '%s' in the snapshot", entry.Path)
|
||||
} else {
|
||||
entry.Attributes = make(map[string][]byte)
|
||||
entry.Attributes = &map[string][]byte{}
|
||||
for name, object := range attributes {
|
||||
if object == nil {
|
||||
entry.Attributes[name] = []byte("")
|
||||
(*entry.Attributes)[name] = []byte("")
|
||||
} else if attributeInBase64, ok := object.(string); !ok {
|
||||
return fmt.Errorf("Attribute '%s' is invalid for file '%s' in the snapshot", name, entry.Path)
|
||||
} else if attribute, err := base64.StdEncoding.DecodeString(attributeInBase64); err != nil {
|
||||
return fmt.Errorf("Failed to decode attribute '%s' for file '%s' in the snapshot: %v",
|
||||
name, entry.Path, err)
|
||||
} else {
|
||||
entry.Attributes[name] = attribute
|
||||
(*entry.Attributes)[name] = attribute
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -244,7 +270,7 @@ func (entry *Entry) convertToObject(encodeName bool) map[string]interface{} {
|
||||
object["gid"] = entry.GID
|
||||
}
|
||||
|
||||
if len(entry.Attributes) > 0 {
|
||||
if entry.Attributes != nil && len(*entry.Attributes) > 0 {
|
||||
object["attributes"] = entry.Attributes
|
||||
}
|
||||
|
||||
@@ -259,6 +285,197 @@ func (entry *Entry) MarshalJSON() ([]byte, error) {
|
||||
return description, err
|
||||
}
|
||||
|
||||
var _ msgpack.CustomEncoder = (*Entry)(nil)
|
||||
var _ msgpack.CustomDecoder = (*Entry)(nil)
|
||||
|
||||
func (entry *Entry) EncodeMsgpack(encoder *msgpack.Encoder) error {
|
||||
|
||||
err := encoder.EncodeString(entry.Path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(entry.Size)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(entry.Time)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(int64(entry.Mode))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeString(entry.Link)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeString(entry.Hash)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(int64(entry.StartChunk))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(int64(entry.StartOffset))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(int64(entry.EndChunk))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(int64(entry.EndOffset))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(int64(entry.UID))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(int64(entry.GID))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var numberOfAttributes int64
|
||||
if entry.Attributes != nil {
|
||||
numberOfAttributes = int64(len(*entry.Attributes))
|
||||
}
|
||||
|
||||
err = encoder.EncodeInt(numberOfAttributes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if entry.Attributes != nil {
|
||||
attributes := make([]string, numberOfAttributes)
|
||||
i := 0
|
||||
for attribute := range *entry.Attributes {
|
||||
attributes[i] = attribute
|
||||
i++
|
||||
}
|
||||
sort.Strings(attributes)
|
||||
for _, attribute := range attributes {
|
||||
err = encoder.EncodeString(attribute)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = encoder.EncodeString(string((*entry.Attributes)[attribute]))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (entry *Entry) DecodeMsgpack(decoder *msgpack.Decoder) error {
|
||||
|
||||
var err error
|
||||
|
||||
entry.Path, err = decoder.DecodeString()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
entry.Size, err = decoder.DecodeInt64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
entry.Time, err = decoder.DecodeInt64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
mode, err := decoder.DecodeInt64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entry.Mode = uint32(mode)
|
||||
|
||||
entry.Link, err = decoder.DecodeString()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
entry.Hash, err = decoder.DecodeString()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
startChunk, err := decoder.DecodeInt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entry.StartChunk = int(startChunk)
|
||||
|
||||
startOffset, err := decoder.DecodeInt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entry.StartOffset = int(startOffset)
|
||||
|
||||
endChunk, err := decoder.DecodeInt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entry.EndChunk = int(endChunk)
|
||||
|
||||
endOffset, err := decoder.DecodeInt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entry.EndOffset = int(endOffset)
|
||||
|
||||
uid, err := decoder.DecodeInt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entry.UID = int(uid)
|
||||
|
||||
gid, err := decoder.DecodeInt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entry.GID = int(gid)
|
||||
|
||||
numberOfAttributes, err := decoder.DecodeInt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if numberOfAttributes > 0 {
|
||||
entry.Attributes = &map[string][]byte{}
|
||||
for i := 0; i < numberOfAttributes; i++ {
|
||||
attribute, err := decoder.DecodeString()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
value, err := decoder.DecodeString()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
(*entry.Attributes)[attribute] = []byte(value)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (entry *Entry) IsFile() bool {
|
||||
return entry.Mode&uint32(os.ModeType) == 0
|
||||
}
|
||||
@@ -271,10 +488,27 @@ func (entry *Entry) IsLink() bool {
|
||||
return entry.Mode&uint32(os.ModeSymlink) != 0
|
||||
}
|
||||
|
||||
func (entry *Entry) IsComplete() bool {
|
||||
return entry.Size >= 0
|
||||
}
|
||||
|
||||
func (entry *Entry) GetPermissions() os.FileMode {
|
||||
return os.FileMode(entry.Mode) & fileModeMask
|
||||
}
|
||||
|
||||
func (entry *Entry) GetParent() string {
|
||||
path := entry.Path
|
||||
if path != "" && path[len(path) - 1] == '/' {
|
||||
path = path[:len(path) - 1]
|
||||
}
|
||||
i := strings.LastIndex(path, "/")
|
||||
if i == -1 {
|
||||
return ""
|
||||
} else {
|
||||
return path[:i]
|
||||
}
|
||||
}
|
||||
|
||||
func (entry *Entry) IsSameAs(other *Entry) bool {
|
||||
return entry.Size == other.Size && entry.Time <= other.Time+1 && entry.Time >= other.Time-1
|
||||
}
|
||||
@@ -326,7 +560,7 @@ func (entry *Entry) RestoreMetadata(fullPath string, fileInfo *os.FileInfo, setO
|
||||
}
|
||||
}
|
||||
|
||||
if len(entry.Attributes) > 0 {
|
||||
if entry.Attributes != nil && len(*entry.Attributes) > 0 {
|
||||
entry.SetAttributesToFile(fullPath)
|
||||
}
|
||||
|
||||
@@ -335,47 +569,62 @@ func (entry *Entry) RestoreMetadata(fullPath string, fileInfo *os.FileInfo, setO
|
||||
|
||||
// Return -1 if 'left' should appear before 'right', 1 if opposite, and 0 if they are the same.
|
||||
// Files are always arranged before subdirectories under the same parent directory.
|
||||
func (left *Entry) Compare(right *Entry) int {
|
||||
|
||||
path1 := left.Path
|
||||
path2 := right.Path
|
||||
|
||||
func ComparePaths(left string, right string) int {
|
||||
p := 0
|
||||
for ; p < len(path1) && p < len(path2); p++ {
|
||||
if path1[p] != path2[p] {
|
||||
for ; p < len(left) && p < len(right); p++ {
|
||||
if left[p] != right[p] {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// c1, c2 is the first byte that differs
|
||||
// c1, c2 are the first bytes that differ
|
||||
var c1, c2 byte
|
||||
if p < len(path1) {
|
||||
c1 = path1[p]
|
||||
if p < len(left) {
|
||||
c1 = left[p]
|
||||
}
|
||||
if p < len(path2) {
|
||||
c2 = path2[p]
|
||||
if p < len(right) {
|
||||
c2 = right[p]
|
||||
}
|
||||
|
||||
// c3, c4 indicates how the current component ends
|
||||
// c3 == '/': the current component is a directory
|
||||
// c3 != '/': the current component is the last one
|
||||
// c3, c4 indicate how the current component ends
|
||||
// c3 == '/': the current component is a directory; c3 != '/': the current component is the last one
|
||||
c3 := c1
|
||||
for i := p; c3 != '/' && i < len(path1); i++ {
|
||||
c3 = path1[i]
|
||||
|
||||
// last1, last2 means if the current compoent is the last component
|
||||
last1 := true
|
||||
for i := p; i < len(left); i++ {
|
||||
c3 = left[i]
|
||||
if c3 == '/' {
|
||||
last1 = i == len(left) - 1
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
c4 := c2
|
||||
for i := p; c4 != '/' && i < len(path2); i++ {
|
||||
c4 = path2[i]
|
||||
last2 := true
|
||||
for i := p; i < len(right); i++ {
|
||||
c4 = right[i]
|
||||
if c4 == '/' {
|
||||
last2 = i == len(right) - 1
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if last1 != last2 {
|
||||
if last1 {
|
||||
return -1
|
||||
} else {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
if c3 == '/' {
|
||||
if c4 == '/' {
|
||||
// We are comparing two directory components
|
||||
if c1 == '/' {
|
||||
// left is shorter
|
||||
// Note that c2 maybe smaller than c1 but c1 is '/' which is counted
|
||||
// as 0
|
||||
// left is shorter; note that c2 maybe smaller than c1 but c1 should be treated as 0 therefore
|
||||
// this is a special case that must be handled separately
|
||||
return -1
|
||||
} else if c2 == '/' {
|
||||
// right is shorter
|
||||
@@ -397,6 +646,10 @@ func (left *Entry) Compare(right *Entry) int {
|
||||
}
|
||||
}
|
||||
|
||||
func (left *Entry) Compare(right *Entry) int {
|
||||
return ComparePaths(left.Path, right.Path)
|
||||
}
|
||||
|
||||
// This is used to sort entries by their names.
|
||||
type ByName []*Entry
|
||||
|
||||
@@ -443,7 +696,7 @@ func (files FileInfoCompare) Less(i, j int) bool {
|
||||
|
||||
// ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths
|
||||
// are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files.
|
||||
func ListEntries(top string, path string, fileList *[]*Entry, patterns []string, nobackupFile string, discardAttributes bool, excludeByAttribute bool) (directoryList []*Entry,
|
||||
func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool, listingChannel chan *Entry) (directoryList []*Entry,
|
||||
skippedFiles []string, err error) {
|
||||
|
||||
LOG_DEBUG("LIST_ENTRIES", "Listing %s", path)
|
||||
@@ -478,8 +731,6 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns []string,
|
||||
|
||||
sort.Sort(FileInfoCompare(files))
|
||||
|
||||
entries := make([]*Entry, 0, 4)
|
||||
|
||||
for _, f := range files {
|
||||
if f.Name() == DUPLICACY_DIRECTORY {
|
||||
continue
|
||||
@@ -520,11 +771,9 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns []string,
|
||||
}
|
||||
}
|
||||
|
||||
if !discardAttributes {
|
||||
entry.ReadAttributes(top)
|
||||
}
|
||||
entry.ReadAttributes(top)
|
||||
|
||||
if excludeByAttribute && excludedByAttribute(entry.Attributes) {
|
||||
if excludeByAttribute && entry.Attributes != nil && excludedByAttribute(*entry.Attributes) {
|
||||
LOG_DEBUG("LIST_EXCLUDE", "%s is excluded by attribute", entry.Path)
|
||||
continue
|
||||
}
|
||||
@@ -535,20 +784,20 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns []string,
|
||||
continue
|
||||
}
|
||||
|
||||
entries = append(entries, entry)
|
||||
if entry.IsDir() {
|
||||
directoryList = append(directoryList, entry)
|
||||
} else {
|
||||
listingChannel <- entry
|
||||
}
|
||||
}
|
||||
|
||||
// For top level directory we need to sort again because symlinks may have been changed
|
||||
if path == "" {
|
||||
sort.Sort(ByName(entries))
|
||||
sort.Sort(ByName(directoryList))
|
||||
}
|
||||
|
||||
for _, entry := range entries {
|
||||
if entry.IsDir() {
|
||||
directoryList = append(directoryList, entry)
|
||||
} else {
|
||||
*fileList = append(*fileList, entry)
|
||||
}
|
||||
for _, entry := range directoryList {
|
||||
listingChannel <- entry
|
||||
}
|
||||
|
||||
for i, j := 0, len(directoryList)-1; i < j; i, j = i+1, j-1 {
|
||||
@@ -597,3 +846,100 @@ func (entry *Entry) Diff(chunkHashes []string, chunkLengths []int,
|
||||
|
||||
return modifiedLength
|
||||
}
|
||||
|
||||
func (entry *Entry) EncodeWithHash(encoder *msgpack.Encoder) error {
|
||||
entryBytes, err := msgpack.Marshal(entry)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hash := sha256.Sum256(entryBytes)
|
||||
err = encoder.EncodeBytes(entryBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = encoder.EncodeBytes(hash[:])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func DecodeEntryWithHash(decoder *msgpack.Decoder) (*Entry, error) {
|
||||
entryBytes, err := decoder.DecodeBytes()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
hashBytes, err := decoder.DecodeBytes()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
expectedHash := sha256.Sum256(entryBytes)
|
||||
if bytes.Compare(expectedHash[:], hashBytes) != 0 {
|
||||
return nil, fmt.Errorf("corrupted file metadata")
|
||||
}
|
||||
|
||||
var entry Entry
|
||||
err = msgpack.Unmarshal(entryBytes, &entry)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &entry, nil
|
||||
}
|
||||
|
||||
func (entry *Entry) check(chunkLengths []int) error {
|
||||
|
||||
if entry.Size < 0 {
|
||||
return fmt.Errorf("The file %s hash an invalid size (%d)", entry.Path, entry.Size)
|
||||
}
|
||||
|
||||
if !entry.IsFile() || entry.Size == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if entry.StartChunk < 0 {
|
||||
return fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk)
|
||||
}
|
||||
|
||||
if entry.EndChunk >= len(chunkLengths) {
|
||||
return fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d",
|
||||
entry.Path, entry.EndChunk, len(chunkLengths))
|
||||
}
|
||||
|
||||
if entry.EndChunk < entry.StartChunk {
|
||||
return fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d",
|
||||
entry.Path, entry.StartChunk, entry.EndChunk)
|
||||
}
|
||||
|
||||
if entry.StartOffset >= chunkLengths[entry.StartChunk] {
|
||||
return fmt.Errorf("The file %s starts at offset %d of chunk %d of length %d",
|
||||
entry.Path, entry.StartOffset, entry.StartChunk, chunkLengths[entry.StartChunk])
|
||||
}
|
||||
|
||||
if entry.EndOffset > chunkLengths[entry.EndChunk] {
|
||||
return fmt.Errorf("The file %s ends at offset %d of chunk %d of length %d",
|
||||
entry.Path, entry.EndOffset, entry.EndChunk, chunkLengths[entry.EndChunk])
|
||||
}
|
||||
|
||||
fileSize := int64(0)
|
||||
|
||||
for i := entry.StartChunk; i <= entry.EndChunk; i++ {
|
||||
|
||||
start := 0
|
||||
if i == entry.StartChunk {
|
||||
start = entry.StartOffset
|
||||
}
|
||||
end := chunkLengths[i]
|
||||
if i == entry.EndChunk {
|
||||
end = entry.EndOffset
|
||||
}
|
||||
|
||||
fileSize += int64(end - start)
|
||||
}
|
||||
|
||||
if entry.Size != fileSize {
|
||||
return fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d",
|
||||
entry.Path, entry.Size, fileSize)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -13,8 +13,11 @@ import (
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
|
||||
"github.com/gilbertchen/xattr"
|
||||
"github.com/vmihailenco/msgpack"
|
||||
)
|
||||
|
||||
func TestEntrySort(t *testing.T) {
|
||||
@@ -27,19 +30,19 @@ func TestEntrySort(t *testing.T) {
|
||||
"\xBB\xDDfile",
|
||||
"\xFF\xDDfile",
|
||||
"ab/",
|
||||
"ab-/",
|
||||
"ab0/",
|
||||
"ab1/",
|
||||
"ab/c",
|
||||
"ab+/c-",
|
||||
"ab+/c0",
|
||||
"ab+/c/",
|
||||
"ab+/c/d",
|
||||
"ab+/c+/",
|
||||
"ab+/c+/d",
|
||||
"ab+/c0/",
|
||||
"ab+/c/d",
|
||||
"ab+/c+/d",
|
||||
"ab+/c0/d",
|
||||
"ab-/",
|
||||
"ab-/c",
|
||||
"ab0/",
|
||||
"ab1/",
|
||||
"ab1/c",
|
||||
"ab1/\xBB\xDDfile",
|
||||
"ab1/\xFF\xDDfile",
|
||||
@@ -86,7 +89,7 @@ func TestEntrySort(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestEntryList(t *testing.T) {
|
||||
func TestEntryOrder(t *testing.T) {
|
||||
|
||||
testDir := filepath.Join(os.TempDir(), "duplicacy_test")
|
||||
os.RemoveAll(testDir)
|
||||
@@ -98,16 +101,16 @@ func TestEntryList(t *testing.T) {
|
||||
"ab0",
|
||||
"ab1",
|
||||
"ab+/",
|
||||
"ab2/",
|
||||
"ab3/",
|
||||
"ab+/c",
|
||||
"ab+/c+",
|
||||
"ab+/c1",
|
||||
"ab+/c-/",
|
||||
"ab+/c-/d",
|
||||
"ab+/c0/",
|
||||
"ab+/c-/d",
|
||||
"ab+/c0/d",
|
||||
"ab2/",
|
||||
"ab2/c",
|
||||
"ab3/",
|
||||
"ab3/c",
|
||||
}
|
||||
|
||||
@@ -172,18 +175,24 @@ func TestEntryList(t *testing.T) {
|
||||
directories = append(directories, CreateEntry("", 0, 0, 0))
|
||||
|
||||
entries := make([]*Entry, 0, 4)
|
||||
entryChannel := make(chan *Entry, 1024)
|
||||
entries = append(entries, CreateEntry("", 0, 0, 0))
|
||||
|
||||
for len(directories) > 0 {
|
||||
directory := directories[len(directories)-1]
|
||||
directories = directories[:len(directories)-1]
|
||||
entries = append(entries, directory)
|
||||
subdirectories, _, err := ListEntries(testDir, directory.Path, &entries, nil, "", false, false)
|
||||
subdirectories, _, err := ListEntries(testDir, directory.Path, nil, "", false, entryChannel)
|
||||
if err != nil {
|
||||
t.Errorf("ListEntries(%s, %s) returned an error: %s", testDir, directory.Path, err)
|
||||
}
|
||||
directories = append(directories, subdirectories...)
|
||||
}
|
||||
|
||||
close(entryChannel)
|
||||
for entry := range entryChannel {
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
|
||||
entries = entries[1:]
|
||||
|
||||
for _, entry := range entries {
|
||||
@@ -274,18 +283,25 @@ func TestEntryExcludeByAttribute(t *testing.T) {
|
||||
directories = append(directories, CreateEntry("", 0, 0, 0))
|
||||
|
||||
entries := make([]*Entry, 0, 4)
|
||||
entryChannel := make(chan *Entry, 1024)
|
||||
entries = append(entries, CreateEntry("", 0, 0, 0))
|
||||
|
||||
for len(directories) > 0 {
|
||||
directory := directories[len(directories)-1]
|
||||
directories = directories[:len(directories)-1]
|
||||
entries = append(entries, directory)
|
||||
subdirectories, _, err := ListEntries(testDir, directory.Path, &entries, nil, "", false, excludeByAttribute)
|
||||
subdirectories, _, err := ListEntries(testDir, directory.Path, nil, "", excludeByAttribute, entryChannel)
|
||||
if err != nil {
|
||||
t.Errorf("ListEntries(%s, %s) returned an error: %s", testDir, directory.Path, err)
|
||||
}
|
||||
directories = append(directories, subdirectories...)
|
||||
}
|
||||
|
||||
close(entryChannel)
|
||||
|
||||
for entry := range entryChannel {
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
|
||||
entries = entries[1:]
|
||||
|
||||
for _, entry := range entries {
|
||||
@@ -327,3 +343,33 @@ func TestEntryExcludeByAttribute(t *testing.T) {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestEntryEncoding(t *testing.T) {
|
||||
buffer := new(bytes.Buffer)
|
||||
encoder := msgpack.NewEncoder(buffer)
|
||||
|
||||
entry1 := CreateEntry("abcd", 1, 2, 0700)
|
||||
err := encoder.Encode(entry1)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to encode the entry: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
t.Logf("msgpack size: %d\n", len(buffer.Bytes()))
|
||||
decoder := msgpack.NewDecoder(buffer)
|
||||
|
||||
description, _ := json.Marshal(entry1)
|
||||
t.Logf("json size: %d\n", len(description))
|
||||
|
||||
var entry2 Entry
|
||||
err = decoder.Decode(&entry2)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to decode the entry: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if entry1.Path != entry2.Path || entry1.Size != entry2.Size || entry1.Time != entry2.Time {
|
||||
t.Error("Decoded entry is different than the original one")
|
||||
}
|
||||
|
||||
}
|
||||
574
src/duplicacy_entrylist.go
Normal file
574
src/duplicacy_entrylist.go
Normal file
@@ -0,0 +1,574 @@
|
||||
// Copyright (c) Acrosync LLC. All rights reserved.
|
||||
// Free for personal use and commercial trial
|
||||
// Commercial use requires per-user licenses available from https://duplicacy.com
|
||||
|
||||
package duplicacy
|
||||
|
||||
import (
|
||||
"encoding/hex"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"os"
|
||||
"io"
|
||||
"path"
|
||||
"crypto/sha256"
|
||||
"crypto/rand"
|
||||
"sync"
|
||||
|
||||
"github.com/vmihailenco/msgpack"
|
||||
)
|
||||
|
||||
// This struct stores information about a file entry that has been modified
|
||||
type ModifiedEntry struct {
|
||||
Path string
|
||||
Size int64
|
||||
Hash string
|
||||
}
|
||||
|
||||
// EntryList is basically a list of entries, which can be kept in the memory, or serialized to a disk file,
|
||||
// depending on if maximumInMemoryEntries is reached.
|
||||
//
|
||||
// The idea behind the on-disk entry list is that entries are written to a disk file as they are coming in.
|
||||
// Entries that have been modified and thus need to be uploaded will have their Incomplete bit set (i.e.,
|
||||
// with a size of -1). When the limit is reached, entries are moved to a disk file but ModifiedEntries and
|
||||
// UploadedChunks are still kept in memory. When later entries are read from the entry list, incomplete
|
||||
// entries are back-annotated with info from ModifiedEntries and UploadedChunk* before sending them out.
|
||||
|
||||
type EntryList struct {
|
||||
onDiskFile *os.File // the file to store entries
|
||||
encoder *msgpack.Encoder // msgpack encoder for entry serialization
|
||||
entries []*Entry // in-memory entry list
|
||||
|
||||
SnapshotID string // the snapshot id
|
||||
Token string // this unique random token makes sure we read/write
|
||||
// the same entry list
|
||||
ModifiedEntries []ModifiedEntry // entries that will be uploaded
|
||||
|
||||
UploadedChunkHashes []string // chunks from entries that have been uploaded
|
||||
UploadedChunkLengths []int // chunk lengths from entries that have been uploaded
|
||||
uploadedChunkLock sync.Mutex // lock for UploadedChunkHashes and UploadedChunkLengths
|
||||
|
||||
PreservedChunkHashes []string // chunks from entries not changed
|
||||
PreservedChunkLengths []int // chunk lengths from entries not changed
|
||||
|
||||
Checksum string // checksum of all entries to detect disk corruption
|
||||
|
||||
maximumInMemoryEntries int // max in-memory entries
|
||||
NumberOfEntries int64 // number of entries (not including directories and links)
|
||||
cachePath string // the directory for the on-disk file
|
||||
|
||||
// These 3 variables are used in entry infomation back-annotation
|
||||
modifiedEntryIndex int // points to the current modified entry
|
||||
uploadedChunkIndex int // counter for upload chunks
|
||||
uploadedChunkOffset int // the start offset for the current modified entry
|
||||
|
||||
}
|
||||
|
||||
// Create a new entry list
|
||||
func CreateEntryList(snapshotID string, cachePath string, maximumInMemoryEntries int) (*EntryList, error) {
|
||||
|
||||
token := make([]byte, 16)
|
||||
_, err := rand.Read(token)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Failed to create a random token: %v", err)
|
||||
}
|
||||
|
||||
entryList := &EntryList {
|
||||
SnapshotID: snapshotID,
|
||||
maximumInMemoryEntries: maximumInMemoryEntries,
|
||||
cachePath: cachePath,
|
||||
Token: string(token),
|
||||
}
|
||||
|
||||
return entryList, nil
|
||||
|
||||
}
|
||||
|
||||
// Create the on-disk entry list file
|
||||
func (entryList *EntryList)createOnDiskFile() error {
|
||||
file, err := os.OpenFile(path.Join(entryList.cachePath, "incomplete_files"), os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to create on disk entry list: %v", err)
|
||||
}
|
||||
|
||||
entryList.onDiskFile = file
|
||||
entryList.encoder = msgpack.NewEncoder(file)
|
||||
|
||||
err = entryList.encoder.EncodeString(entryList.Token)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to create on disk entry list: %v", err)
|
||||
}
|
||||
|
||||
for _, entry := range entryList.entries {
|
||||
err = entry.EncodeWithHash(entryList.encoder)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Add an entry to the entry list
|
||||
func (entryList *EntryList)AddEntry(entry *Entry) error {
|
||||
|
||||
if !entry.IsDir() && !entry.IsLink() {
|
||||
entryList.NumberOfEntries++
|
||||
}
|
||||
|
||||
if !entry.IsComplete() {
|
||||
if entry.IsDir() || entry.IsLink() {
|
||||
entry.Size = 0
|
||||
} else {
|
||||
modifiedEntry := ModifiedEntry {
|
||||
Path: entry.Path,
|
||||
Size: -1,
|
||||
}
|
||||
|
||||
entryList.ModifiedEntries = append(entryList.ModifiedEntries, modifiedEntry)
|
||||
}
|
||||
}
|
||||
|
||||
if entryList.onDiskFile != nil {
|
||||
return entry.EncodeWithHash(entryList.encoder)
|
||||
} else {
|
||||
entryList.entries = append(entryList.entries, entry)
|
||||
if entryList.maximumInMemoryEntries >= 0 && len(entryList.entries) > entryList.maximumInMemoryEntries {
|
||||
err := entryList.createOnDiskFile()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Add a preserved chunk that belongs to files that have not been modified
|
||||
func (entryList *EntryList)AddPreservedChunk(chunkHash string, chunkSize int) {
|
||||
entryList.PreservedChunkHashes = append(entryList.PreservedChunkHashes, chunkHash)
|
||||
entryList.PreservedChunkLengths = append(entryList.PreservedChunkLengths, chunkSize)
|
||||
}
|
||||
|
||||
// Add a chunk just uploaded (that belongs to files that have been modified)
|
||||
func (entryList *EntryList)AddUploadedChunk(chunkIndex int, chunkHash string, chunkSize int) {
|
||||
entryList.uploadedChunkLock.Lock()
|
||||
|
||||
for len(entryList.UploadedChunkHashes) <= chunkIndex {
|
||||
entryList.UploadedChunkHashes = append(entryList.UploadedChunkHashes, "")
|
||||
}
|
||||
|
||||
for len(entryList.UploadedChunkLengths) <= chunkIndex {
|
||||
entryList.UploadedChunkLengths = append(entryList.UploadedChunkLengths, 0)
|
||||
}
|
||||
|
||||
entryList.UploadedChunkHashes[chunkIndex] = chunkHash
|
||||
entryList.UploadedChunkLengths[chunkIndex] = chunkSize
|
||||
entryList.uploadedChunkLock.Unlock()
|
||||
}
|
||||
|
||||
// Close the on-disk file
|
||||
func (entryList *EntryList) CloseOnDiskFile() error {
|
||||
|
||||
if entryList.onDiskFile == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := entryList.onDiskFile.Sync()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = entryList.onDiskFile.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
entryList.onDiskFile = nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// Return the length of the `index`th chunk
|
||||
func (entryList *EntryList) getChunkLength(index int) int {
|
||||
if index < len(entryList.PreservedChunkLengths) {
|
||||
return entryList.PreservedChunkLengths[index]
|
||||
} else {
|
||||
return entryList.UploadedChunkLengths[index - len(entryList.PreservedChunkLengths)]
|
||||
}
|
||||
}
|
||||
|
||||
// Sanity check for each entry
|
||||
func (entryList *EntryList) checkEntry(entry *Entry) error {
|
||||
|
||||
if entry.Size < 0 {
|
||||
return fmt.Errorf("the file %s hash an invalid size (%d)", entry.Path, entry.Size)
|
||||
}
|
||||
|
||||
if !entry.IsFile() || entry.Size == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
numberOfChunks := len(entryList.PreservedChunkLengths) + len(entryList.UploadedChunkLengths)
|
||||
|
||||
if entry.StartChunk < 0 {
|
||||
return fmt.Errorf("the file %s starts at chunk %d", entry.Path, entry.StartChunk)
|
||||
}
|
||||
|
||||
if entry.EndChunk >= numberOfChunks {
|
||||
return fmt.Errorf("the file %s ends at chunk %d while the number of chunks is %d",
|
||||
entry.Path, entry.EndChunk, numberOfChunks)
|
||||
}
|
||||
|
||||
if entry.EndChunk < entry.StartChunk {
|
||||
return fmt.Errorf("the file %s starts at chunk %d and ends at chunk %d",
|
||||
entry.Path, entry.StartChunk, entry.EndChunk)
|
||||
}
|
||||
|
||||
if entry.StartOffset >= entryList.getChunkLength(entry.StartChunk) {
|
||||
return fmt.Errorf("the file %s starts at offset %d of chunk %d with a length of %d",
|
||||
entry.Path, entry.StartOffset, entry.StartChunk, entryList.getChunkLength(entry.StartChunk))
|
||||
}
|
||||
|
||||
if entry.EndOffset > entryList.getChunkLength(entry.EndChunk) {
|
||||
return fmt.Errorf("the file %s ends at offset %d of chunk %d with a length of %d",
|
||||
entry.Path, entry.EndOffset, entry.EndChunk, entryList.getChunkLength(entry.EndChunk))
|
||||
}
|
||||
|
||||
fileSize := int64(0)
|
||||
|
||||
for i := entry.StartChunk; i <= entry.EndChunk; i++ {
|
||||
|
||||
start := 0
|
||||
if i == entry.StartChunk {
|
||||
start = entry.StartOffset
|
||||
}
|
||||
end := entryList.getChunkLength(i)
|
||||
if i == entry.EndChunk {
|
||||
end = entry.EndOffset
|
||||
}
|
||||
|
||||
fileSize += int64(end - start)
|
||||
}
|
||||
|
||||
if entry.Size != fileSize {
|
||||
return fmt.Errorf("the file %s has a size of %d but the total size of chunks is %d",
|
||||
entry.Path, entry.Size, fileSize)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// An incomplete entry (with a size of -1) does not have 'startChunk', 'startOffset', 'endChunk', and 'endOffset'. This function
|
||||
// is to fill in these information before sending the entry out.
|
||||
func (entryList *EntryList) fillAndSendEntry(entry *Entry, entryOut func(*Entry)error) (skipped bool, err error) {
|
||||
|
||||
if entry.IsComplete() {
|
||||
err := entryList.checkEntry(entry)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return false, entryOut(entry)
|
||||
}
|
||||
|
||||
if entryList.modifiedEntryIndex >= len(entryList.ModifiedEntries) {
|
||||
return false, fmt.Errorf("Unexpected file index %d (%d modified files)", entryList.modifiedEntryIndex, len(entryList.ModifiedEntries))
|
||||
}
|
||||
|
||||
modifiedEntry := &entryList.ModifiedEntries[entryList.modifiedEntryIndex]
|
||||
entryList.modifiedEntryIndex++
|
||||
|
||||
if modifiedEntry.Path != entry.Path {
|
||||
return false, fmt.Errorf("Unexpected file path %s when expecting %s", modifiedEntry.Path, entry.Path)
|
||||
}
|
||||
|
||||
if modifiedEntry.Size <= 0 {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
entry.Size = modifiedEntry.Size
|
||||
entry.Hash = modifiedEntry.Hash
|
||||
|
||||
entry.StartChunk = entryList.uploadedChunkIndex + len(entryList.PreservedChunkHashes)
|
||||
entry.StartOffset = entryList.uploadedChunkOffset
|
||||
entry.EndChunk = entry.StartChunk
|
||||
endOffset := int64(entry.StartOffset) + entry.Size
|
||||
|
||||
for entryList.uploadedChunkIndex < len(entryList.UploadedChunkLengths) && endOffset > int64(entryList.UploadedChunkLengths[entryList.uploadedChunkIndex]) {
|
||||
endOffset -= int64(entryList.UploadedChunkLengths[entryList.uploadedChunkIndex])
|
||||
entry.EndChunk++
|
||||
entryList.uploadedChunkIndex++
|
||||
}
|
||||
|
||||
if entryList.uploadedChunkIndex >= len(entryList.UploadedChunkLengths) {
|
||||
return false, fmt.Errorf("File %s has not been completely uploaded", entry.Path)
|
||||
}
|
||||
|
||||
entry.EndOffset = int(endOffset)
|
||||
entryList.uploadedChunkOffset = entry.EndOffset
|
||||
if entry.EndOffset == entryList.UploadedChunkLengths[entryList.uploadedChunkIndex] {
|
||||
entryList.uploadedChunkIndex++
|
||||
entryList.uploadedChunkOffset = 0
|
||||
}
|
||||
|
||||
err = entryList.checkEntry(entry)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return false, entryOut(entry)
|
||||
}
|
||||
|
||||
// Iterate through the entries in this entry list
|
||||
func (entryList *EntryList) ReadEntries(entryOut func(*Entry)error) (error) {
|
||||
|
||||
entryList.modifiedEntryIndex = 0
|
||||
entryList.uploadedChunkIndex = 0
|
||||
entryList.uploadedChunkOffset = 0
|
||||
|
||||
if entryList.onDiskFile == nil {
|
||||
for _, entry := range entryList.entries {
|
||||
skipped, err := entryList.fillAndSendEntry(entry.Copy(), entryOut)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skipped {
|
||||
continue
|
||||
}
|
||||
}
|
||||
} else {
|
||||
_, err := entryList.onDiskFile.Seek(0, os.SEEK_SET)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
decoder := msgpack.NewDecoder(entryList.onDiskFile)
|
||||
|
||||
_, err = decoder.DecodeString()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, err = decoder.PeekCode(); err == nil; _, err = decoder.PeekCode() {
|
||||
entry, err := DecodeEntryWithHash(decoder)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
skipped, err := entryList.fillAndSendEntry(entry, entryOut)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skipped {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if err != io.EOF {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// When saving an incomplete snapshot, the on-disk entry list ('incomplete_files') is renamed to
|
||||
// 'incomplete_snapshot', and this EntryList struct is saved as 'incomplete_chunks'.
|
||||
func (entryList *EntryList) SaveIncompleteSnapshot() {
|
||||
entryList.uploadedChunkLock.Lock()
|
||||
defer entryList.uploadedChunkLock.Unlock()
|
||||
|
||||
if entryList.onDiskFile == nil {
|
||||
err := entryList.createOnDiskFile()
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_SAVE", "Failed to create the incomplete snapshot file: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, entry := range entryList.entries {
|
||||
|
||||
err = entry.EncodeWithHash(entryList.encoder)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_SAVE", "Failed to save the entry %s: %v", entry.Path, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
err := entryList.onDiskFile.Close()
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_SAVE", "Failed to close the on-disk file: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
filePath := path.Join(entryList.cachePath, "incomplete_snapshot")
|
||||
if _, err := os.Stat(filePath); err == nil {
|
||||
err = os.Remove(filePath)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_REMOVE", "Failed to remove previous incomplete snapshot: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
err = os.Rename(path.Join(entryList.cachePath, "incomplete_files"), filePath)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_SAVE", "Failed to rename the incomplete snapshot file: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
chunkFile := path.Join(entryList.cachePath, "incomplete_chunks")
|
||||
file, err := os.OpenFile(chunkFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_SAVE", "Failed to create the incomplete chunk file: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
defer file.Close()
|
||||
encoder := msgpack.NewEncoder(file)
|
||||
|
||||
entryList.Checksum = entryList.CalculateChecksum()
|
||||
|
||||
err = encoder.Encode(entryList)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_SAVE", "Failed to save the incomplete snapshot: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
LOG_INFO("INCOMPLETE_SAVE", "Incomplete snapshot saved to %s", filePath)
|
||||
}
|
||||
|
||||
// Calculate a checksum for this entry list
|
||||
func (entryList *EntryList) CalculateChecksum() string{
|
||||
|
||||
hasher := sha256.New()
|
||||
for _, s := range entryList.UploadedChunkHashes {
|
||||
hasher.Write([]byte(s))
|
||||
}
|
||||
|
||||
buffer := make([]byte, 8)
|
||||
for _, i := range entryList.UploadedChunkLengths {
|
||||
binary.LittleEndian.PutUint64(buffer, uint64(i))
|
||||
hasher.Write(buffer)
|
||||
}
|
||||
|
||||
for _, s := range entryList.PreservedChunkHashes {
|
||||
hasher.Write([]byte(s))
|
||||
}
|
||||
|
||||
for _, i := range entryList.PreservedChunkLengths {
|
||||
binary.LittleEndian.PutUint64(buffer, uint64(i))
|
||||
hasher.Write(buffer)
|
||||
}
|
||||
|
||||
for _, entry := range entryList.ModifiedEntries {
|
||||
binary.LittleEndian.PutUint64(buffer, uint64(entry.Size))
|
||||
hasher.Write(buffer)
|
||||
hasher.Write([]byte(entry.Hash))
|
||||
}
|
||||
|
||||
return hex.EncodeToString(hasher.Sum(nil))
|
||||
}
|
||||
|
||||
// Check if all chunks exist in 'chunkCache'
|
||||
func (entryList *EntryList) CheckChunks(config *Config, chunkCache map[string]bool) bool {
|
||||
for _, chunkHash := range entryList.UploadedChunkHashes {
|
||||
chunkID := config.GetChunkIDFromHash(chunkHash)
|
||||
if _, ok := chunkCache[chunkID]; !ok {
|
||||
return false
|
||||
}
|
||||
}
|
||||
for _, chunkHash := range entryList.PreservedChunkHashes {
|
||||
chunkID := config.GetChunkIDFromHash(chunkHash)
|
||||
if _, ok := chunkCache[chunkID]; !ok {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
|
||||
}
|
||||
|
||||
// Recover the on disk file from 'incomplete_snapshot', and restore the EntryList struct
|
||||
// from 'incomplete_chunks'
|
||||
func loadIncompleteSnapshot(snapshotID string, cachePath string) *EntryList {
|
||||
|
||||
onDiskFilePath := path.Join(cachePath, "incomplete_snapshot")
|
||||
entryListFilePath := path.Join(cachePath, "incomplete_chunks")
|
||||
|
||||
if _, err := os.Stat(onDiskFilePath); os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if _, err := os.Stat(entryListFilePath); os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
|
||||
entryList := &EntryList {}
|
||||
entryListFile, err := os.OpenFile(entryListFilePath, os.O_RDONLY, 0600)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_LOAD", "Failed to open the incomplete snapshot: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
defer entryListFile.Close()
|
||||
decoder := msgpack.NewDecoder(entryListFile)
|
||||
err = decoder.Decode(&entryList)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_LOAD", "Failed to load the incomplete snapshot: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
checksum := entryList.CalculateChecksum()
|
||||
if checksum != entryList.Checksum {
|
||||
LOG_WARN("INCOMPLETE_LOAD", "Failed to load the incomplete snapshot: checksum mismatched")
|
||||
return nil
|
||||
}
|
||||
|
||||
onDiskFile, err := os.OpenFile(onDiskFilePath, os.O_RDONLY, 0600)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_LOAD", "Failed to open the on disk file for the incomplete snapshot: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
decoder = msgpack.NewDecoder(onDiskFile)
|
||||
token, err := decoder.DecodeString()
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_LOAD", "Failed to read the token for the incomplete snapshot: %v", err)
|
||||
onDiskFile.Close()
|
||||
return nil
|
||||
}
|
||||
|
||||
if token != entryList.Token {
|
||||
LOG_WARN("INCOMPLETE_LOAD", "Mismatched tokens in the incomplete snapshot")
|
||||
onDiskFile.Close()
|
||||
return nil
|
||||
}
|
||||
|
||||
entryList.onDiskFile = onDiskFile
|
||||
|
||||
for i, hash := range entryList.UploadedChunkHashes {
|
||||
if len(hash) == 0 {
|
||||
// An empty hash means the chunk has not been uploaded in previous run
|
||||
entryList.UploadedChunkHashes = entryList.UploadedChunkHashes[0:i]
|
||||
entryList.UploadedChunkLengths = entryList.UploadedChunkLengths[0:i]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
LOG_INFO("INCOMPLETE_LOAD", "Previous incomlete backup contains %d files and %d chunks",
|
||||
entryList.NumberOfEntries, len(entryList.PreservedChunkLengths) + len(entryList.UploadedChunkHashes))
|
||||
|
||||
return entryList
|
||||
}
|
||||
|
||||
// Delete the two incomplete files.
|
||||
func deleteIncompleteSnapshot(cachePath string) {
|
||||
|
||||
for _, file := range []string{"incomplete_snapshot", "incomplete_chunks"} {
|
||||
filePath := path.Join(cachePath, file)
|
||||
if _, err := os.Stat(filePath); err == nil {
|
||||
err = os.Remove(filePath)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_REMOVE", "Failed to remove the incomplete snapshot: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
179
src/duplicacy_entrylist_test.go
Normal file
179
src/duplicacy_entrylist_test.go
Normal file
@@ -0,0 +1,179 @@
|
||||
// Copyright (c) Acrosync LLC. All rights reserved.
|
||||
// Free for personal use and commercial trial
|
||||
// Commercial use requires per-user licenses available from https://duplicacy.com
|
||||
|
||||
package duplicacy
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path"
|
||||
"time"
|
||||
"testing"
|
||||
"math/rand"
|
||||
)
|
||||
|
||||
|
||||
func generateRandomString(length int) string {
|
||||
var letters = []rune("abcdefghijklmnopqrstuvwxyz")
|
||||
b := make([]rune, length)
|
||||
for i := range b {
|
||||
b[i] = letters[rand.Intn(len(letters))]
|
||||
}
|
||||
return string(b)
|
||||
}
|
||||
|
||||
var fileSizeGenerator = rand.NewZipf(rand.New(rand.NewSource(time.Now().UnixNano())), 1.2, 1.0, 1024)
|
||||
|
||||
func generateRandomFileSize() int64 {
|
||||
return int64(fileSizeGenerator.Uint64() + 1)
|
||||
}
|
||||
|
||||
func generateRandomChunks(totalFileSize int64) (chunks []string, lengths []int) {
|
||||
|
||||
totalChunkSize := int64(0)
|
||||
|
||||
for totalChunkSize < totalFileSize {
|
||||
chunks = append(chunks, generateRandomString(64))
|
||||
chunkSize := int64(1 + (rand.Int() % 64))
|
||||
if chunkSize + totalChunkSize > totalFileSize {
|
||||
chunkSize = totalFileSize - totalChunkSize
|
||||
}
|
||||
lengths = append(lengths, int(chunkSize))
|
||||
totalChunkSize += chunkSize
|
||||
}
|
||||
return chunks, lengths
|
||||
|
||||
}
|
||||
|
||||
func getPreservedChunks(entries []*Entry, chunks []string, lengths []int) (preservedChunks []string, preservedChunkLengths []int) {
|
||||
lastPreservedChunk := -1
|
||||
for i := range entries {
|
||||
if entries[i].Size < 0 {
|
||||
continue
|
||||
}
|
||||
delta := entries[i].StartChunk - len(chunks)
|
||||
if lastPreservedChunk != entries[i].StartChunk {
|
||||
lastPreservedChunk = entries[i].StartChunk
|
||||
preservedChunks = append(preservedChunks, chunks[entries[i].StartChunk])
|
||||
preservedChunkLengths = append(preservedChunkLengths, lengths[entries[i].StartChunk])
|
||||
delta++
|
||||
}
|
||||
for j := entries[i].StartChunk + 1; i <= entries[i].EndChunk; i++ {
|
||||
preservedChunks = append(preservedChunks, chunks[j])
|
||||
preservedChunkLengths = append(preservedChunkLengths, lengths[j])
|
||||
lastPreservedChunk = j
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func testEntryList(t *testing.T, numberOfEntries int, maximumInMemoryEntries int) {
|
||||
|
||||
entries := make([]*Entry, 0, numberOfEntries)
|
||||
entrySizes := make([]int64, 0)
|
||||
|
||||
for i := 0; i < numberOfEntries; i++ {
|
||||
entry:= CreateEntry(generateRandomString(16), -1, 0, 0700)
|
||||
entries = append(entries, entry)
|
||||
entrySizes = append(entrySizes, generateRandomFileSize())
|
||||
}
|
||||
|
||||
totalFileSize := int64(0)
|
||||
for _, size := range entrySizes {
|
||||
totalFileSize += size
|
||||
}
|
||||
|
||||
testDir := path.Join(os.TempDir(), "duplicacy_test")
|
||||
os.RemoveAll(testDir)
|
||||
os.MkdirAll(testDir, 0700)
|
||||
|
||||
os.MkdirAll(testDir + "/list1", 0700)
|
||||
os.MkdirAll(testDir + "/list2", 0700)
|
||||
os.MkdirAll(testDir + "/list3", 0700)
|
||||
os.MkdirAll(testDir + "/list1", 0700)
|
||||
|
||||
// For the first entry list, all entries are new
|
||||
entryList, _ := CreateEntryList("test", testDir + "/list1", maximumInMemoryEntries)
|
||||
for _, entry := range entries {
|
||||
entryList.AddEntry(entry)
|
||||
}
|
||||
uploadedChunks, uploadedChunksLengths := generateRandomChunks(totalFileSize)
|
||||
for i, chunk := range uploadedChunks {
|
||||
entryList.AddUploadedChunk(i, chunk, uploadedChunksLengths[i])
|
||||
}
|
||||
|
||||
for i := range entryList.ModifiedEntries {
|
||||
entryList.ModifiedEntries[i].Size = entrySizes[i]
|
||||
}
|
||||
|
||||
totalEntries := 0
|
||||
err := entryList.ReadEntries(func(entry *Entry) error {
|
||||
totalEntries++
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("ReadEntries returned an error: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
if totalEntries != numberOfEntries {
|
||||
t.Errorf("EntryList contains %d entries instead of %d", totalEntries, numberOfEntries)
|
||||
return
|
||||
}
|
||||
|
||||
// For the second entry list, half of the entries are new
|
||||
for i := range entries {
|
||||
if rand.Int() % 1 == 0 {
|
||||
entries[i].Size = -1
|
||||
} else {
|
||||
entries[i].Size = entrySizes[i]
|
||||
}
|
||||
}
|
||||
|
||||
preservedChunks, preservedChunkLengths := getPreservedChunks(entries, uploadedChunks, uploadedChunksLengths)
|
||||
entryList, _ = CreateEntryList("test", testDir + "/list2", maximumInMemoryEntries)
|
||||
for _, entry := range entries {
|
||||
entryList.AddEntry(entry)
|
||||
}
|
||||
for i, chunk := range preservedChunks {
|
||||
entryList.AddPreservedChunk(chunk, preservedChunkLengths[i])
|
||||
}
|
||||
|
||||
totalFileSize = 0
|
||||
for i := range entryList.ModifiedEntries {
|
||||
fileSize := generateRandomFileSize()
|
||||
entryList.ModifiedEntries[i].Size = fileSize
|
||||
totalFileSize += fileSize
|
||||
}
|
||||
|
||||
uploadedChunks, uploadedChunksLengths = generateRandomChunks(totalFileSize)
|
||||
for i, chunk := range uploadedChunks {
|
||||
entryList.AddUploadedChunk(i, chunk, uploadedChunksLengths[i])
|
||||
}
|
||||
|
||||
totalEntries = 0
|
||||
err = entryList.ReadEntries(func(entry *Entry) error {
|
||||
totalEntries++
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("ReadEntries returned an error: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
if totalEntries != numberOfEntries {
|
||||
t.Errorf("EntryList contains %d entries instead of %d", totalEntries, numberOfEntries)
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
func TestEntryList(t *testing.T) {
|
||||
testEntryList(t, 1024, 1024)
|
||||
testEntryList(t, 1024, 512)
|
||||
testEntryList(t, 1024, 0)
|
||||
}
|
||||
@@ -8,17 +8,22 @@ import (
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"sort"
|
||||
"bytes"
|
||||
|
||||
"github.com/vmihailenco/msgpack"
|
||||
|
||||
)
|
||||
|
||||
// Snapshot represents a backup of the repository.
|
||||
type Snapshot struct {
|
||||
Version int
|
||||
ID string // the snapshot id; must be different for different repositories
|
||||
Revision int // the revision number
|
||||
Options string // options used to create this snapshot (some not included)
|
||||
@@ -37,14 +42,11 @@ type Snapshot struct {
|
||||
// A sequence of chunks whose aggregated content is the json representation of 'ChunkLengths'.
|
||||
LengthSequence []string
|
||||
|
||||
Files []*Entry // list of files and subdirectories
|
||||
|
||||
ChunkHashes []string // a sequence of chunks representing the file content
|
||||
ChunkLengths []int // the length of each chunk
|
||||
|
||||
Flag bool // used to mark certain snapshots for deletion or copy
|
||||
|
||||
discardAttributes bool
|
||||
}
|
||||
|
||||
// CreateEmptySnapshot creates an empty snapshot.
|
||||
@@ -56,16 +58,14 @@ func CreateEmptySnapshot(id string) (snapshto *Snapshot) {
|
||||
}
|
||||
}
|
||||
|
||||
// CreateSnapshotFromDirectory creates a snapshot from the local directory 'top'. Only 'Files'
|
||||
// will be constructed, while 'ChunkHashes' and 'ChunkLengths' can only be populated after uploading.
|
||||
func CreateSnapshotFromDirectory(id string, top string, nobackupFile string, filtersFile string, excludeByAttribute bool) (snapshot *Snapshot, skippedDirectories []string,
|
||||
skippedFiles []string, err error) {
|
||||
type DirectoryListing struct {
|
||||
directory string
|
||||
files *[]Entry
|
||||
}
|
||||
|
||||
snapshot = &Snapshot{
|
||||
ID: id,
|
||||
Revision: 0,
|
||||
StartTime: time.Now().Unix(),
|
||||
}
|
||||
func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string,
|
||||
filtersFile string, excludeByAttribute bool, listingChannel chan *Entry,
|
||||
skippedDirectories *[]string, skippedFiles *[]string) {
|
||||
|
||||
var patterns []string
|
||||
|
||||
@@ -77,45 +77,128 @@ func CreateSnapshotFromDirectory(id string, top string, nobackupFile string, fil
|
||||
directories := make([]*Entry, 0, 256)
|
||||
directories = append(directories, CreateEntry("", 0, 0, 0))
|
||||
|
||||
snapshot.Files = make([]*Entry, 0, 256)
|
||||
|
||||
attributeThreshold := 1024 * 1024
|
||||
if attributeThresholdValue, found := os.LookupEnv("DUPLICACY_ATTRIBUTE_THRESHOLD"); found && attributeThresholdValue != "" {
|
||||
attributeThreshold, _ = strconv.Atoi(attributeThresholdValue)
|
||||
}
|
||||
|
||||
for len(directories) > 0 {
|
||||
|
||||
directory := directories[len(directories)-1]
|
||||
directories = directories[:len(directories)-1]
|
||||
snapshot.Files = append(snapshot.Files, directory)
|
||||
subdirectories, skipped, err := ListEntries(top, directory.Path, &snapshot.Files, patterns, nobackupFile, snapshot.discardAttributes, excludeByAttribute)
|
||||
subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingChannel)
|
||||
if err != nil {
|
||||
if directory.Path == "" {
|
||||
LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err)
|
||||
return nil, nil, nil, err
|
||||
return
|
||||
}
|
||||
LOG_WARN("LIST_FAILURE", "Failed to list subdirectory %s: %v", directory.Path, err)
|
||||
skippedDirectories = append(skippedDirectories, directory.Path)
|
||||
if skippedDirectories != nil {
|
||||
*skippedDirectories = append(*skippedDirectories, directory.Path)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
directories = append(directories, subdirectories...)
|
||||
skippedFiles = append(skippedFiles, skipped...)
|
||||
|
||||
if !snapshot.discardAttributes && len(snapshot.Files) > attributeThreshold {
|
||||
LOG_INFO("LIST_ATTRIBUTES", "Discarding file attributes")
|
||||
snapshot.discardAttributes = true
|
||||
for _, file := range snapshot.Files {
|
||||
file.Attributes = nil
|
||||
}
|
||||
if skippedFiles != nil {
|
||||
*skippedFiles = append(*skippedFiles, skipped...)
|
||||
}
|
||||
|
||||
}
|
||||
close(listingChannel)
|
||||
}
|
||||
|
||||
func (snapshot *Snapshot)ListRemoteFiles(config *Config, chunkOperator *ChunkOperator, entryOut func(*Entry) bool) {
|
||||
|
||||
var chunks []string
|
||||
for _, chunkHash := range snapshot.FileSequence {
|
||||
chunks = append(chunks, chunkOperator.config.GetChunkIDFromHash(chunkHash))
|
||||
}
|
||||
|
||||
// Remove the root entry
|
||||
snapshot.Files = snapshot.Files[1:]
|
||||
var chunk *Chunk
|
||||
reader := sequenceReader{
|
||||
sequence: snapshot.FileSequence,
|
||||
buffer: new(bytes.Buffer),
|
||||
refillFunc: func(chunkHash string) []byte {
|
||||
if chunk != nil {
|
||||
config.PutChunk(chunk)
|
||||
}
|
||||
chunk = chunkOperator.Download(chunkHash, 0, true)
|
||||
return chunk.GetBytes()
|
||||
},
|
||||
}
|
||||
|
||||
if snapshot.Version == 0 {
|
||||
LOG_INFO("SNAPSHOT_VERSION", "snapshot %s at revision %d is encoded in an old version format", snapshot.ID, snapshot.Revision)
|
||||
files := make([]*Entry, 0)
|
||||
decoder := json.NewDecoder(&reader)
|
||||
|
||||
// read open bracket
|
||||
_, err := decoder.Token()
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_PARSE", "Failed to open the snapshot %s at revision %d: not a list of entries",
|
||||
snapshot.ID, snapshot.Revision)
|
||||
return
|
||||
}
|
||||
|
||||
for decoder.More() {
|
||||
var entry Entry
|
||||
err = decoder.Decode(&entry)
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: %v",
|
||||
snapshot.ID, snapshot.Revision, err)
|
||||
return
|
||||
}
|
||||
files = append(files, &entry)
|
||||
}
|
||||
|
||||
sort.Sort(ByName(files))
|
||||
|
||||
for _, file := range files {
|
||||
if !entryOut(file) {
|
||||
return
|
||||
}
|
||||
}
|
||||
} else if snapshot.Version == 1 {
|
||||
decoder := msgpack.NewDecoder(&reader)
|
||||
|
||||
lastEndChunk := 0
|
||||
|
||||
// while the array contains values
|
||||
for _, err := decoder.PeekCode(); err != io.EOF; _, err = decoder.PeekCode() {
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_PARSE", "Failed to parse the snapshot %s at revision %d: %v",
|
||||
snapshot.ID, snapshot.Revision, err)
|
||||
return
|
||||
}
|
||||
var entry Entry
|
||||
err = decoder.Decode(&entry)
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load the snapshot %s at revision %d: %v",
|
||||
snapshot.ID, snapshot.Revision, err)
|
||||
return
|
||||
}
|
||||
|
||||
if entry.IsFile() {
|
||||
entry.StartChunk += lastEndChunk
|
||||
entry.EndChunk += entry.StartChunk
|
||||
lastEndChunk = entry.EndChunk
|
||||
}
|
||||
|
||||
err = entry.check(snapshot.ChunkLengths)
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_ENTRY", "Failed to load the snapshot %s at revision %d: %v",
|
||||
snapshot.ID, snapshot.Revision, err)
|
||||
return
|
||||
}
|
||||
|
||||
if !entryOut(&entry) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
LOG_ERROR("SNAPSHOT_VERSION", "snapshot %s at revision %d is encoded in unsupported version %d format",
|
||||
snapshot.ID, snapshot.Revision, snapshot.Version)
|
||||
return
|
||||
}
|
||||
|
||||
return snapshot, skippedDirectories, skippedFiles, nil
|
||||
}
|
||||
|
||||
func AppendPattern(patterns []string, new_pattern string) (new_patterns []string) {
|
||||
@@ -215,100 +298,6 @@ func ProcessFilterLines(patternFileLines []string, includedFiles []string) (patt
|
||||
return patterns
|
||||
}
|
||||
|
||||
// This is the struct used to save/load incomplete snapshots
|
||||
type IncompleteSnapshot struct {
|
||||
Files []*Entry
|
||||
ChunkHashes []string
|
||||
ChunkLengths []int
|
||||
}
|
||||
|
||||
// LoadIncompleteSnapshot loads the incomplete snapshot if it exists
|
||||
func LoadIncompleteSnapshot() (snapshot *Snapshot) {
|
||||
snapshotFile := path.Join(GetDuplicacyPreferencePath(), "incomplete")
|
||||
description, err := ioutil.ReadFile(snapshotFile)
|
||||
if err != nil {
|
||||
LOG_DEBUG("INCOMPLETE_LOCATE", "Failed to locate incomplete snapshot: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
var incompleteSnapshot IncompleteSnapshot
|
||||
|
||||
err = json.Unmarshal(description, &incompleteSnapshot)
|
||||
if err != nil {
|
||||
LOG_DEBUG("INCOMPLETE_PARSE", "Failed to parse incomplete snapshot: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
var chunkHashes []string
|
||||
for _, chunkHash := range incompleteSnapshot.ChunkHashes {
|
||||
hash, err := hex.DecodeString(chunkHash)
|
||||
if err != nil {
|
||||
LOG_DEBUG("INCOMPLETE_DECODE", "Failed to decode incomplete snapshot: %v", err)
|
||||
return nil
|
||||
}
|
||||
chunkHashes = append(chunkHashes, string(hash))
|
||||
}
|
||||
|
||||
snapshot = &Snapshot{
|
||||
Files: incompleteSnapshot.Files,
|
||||
ChunkHashes: chunkHashes,
|
||||
ChunkLengths: incompleteSnapshot.ChunkLengths,
|
||||
}
|
||||
LOG_INFO("INCOMPLETE_LOAD", "Incomplete snapshot loaded from %s", snapshotFile)
|
||||
return snapshot
|
||||
}
|
||||
|
||||
// SaveIncompleteSnapshot saves the incomplete snapshot under the preference directory
|
||||
func SaveIncompleteSnapshot(snapshot *Snapshot) {
|
||||
var files []*Entry
|
||||
for _, file := range snapshot.Files {
|
||||
// All unprocessed files will have a size of -1
|
||||
if file.Size >= 0 {
|
||||
file.Attributes = nil
|
||||
files = append(files, file)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
var chunkHashes []string
|
||||
for _, chunkHash := range snapshot.ChunkHashes {
|
||||
chunkHashes = append(chunkHashes, hex.EncodeToString([]byte(chunkHash)))
|
||||
}
|
||||
|
||||
incompleteSnapshot := IncompleteSnapshot{
|
||||
Files: files,
|
||||
ChunkHashes: chunkHashes,
|
||||
ChunkLengths: snapshot.ChunkLengths,
|
||||
}
|
||||
|
||||
description, err := json.MarshalIndent(incompleteSnapshot, "", " ")
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_ENCODE", "Failed to encode the incomplete snapshot: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
snapshotFile := path.Join(GetDuplicacyPreferencePath(), "incomplete")
|
||||
err = ioutil.WriteFile(snapshotFile, description, 0644)
|
||||
if err != nil {
|
||||
LOG_WARN("INCOMPLETE_WRITE", "Failed to save the incomplete snapshot: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
LOG_INFO("INCOMPLETE_SAVE", "Incomplete snapshot saved to %s", snapshotFile)
|
||||
}
|
||||
|
||||
func RemoveIncompleteSnapshot() {
|
||||
snapshotFile := path.Join(GetDuplicacyPreferencePath(), "incomplete")
|
||||
if stat, err := os.Stat(snapshotFile); err == nil && !stat.IsDir() {
|
||||
err = os.Remove(snapshotFile)
|
||||
if err != nil {
|
||||
LOG_INFO("INCOMPLETE_SAVE", "Failed to remove ncomplete snapshot: %v", err)
|
||||
} else {
|
||||
LOG_INFO("INCOMPLETE_SAVE", "Removed incomplete snapshot %s", snapshotFile)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// CreateSnapshotFromDescription creates a snapshot from json decription.
|
||||
func CreateSnapshotFromDescription(description []byte) (snapshot *Snapshot, err error) {
|
||||
|
||||
@@ -321,6 +310,14 @@ func CreateSnapshotFromDescription(description []byte) (snapshot *Snapshot, err
|
||||
|
||||
snapshot = &Snapshot{}
|
||||
|
||||
if value, ok := root["version"]; !ok {
|
||||
snapshot.Version = 0
|
||||
} else if version, ok := value.(float64); !ok {
|
||||
return nil, fmt.Errorf("Invalid version is specified in the snapshot")
|
||||
} else {
|
||||
snapshot.Version = int(version)
|
||||
}
|
||||
|
||||
if value, ok := root["id"]; !ok {
|
||||
return nil, fmt.Errorf("No id is specified in the snapshot")
|
||||
} else if snapshot.ID, ok = value.(string); !ok {
|
||||
@@ -437,6 +434,7 @@ func (snapshot *Snapshot) MarshalJSON() ([]byte, error) {
|
||||
|
||||
object := make(map[string]interface{})
|
||||
|
||||
object["version"] = 1
|
||||
object["id"] = snapshot.ID
|
||||
object["revision"] = snapshot.Revision
|
||||
object["options"] = snapshot.Options
|
||||
@@ -458,9 +456,7 @@ func (snapshot *Snapshot) MarshalJSON() ([]byte, error) {
|
||||
// MarshalSequence creates a json represetion for the specified chunk sequence.
|
||||
func (snapshot *Snapshot) MarshalSequence(sequenceType string) ([]byte, error) {
|
||||
|
||||
if sequenceType == "files" {
|
||||
return json.Marshal(snapshot.Files)
|
||||
} else if sequenceType == "chunks" {
|
||||
if sequenceType == "chunks" {
|
||||
return json.Marshal(encodeSequence(snapshot.ChunkHashes))
|
||||
} else {
|
||||
return json.Marshal(snapshot.ChunkLengths)
|
||||
@@ -489,3 +485,4 @@ func encodeSequence(sequence []string) []string {
|
||||
|
||||
return sequenceInHex
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,8 @@ import (
|
||||
"strings"
|
||||
"text/tabwriter"
|
||||
"time"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/aryann/difflib"
|
||||
)
|
||||
@@ -189,7 +191,6 @@ type SnapshotManager struct {
|
||||
fileChunk *Chunk
|
||||
snapshotCache *FileStorage
|
||||
|
||||
chunkDownloader *ChunkDownloader
|
||||
chunkOperator *ChunkOperator
|
||||
}
|
||||
|
||||
@@ -268,72 +269,26 @@ func (reader *sequenceReader) Read(data []byte) (n int, err error) {
|
||||
return reader.buffer.Read(data)
|
||||
}
|
||||
|
||||
func (manager *SnapshotManager) CreateChunkDownloader() {
|
||||
if manager.chunkDownloader == nil {
|
||||
manager.chunkDownloader = CreateChunkDownloader(manager.config, manager.storage, manager.snapshotCache, false, 1, false)
|
||||
func (manager *SnapshotManager) CreateChunkOperator(resurrect bool, threads int, allowFailures bool) {
|
||||
if manager.chunkOperator == nil {
|
||||
manager.chunkOperator = CreateChunkOperator(manager.config, manager.storage, manager.snapshotCache, resurrect, threads, allowFailures)
|
||||
}
|
||||
}
|
||||
|
||||
// DownloadSequence returns the content represented by a sequence of chunks.
|
||||
func (manager *SnapshotManager) DownloadSequence(sequence []string) (content []byte) {
|
||||
manager.CreateChunkDownloader()
|
||||
manager.CreateChunkOperator(false, 1, false)
|
||||
for _, chunkHash := range sequence {
|
||||
i := manager.chunkDownloader.AddChunk(chunkHash)
|
||||
chunk := manager.chunkDownloader.WaitForChunk(i)
|
||||
chunk := manager.chunkOperator.Download(chunkHash, 0, true)
|
||||
content = append(content, chunk.GetBytes()...)
|
||||
manager.config.PutChunk(chunk)
|
||||
}
|
||||
|
||||
return content
|
||||
}
|
||||
|
||||
func (manager *SnapshotManager) DownloadSnapshotFileSequence(snapshot *Snapshot, patterns []string, attributesNeeded bool) bool {
|
||||
|
||||
manager.CreateChunkDownloader()
|
||||
|
||||
reader := sequenceReader{
|
||||
sequence: snapshot.FileSequence,
|
||||
buffer: new(bytes.Buffer),
|
||||
refillFunc: func(chunkHash string) []byte {
|
||||
i := manager.chunkDownloader.AddChunk(chunkHash)
|
||||
chunk := manager.chunkDownloader.WaitForChunk(i)
|
||||
return chunk.GetBytes()
|
||||
},
|
||||
}
|
||||
|
||||
files := make([]*Entry, 0)
|
||||
decoder := json.NewDecoder(&reader)
|
||||
|
||||
// read open bracket
|
||||
_, err := decoder.Token()
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: not a list of entries",
|
||||
snapshot.ID, snapshot.Revision)
|
||||
return false
|
||||
}
|
||||
|
||||
// while the array contains values
|
||||
for decoder.More() {
|
||||
var entry Entry
|
||||
err = decoder.Decode(&entry)
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: %v",
|
||||
snapshot.ID, snapshot.Revision, err)
|
||||
return false
|
||||
}
|
||||
|
||||
// If we don't need the attributes or the file isn't included we clear the attributes to save memory
|
||||
if !attributesNeeded || (len(patterns) != 0 && !MatchPath(entry.Path, patterns)) {
|
||||
entry.Attributes = nil
|
||||
}
|
||||
|
||||
files = append(files, &entry)
|
||||
}
|
||||
snapshot.Files = files
|
||||
return true
|
||||
}
|
||||
|
||||
// DownloadSnapshotSequence downloads the content represented by a sequence of chunks, and then unmarshal the content
|
||||
// using the specified 'loadFunction'. It purpose is to decode the chunk sequences representing chunk hashes or chunk lengths
|
||||
// using the specified 'loadFunction'. Its purpose is to decode the chunk sequences representing chunk hashes or chunk lengths
|
||||
// in a snapshot.
|
||||
func (manager *SnapshotManager) DownloadSnapshotSequence(snapshot *Snapshot, sequenceType string) bool {
|
||||
|
||||
@@ -362,30 +317,21 @@ func (manager *SnapshotManager) DownloadSnapshotSequence(snapshot *Snapshot, seq
|
||||
return true
|
||||
}
|
||||
|
||||
// DownloadSnapshotContents loads all chunk sequences in a snapshot. A snapshot, when just created, only contains
|
||||
// some metadata and theree sequence representing files, chunk hashes, and chunk lengths. This function must be called
|
||||
// for the actual content of the snapshot to be usable.
|
||||
func (manager *SnapshotManager) DownloadSnapshotContents(snapshot *Snapshot, patterns []string, attributesNeeded bool) bool {
|
||||
// DownloadSnapshotSequences loads all chunk sequences in a snapshot. A snapshot, when just created, only contains
|
||||
// some metadata and three sequence representing files, chunk hashes, and chunk lengths. This function must be called
|
||||
// for the chunk hash sequence and chunk length sequence to be usable.
|
||||
func (manager *SnapshotManager) DownloadSnapshotSequences(snapshot *Snapshot) bool {
|
||||
|
||||
manager.DownloadSnapshotFileSequence(snapshot, patterns, attributesNeeded)
|
||||
manager.DownloadSnapshotSequence(snapshot, "chunks")
|
||||
manager.DownloadSnapshotSequence(snapshot, "lengths")
|
||||
|
||||
err := manager.CheckSnapshot(snapshot)
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_CHECK", "The snapshot %s at revision %d contains an error: %v",
|
||||
snapshot.ID, snapshot.Revision, err)
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// ClearSnapshotContents removes contents loaded by DownloadSnapshotContents
|
||||
func (manager *SnapshotManager) ClearSnapshotContents(snapshot *Snapshot) {
|
||||
// ClearSnapshotContents removes sequences loaded by DownloadSnapshotSequences
|
||||
func (manager *SnapshotManager) ClearSnapshotSequences(snapshot *Snapshot) {
|
||||
snapshot.ChunkHashes = nil
|
||||
snapshot.ChunkLengths = nil
|
||||
snapshot.Files = nil
|
||||
}
|
||||
|
||||
// CleanSnapshotCache removes all files not referenced by the specified 'snapshot' in the snapshot cache.
|
||||
@@ -577,10 +523,6 @@ func (manager *SnapshotManager) downloadLatestSnapshot(snapshotID string) (remot
|
||||
remote = manager.DownloadSnapshot(snapshotID, latest)
|
||||
}
|
||||
|
||||
if remote != nil {
|
||||
manager.DownloadSnapshotContents(remote, nil, false)
|
||||
}
|
||||
|
||||
return remote
|
||||
}
|
||||
|
||||
@@ -712,6 +654,12 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
|
||||
LOG_DEBUG("LIST_PARAMETERS", "id: %s, revisions: %v, tag: %s, showFiles: %t, showChunks: %t",
|
||||
snapshotID, revisionsToList, tag, showFiles, showChunks)
|
||||
|
||||
manager.CreateChunkOperator(false, 1, false)
|
||||
defer func() {
|
||||
manager.chunkOperator.Stop()
|
||||
manager.chunkOperator = nil
|
||||
}()
|
||||
|
||||
var snapshotIDs []string
|
||||
var err error
|
||||
|
||||
@@ -749,14 +697,16 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
|
||||
if len(snapshot.Tag) > 0 {
|
||||
tagWithSpace = snapshot.Tag + " "
|
||||
}
|
||||
LOG_INFO("SNAPSHOT_INFO", "Snapshot %s revision %d created at %s %s%s",
|
||||
snapshotID, revision, creationTime, tagWithSpace, snapshot.Options)
|
||||
|
||||
if showFiles {
|
||||
manager.DownloadSnapshotFileSequence(snapshot, nil, false)
|
||||
options := snapshot.Options
|
||||
if snapshot.Version == 0 {
|
||||
options += " (0)"
|
||||
}
|
||||
LOG_INFO("SNAPSHOT_INFO", "Snapshot %s revision %d created at %s %s%s",
|
||||
snapshotID, revision, creationTime, tagWithSpace, options)
|
||||
|
||||
if showFiles {
|
||||
// We need to fill in ChunkHashes and ChunkLengths to verify that each entry is valid
|
||||
manager.DownloadSnapshotSequences(snapshot)
|
||||
|
||||
if snapshot.NumberOfFiles > 0 {
|
||||
LOG_INFO("SNAPSHOT_STATS", "Files: %d", snapshot.NumberOfFiles)
|
||||
@@ -768,7 +718,7 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
|
||||
totalFileSize := int64(0)
|
||||
lastChunk := 0
|
||||
|
||||
for _, file := range snapshot.Files {
|
||||
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(file *Entry)bool {
|
||||
if file.IsFile() {
|
||||
totalFiles++
|
||||
totalFileSize += file.Size
|
||||
@@ -780,17 +730,18 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
|
||||
lastChunk = file.EndChunk
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
for _, file := range snapshot.Files {
|
||||
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(file *Entry)bool {
|
||||
if file.IsFile() {
|
||||
LOG_INFO("SNAPSHOT_FILE", "%s", file.String(maxSizeDigits))
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
metaChunks := len(snapshot.FileSequence) + len(snapshot.ChunkSequence) + len(snapshot.LengthSequence)
|
||||
LOG_INFO("SNAPSHOT_STATS", "Files: %d, total size: %d, file chunks: %d, metadata chunks: %d",
|
||||
totalFiles, totalFileSize, lastChunk+1, metaChunks)
|
||||
LOG_INFO("SNAPSHOT_STATS", "Total size: %d, file chunks: %d, metadata chunks: %d", totalFileSize, lastChunk+1, metaChunks)
|
||||
}
|
||||
|
||||
if showChunks {
|
||||
@@ -807,11 +758,15 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
|
||||
|
||||
}
|
||||
|
||||
// ListSnapshots shows the information about a snapshot.
|
||||
// CheckSnapshots checks if there is any problem with a snapshot.
|
||||
func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToCheck []int, tag string, showStatistics bool, showTabular bool,
|
||||
checkFiles bool, checkChunks, searchFossils bool, resurrect bool, threads int, allowFailures bool) bool {
|
||||
|
||||
manager.chunkDownloader = CreateChunkDownloader(manager.config, manager.storage, manager.snapshotCache, false, threads, allowFailures)
|
||||
manager.CreateChunkOperator(resurrect, threads, allowFailures)
|
||||
defer func() {
|
||||
manager.chunkOperator.Stop()
|
||||
manager.chunkOperator = nil
|
||||
}()
|
||||
|
||||
LOG_DEBUG("LIST_PARAMETERS", "id: %s, revisions: %v, tag: %s, showStatistics: %t, showTabular: %t, checkFiles: %t, searchFossils: %t, resurrect: %t",
|
||||
snapshotID, revisionsToCheck, tag, showStatistics, showTabular, checkFiles, searchFossils, resurrect)
|
||||
@@ -911,9 +866,9 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
|
||||
for _, snapshot := range snapshotMap[snapshotID] {
|
||||
|
||||
if checkFiles {
|
||||
manager.DownloadSnapshotContents(snapshot, nil, false)
|
||||
manager.DownloadSnapshotSequences(snapshot)
|
||||
manager.VerifySnapshot(snapshot)
|
||||
manager.ClearSnapshotContents(snapshot)
|
||||
manager.ClearSnapshotSequences(snapshot)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -1026,6 +981,7 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
|
||||
// .duplicacy/cache/storage/verified_chunks. Note that it contains the chunk ids not chunk
|
||||
// hashes.
|
||||
verifiedChunks := make(map[string]int64)
|
||||
var verifiedChunksLock sync.Mutex
|
||||
verifiedChunksFile := "verified_chunks"
|
||||
|
||||
manager.fileChunk.Reset(false)
|
||||
@@ -1061,16 +1017,11 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
|
||||
defer saveVerifiedChunks()
|
||||
RunAtError = saveVerifiedChunks
|
||||
|
||||
manager.chunkDownloader.snapshotCache = nil
|
||||
LOG_INFO("SNAPSHOT_VERIFY", "Verifying %d chunks", len(*allChunkHashes))
|
||||
|
||||
startTime := time.Now()
|
||||
var chunkHashes []string
|
||||
|
||||
// The index of the first chunk to add to the downloader, which may have already downloaded
|
||||
// some metadata chunks so the index doesn't start with 0.
|
||||
chunkIndex := -1
|
||||
|
||||
skippedChunks := 0
|
||||
for chunkHash := range *allChunkHashes {
|
||||
if len(verifiedChunks) > 0 {
|
||||
@@ -1081,38 +1032,65 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
|
||||
}
|
||||
}
|
||||
chunkHashes = append(chunkHashes, chunkHash)
|
||||
if chunkIndex == -1 {
|
||||
chunkIndex = manager.chunkDownloader.AddChunk(chunkHash)
|
||||
} else {
|
||||
manager.chunkDownloader.AddChunk(chunkHash)
|
||||
}
|
||||
}
|
||||
|
||||
if skippedChunks > 0 {
|
||||
LOG_INFO("SNAPSHOT_VERIFY", "Skipped %d chunks that have already been verified before", skippedChunks)
|
||||
}
|
||||
|
||||
var downloadedChunkSize int64
|
||||
totalChunks := len(chunkHashes)
|
||||
for i := 0; i < totalChunks; i++ {
|
||||
chunk := manager.chunkDownloader.WaitForChunk(i + chunkIndex)
|
||||
chunkID := manager.config.GetChunkIDFromHash(chunkHashes[i])
|
||||
if chunk.isBroken {
|
||||
continue
|
||||
}
|
||||
verifiedChunks[chunkID] = startTime.Unix()
|
||||
downloadedChunkSize += int64(chunk.GetLength())
|
||||
var totalDownloadedChunkSize int64
|
||||
var totalDownloadedChunks int64
|
||||
totalChunks := int64(len(chunkHashes))
|
||||
|
||||
elapsedTime := time.Now().Sub(startTime).Seconds()
|
||||
speed := int64(float64(downloadedChunkSize) / elapsedTime)
|
||||
remainingTime := int64(float64(totalChunks - i - 1) / float64(i + 1) * elapsedTime)
|
||||
percentage := float64(i + 1) / float64(totalChunks) * 100.0
|
||||
LOG_INFO("VERIFY_PROGRESS", "Verified chunk %s (%d/%d), %sB/s %s %.1f%%",
|
||||
chunkID, i + 1, totalChunks, PrettySize(speed), PrettyTime(remainingTime), percentage)
|
||||
chunkChannel := make(chan int, threads)
|
||||
var wg sync.WaitGroup
|
||||
|
||||
wg.Add(threads)
|
||||
for i := 0; i < threads; i++ {
|
||||
go func() {
|
||||
defer CatchLogException()
|
||||
|
||||
for {
|
||||
chunkIndex, ok := <- chunkChannel
|
||||
if !ok {
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
|
||||
chunk := manager.chunkOperator.Download(chunkHashes[chunkIndex], chunkIndex, false)
|
||||
if chunk == nil {
|
||||
continue
|
||||
}
|
||||
chunkID := manager.config.GetChunkIDFromHash(chunkHashes[chunkIndex])
|
||||
verifiedChunksLock.Lock()
|
||||
verifiedChunks[chunkID] = startTime.Unix()
|
||||
verifiedChunksLock.Unlock()
|
||||
|
||||
downloadedChunkSize := atomic.AddInt64(&totalDownloadedChunkSize, int64(chunk.GetLength()))
|
||||
downloadedChunks := atomic.AddInt64(&totalDownloadedChunks, 1)
|
||||
|
||||
elapsedTime := time.Now().Sub(startTime).Seconds()
|
||||
speed := int64(float64(downloadedChunkSize) / elapsedTime)
|
||||
remainingTime := int64(float64(totalChunks - downloadedChunks) / float64(downloadedChunks) * elapsedTime)
|
||||
percentage := float64(downloadedChunks) / float64(totalChunks) * 100.0
|
||||
LOG_INFO("VERIFY_PROGRESS", "Verified chunk %s (%d/%d), %sB/s %s %.1f%%",
|
||||
chunkID, downloadedChunks, totalChunks, PrettySize(speed), PrettyTime(remainingTime), percentage)
|
||||
|
||||
manager.config.PutChunk(chunk)
|
||||
}
|
||||
} ()
|
||||
}
|
||||
|
||||
if manager.chunkDownloader.NumberOfFailedChunks > 0 {
|
||||
LOG_ERROR("SNAPSHOT_VERIFY", "%d out of %d chunks are corrupted", manager.chunkDownloader.NumberOfFailedChunks, len(*allChunkHashes))
|
||||
for chunkIndex := range chunkHashes {
|
||||
chunkChannel <- chunkIndex
|
||||
}
|
||||
|
||||
close(chunkChannel)
|
||||
wg.Wait()
|
||||
manager.chunkOperator.WaitForCompletion()
|
||||
|
||||
if manager.chunkOperator.NumberOfFailedChunks > 0 {
|
||||
LOG_ERROR("SNAPSHOT_VERIFY", "%d out of %d chunks are corrupted", manager.chunkOperator.NumberOfFailedChunks, len(*allChunkHashes))
|
||||
} else {
|
||||
LOG_INFO("SNAPSHOT_VERIFY", "All %d chunks have been successfully verified", len(*allChunkHashes))
|
||||
}
|
||||
@@ -1280,14 +1258,6 @@ func (manager *SnapshotManager) PrintSnapshot(snapshot *Snapshot) bool {
|
||||
object["chunks"] = manager.ConvertSequence(snapshot.ChunkHashes)
|
||||
object["lengths"] = snapshot.ChunkLengths
|
||||
|
||||
// By default the json serialization of a file entry contains the path in base64 format. This is
|
||||
// to convert every file entry into an object which include the path in a more readable format.
|
||||
var files []map[string]interface{}
|
||||
for _, file := range snapshot.Files {
|
||||
files = append(files, file.convertToObject(false))
|
||||
}
|
||||
object["files"] = files
|
||||
|
||||
description, err := json.MarshalIndent(object, "", " ")
|
||||
|
||||
if err != nil {
|
||||
@@ -1296,8 +1266,24 @@ func (manager *SnapshotManager) PrintSnapshot(snapshot *Snapshot) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
fmt.Printf("%s\n", string(description))
|
||||
// Don't print the ending bracket
|
||||
fmt.Printf("%s", string(description[:len(description) - 2]))
|
||||
fmt.Printf(",\n \"files\": [\n")
|
||||
isFirstFile := true
|
||||
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (file *Entry) bool {
|
||||
|
||||
fileDescription, _ := json.MarshalIndent(file.convertToObject(false), "", " ")
|
||||
|
||||
if isFirstFile {
|
||||
fmt.Printf("%s", fileDescription)
|
||||
isFirstFile = false
|
||||
} else {
|
||||
fmt.Printf(",\n%s", fileDescription)
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
fmt.Printf(" ]\n}\n")
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -1313,17 +1299,20 @@ func (manager *SnapshotManager) VerifySnapshot(snapshot *Snapshot) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
files := make([]*Entry, 0, len(snapshot.Files)/2)
|
||||
for _, file := range snapshot.Files {
|
||||
files := make([]*Entry, 0)
|
||||
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (file *Entry) bool {
|
||||
if file.IsFile() && file.Size != 0 {
|
||||
file.Attributes = nil
|
||||
files = append(files, file)
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
sort.Sort(ByChunk(files))
|
||||
corruptedFiles := 0
|
||||
var lastChunk *Chunk
|
||||
for _, file := range files {
|
||||
if !manager.RetrieveFile(snapshot, file, func([]byte) {}) {
|
||||
if !manager.RetrieveFile(snapshot, file, &lastChunk, func([]byte) {}) {
|
||||
corruptedFiles++
|
||||
}
|
||||
LOG_TRACE("SNAPSHOT_VERIFY", "%s", file.Path)
|
||||
@@ -1341,21 +1330,13 @@ func (manager *SnapshotManager) VerifySnapshot(snapshot *Snapshot) bool {
|
||||
}
|
||||
|
||||
// RetrieveFile retrieves the file in the specified snapshot.
|
||||
func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, output func([]byte)) bool {
|
||||
func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, lastChunk **Chunk, output func([]byte)) bool {
|
||||
|
||||
if file.Size == 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
manager.CreateChunkDownloader()
|
||||
|
||||
// Temporarily disable the snapshot cache of the download so that downloaded file chunks won't be saved
|
||||
// to the cache.
|
||||
snapshotCache := manager.chunkDownloader.snapshotCache
|
||||
manager.chunkDownloader.snapshotCache = nil
|
||||
defer func() {
|
||||
manager.chunkDownloader.snapshotCache = snapshotCache
|
||||
}()
|
||||
manager.CreateChunkOperator(false, 1, false)
|
||||
|
||||
fileHasher := manager.config.NewFileHasher()
|
||||
alternateHash := false
|
||||
@@ -1376,12 +1357,19 @@ func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, ou
|
||||
}
|
||||
|
||||
hash := snapshot.ChunkHashes[i]
|
||||
lastChunk, lastChunkHash := manager.chunkDownloader.GetLastDownloadedChunk()
|
||||
if lastChunkHash != hash {
|
||||
i := manager.chunkDownloader.AddChunk(hash)
|
||||
chunk = manager.chunkDownloader.WaitForChunk(i)
|
||||
if lastChunk == nil {
|
||||
chunk = manager.chunkOperator.Download(hash, 0, false)
|
||||
} else if *lastChunk == nil {
|
||||
chunk = manager.chunkOperator.Download(hash, 0, false)
|
||||
*lastChunk = chunk
|
||||
} else {
|
||||
chunk = lastChunk
|
||||
if (*lastChunk).GetHash() == hash {
|
||||
chunk = *lastChunk
|
||||
} else {
|
||||
manager.config.PutChunk(*lastChunk)
|
||||
chunk = manager.chunkOperator.Download(hash, 0, false)
|
||||
*lastChunk = chunk
|
||||
}
|
||||
}
|
||||
|
||||
output(chunk.GetBytes()[start:end])
|
||||
@@ -1405,10 +1393,18 @@ func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, ou
|
||||
|
||||
// FindFile returns the file entry that has the given file name.
|
||||
func (manager *SnapshotManager) FindFile(snapshot *Snapshot, filePath string, suppressError bool) *Entry {
|
||||
for _, entry := range snapshot.Files {
|
||||
|
||||
var found *Entry
|
||||
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (entry *Entry) bool {
|
||||
if entry.Path == filePath {
|
||||
return entry
|
||||
found = entry
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
if found != nil {
|
||||
return found
|
||||
}
|
||||
|
||||
if !suppressError {
|
||||
@@ -1440,13 +1436,8 @@ func (manager *SnapshotManager) PrintFile(snapshotID string, revision int, path
|
||||
return false
|
||||
}
|
||||
|
||||
patterns := []string{}
|
||||
if path != "" {
|
||||
patterns = []string{path}
|
||||
}
|
||||
|
||||
// If no path is specified, we're printing the snapshot so we need all attributes
|
||||
if !manager.DownloadSnapshotContents(snapshot, patterns, path == "") {
|
||||
// If no path is specified, we're printing the snapshot
|
||||
if !manager.DownloadSnapshotSequences(snapshot) {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -1456,7 +1447,7 @@ func (manager *SnapshotManager) PrintFile(snapshotID string, revision int, path
|
||||
}
|
||||
|
||||
file := manager.FindFile(snapshot, path, false)
|
||||
if !manager.RetrieveFile(snapshot, file, func(chunk []byte) {
|
||||
if !manager.RetrieveFile(snapshot, file, nil, func(chunk []byte) {
|
||||
fmt.Printf("%s", chunk)
|
||||
}) {
|
||||
LOG_ERROR("SNAPSHOT_RETRIEVE", "File %s is corrupted in snapshot %s at revision %d",
|
||||
@@ -1474,22 +1465,38 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
|
||||
LOG_DEBUG("DIFF_PARAMETERS", "top: %s, id: %s, revision: %v, path: %s, compareByHash: %t",
|
||||
top, snapshotID, revisions, filePath, compareByHash)
|
||||
|
||||
manager.CreateChunkOperator(false, 1, false)
|
||||
defer func() {
|
||||
manager.chunkOperator.Stop()
|
||||
manager.chunkOperator = nil
|
||||
} ()
|
||||
|
||||
var leftSnapshot *Snapshot
|
||||
var rightSnapshot *Snapshot
|
||||
var err error
|
||||
|
||||
leftSnapshotFiles := make([]*Entry, 0, 1024)
|
||||
rightSnapshotFiles := make([]*Entry, 0, 1024)
|
||||
|
||||
// If no or only one revision is specified, use the on-disk version for the right-hand side.
|
||||
if len(revisions) <= 1 {
|
||||
// Only scan the repository if filePath is not provided
|
||||
if len(filePath) == 0 {
|
||||
rightSnapshot, _, _, err = CreateSnapshotFromDirectory(snapshotID, top, nobackupFile, filtersFile, excludeByAttribute)
|
||||
if err != nil {
|
||||
LOG_ERROR("SNAPSHOT_LIST", "Failed to list the directory %s: %v", top, err)
|
||||
return false
|
||||
rightSnapshot = CreateEmptySnapshot(snapshotID)
|
||||
localListingChannel := make(chan *Entry)
|
||||
go func() {
|
||||
defer CatchLogException()
|
||||
rightSnapshot.ListLocalFiles(top, nobackupFile, filtersFile, excludeByAttribute, localListingChannel, nil, nil)
|
||||
} ()
|
||||
|
||||
for entry := range localListingChannel {
|
||||
entry.Attributes = nil // attributes are not compared
|
||||
rightSnapshotFiles = append(rightSnapshotFiles, entry)
|
||||
}
|
||||
|
||||
}
|
||||
} else {
|
||||
rightSnapshot = manager.DownloadSnapshot(snapshotID, revisions[1])
|
||||
manager.DownloadSnapshotSequences(rightSnapshot)
|
||||
}
|
||||
|
||||
// If no revision is specified, use the latest revision as the left-hand side.
|
||||
@@ -1503,15 +1510,11 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
|
||||
leftSnapshot = manager.DownloadSnapshot(snapshotID, revisions[0])
|
||||
}
|
||||
|
||||
manager.DownloadSnapshotSequences(leftSnapshot)
|
||||
if len(filePath) > 0 {
|
||||
|
||||
manager.DownloadSnapshotContents(leftSnapshot, nil, false)
|
||||
if rightSnapshot != nil && rightSnapshot.Revision != 0 {
|
||||
manager.DownloadSnapshotContents(rightSnapshot, nil, false)
|
||||
}
|
||||
|
||||
var leftFile []byte
|
||||
if !manager.RetrieveFile(leftSnapshot, manager.FindFile(leftSnapshot, filePath, false), func(content []byte) {
|
||||
if !manager.RetrieveFile(leftSnapshot, manager.FindFile(leftSnapshot, filePath, false), nil, func(content []byte) {
|
||||
leftFile = append(leftFile, content...)
|
||||
}) {
|
||||
LOG_ERROR("SNAPSHOT_DIFF", "File %s is corrupted in snapshot %s at revision %d",
|
||||
@@ -1521,7 +1524,7 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
|
||||
|
||||
var rightFile []byte
|
||||
if rightSnapshot != nil {
|
||||
if !manager.RetrieveFile(rightSnapshot, manager.FindFile(rightSnapshot, filePath, false), func(content []byte) {
|
||||
if !manager.RetrieveFile(rightSnapshot, manager.FindFile(rightSnapshot, filePath, false), nil, func(content []byte) {
|
||||
rightFile = append(rightFile, content...)
|
||||
}) {
|
||||
LOG_ERROR("SNAPSHOT_DIFF", "File %s is corrupted in snapshot %s at revision %d",
|
||||
@@ -1582,24 +1585,32 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
|
||||
return true
|
||||
}
|
||||
|
||||
// We only need to decode the 'files' sequence, not 'chunkhashes' or 'chunklengthes'
|
||||
manager.DownloadSnapshotFileSequence(leftSnapshot, nil, false)
|
||||
if rightSnapshot != nil && rightSnapshot.Revision != 0 {
|
||||
manager.DownloadSnapshotFileSequence(rightSnapshot, nil, false)
|
||||
leftSnapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(entry *Entry) bool {
|
||||
entry.Attributes = nil
|
||||
leftSnapshotFiles = append(leftSnapshotFiles, entry)
|
||||
return true
|
||||
})
|
||||
|
||||
if rightSnapshot.Revision != 0 {
|
||||
rightSnapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(entry *Entry) bool {
|
||||
entry.Attributes = nil
|
||||
rightSnapshotFiles = append(rightSnapshotFiles, entry)
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
maxSize := int64(9)
|
||||
maxSizeDigits := 1
|
||||
|
||||
// Find the max Size value in order for pretty alignment.
|
||||
for _, file := range leftSnapshot.Files {
|
||||
for _, file := range leftSnapshotFiles {
|
||||
for !file.IsDir() && file.Size > maxSize {
|
||||
maxSize = maxSize*10 + 9
|
||||
maxSizeDigits += 1
|
||||
}
|
||||
}
|
||||
|
||||
for _, file := range rightSnapshot.Files {
|
||||
for _, file := range rightSnapshotFiles {
|
||||
for !file.IsDir() && file.Size > maxSize {
|
||||
maxSize = maxSize*10 + 9
|
||||
maxSizeDigits += 1
|
||||
@@ -1609,22 +1620,22 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
|
||||
buffer := make([]byte, 32*1024)
|
||||
|
||||
var i, j int
|
||||
for i < len(leftSnapshot.Files) || j < len(rightSnapshot.Files) {
|
||||
for i < len(leftSnapshotFiles) || j < len(rightSnapshotFiles) {
|
||||
|
||||
if i >= len(leftSnapshot.Files) {
|
||||
if rightSnapshot.Files[j].IsFile() {
|
||||
LOG_INFO("SNAPSHOT_DIFF", "+ %s", rightSnapshot.Files[j].String(maxSizeDigits))
|
||||
if i >= len(leftSnapshotFiles) {
|
||||
if rightSnapshotFiles[j].IsFile() {
|
||||
LOG_INFO("SNAPSHOT_DIFF", "+ %s", rightSnapshotFiles[j].String(maxSizeDigits))
|
||||
}
|
||||
j++
|
||||
} else if j >= len(rightSnapshot.Files) {
|
||||
if leftSnapshot.Files[i].IsFile() {
|
||||
LOG_INFO("SNAPSHOT_DIFF", "- %s", leftSnapshot.Files[i].String(maxSizeDigits))
|
||||
} else if j >= len(rightSnapshotFiles) {
|
||||
if leftSnapshotFiles[i].IsFile() {
|
||||
LOG_INFO("SNAPSHOT_DIFF", "- %s", leftSnapshotFiles[i].String(maxSizeDigits))
|
||||
}
|
||||
i++
|
||||
} else {
|
||||
|
||||
left := leftSnapshot.Files[i]
|
||||
right := rightSnapshot.Files[j]
|
||||
left := leftSnapshotFiles[i]
|
||||
right := rightSnapshotFiles[j]
|
||||
|
||||
if !left.IsFile() {
|
||||
i++
|
||||
@@ -1679,6 +1690,12 @@ func (manager *SnapshotManager) ShowHistory(top string, snapshotID string, revis
|
||||
LOG_DEBUG("HISTORY_PARAMETERS", "top: %s, id: %s, revisions: %v, path: %s, showLocalHash: %t",
|
||||
top, snapshotID, revisions, filePath, showLocalHash)
|
||||
|
||||
manager.CreateChunkOperator(false, 1, false)
|
||||
defer func() {
|
||||
manager.chunkOperator.Stop()
|
||||
manager.chunkOperator = nil
|
||||
} ()
|
||||
|
||||
var err error
|
||||
|
||||
if len(revisions) == 0 {
|
||||
@@ -1693,7 +1710,7 @@ func (manager *SnapshotManager) ShowHistory(top string, snapshotID string, revis
|
||||
sort.Ints(revisions)
|
||||
for _, revision := range revisions {
|
||||
snapshot := manager.DownloadSnapshot(snapshotID, revision)
|
||||
manager.DownloadSnapshotFileSequence(snapshot, nil, false)
|
||||
manager.DownloadSnapshotSequences(snapshot)
|
||||
file := manager.FindFile(snapshot, filePath, true)
|
||||
|
||||
if file != nil {
|
||||
@@ -1801,8 +1818,11 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string,
|
||||
LOG_WARN("DELETE_OPTIONS", "Tags or retention policy will be ignored if at least one revision is specified")
|
||||
}
|
||||
|
||||
manager.chunkOperator = CreateChunkOperator(manager.storage, threads)
|
||||
defer manager.chunkOperator.Stop()
|
||||
manager.CreateChunkOperator(false, threads, false)
|
||||
defer func() {
|
||||
manager.chunkOperator.Stop()
|
||||
manager.chunkOperator = nil
|
||||
} ()
|
||||
|
||||
prefPath := GetDuplicacyPreferencePath()
|
||||
logDir := path.Join(prefPath, "logs")
|
||||
@@ -2184,7 +2204,7 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string,
|
||||
return false
|
||||
}
|
||||
|
||||
manager.chunkOperator.Stop()
|
||||
manager.chunkOperator.WaitForCompletion()
|
||||
for _, fossil := range manager.chunkOperator.fossils {
|
||||
collection.AddFossil(fossil)
|
||||
}
|
||||
@@ -2265,6 +2285,7 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string,
|
||||
} else {
|
||||
manager.CleanSnapshotCache(nil, allSnapshots)
|
||||
}
|
||||
manager.chunkOperator.WaitForCompletion()
|
||||
|
||||
return true
|
||||
}
|
||||
@@ -2477,8 +2498,6 @@ func (manager *SnapshotManager) pruneSnapshotsExhaustive(referencedFossils map[s
|
||||
// CheckSnapshot performs sanity checks on the given snapshot.
|
||||
func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) {
|
||||
|
||||
lastChunk := 0
|
||||
lastOffset := 0
|
||||
var lastEntry *Entry
|
||||
|
||||
numberOfChunks := len(snapshot.ChunkHashes)
|
||||
@@ -2488,57 +2507,39 @@ func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) {
|
||||
numberOfChunks, len(snapshot.ChunkLengths))
|
||||
}
|
||||
|
||||
entries := make([]*Entry, len(snapshot.Files))
|
||||
copy(entries, snapshot.Files)
|
||||
sort.Sort(ByChunk(entries))
|
||||
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (entry *Entry) bool {
|
||||
|
||||
for _, entry := range snapshot.Files {
|
||||
if lastEntry != nil && lastEntry.Compare(entry) >= 0 && !strings.Contains(lastEntry.Path, "\ufffd") {
|
||||
return fmt.Errorf("The entry %s appears before the entry %s", lastEntry.Path, entry.Path)
|
||||
err = fmt.Errorf("The entry %s appears before the entry %s", lastEntry.Path, entry.Path)
|
||||
return false
|
||||
}
|
||||
lastEntry = entry
|
||||
}
|
||||
|
||||
for _, entry := range entries {
|
||||
|
||||
if !entry.IsFile() || entry.Size == 0 {
|
||||
continue
|
||||
return true
|
||||
}
|
||||
|
||||
if entry.StartChunk < 0 {
|
||||
return fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk)
|
||||
err = fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk)
|
||||
return false
|
||||
}
|
||||
|
||||
if entry.EndChunk >= numberOfChunks {
|
||||
return fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d",
|
||||
err = fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d",
|
||||
entry.Path, entry.EndChunk, numberOfChunks)
|
||||
return false
|
||||
}
|
||||
|
||||
if entry.EndChunk < entry.StartChunk {
|
||||
return fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d",
|
||||
fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d",
|
||||
entry.Path, entry.StartChunk, entry.EndChunk)
|
||||
return false
|
||||
}
|
||||
|
||||
if entry.StartOffset > 0 {
|
||||
if entry.StartChunk < lastChunk {
|
||||
return fmt.Errorf("The file %s starts at chunk %d while the last chunk is %d",
|
||||
entry.Path, entry.StartChunk, lastChunk)
|
||||
}
|
||||
|
||||
if entry.StartChunk > lastChunk+1 {
|
||||
return fmt.Errorf("The file %s starts at chunk %d while the last chunk is %d",
|
||||
entry.Path, entry.StartChunk, lastChunk)
|
||||
}
|
||||
|
||||
if entry.StartChunk == lastChunk && entry.StartOffset < lastOffset {
|
||||
return fmt.Errorf("The file %s starts at offset %d of chunk %d while the last file ends at offset %d",
|
||||
entry.Path, entry.StartOffset, entry.StartChunk, lastOffset)
|
||||
}
|
||||
|
||||
if entry.StartChunk == entry.EndChunk && entry.StartOffset > entry.EndOffset {
|
||||
return fmt.Errorf("The file %s starts at offset %d and ends at offset %d of the same chunk %d",
|
||||
entry.Path, entry.StartOffset, entry.EndOffset, entry.StartChunk)
|
||||
}
|
||||
if entry.StartChunk == entry.EndChunk && entry.StartOffset > entry.EndOffset {
|
||||
err = fmt.Errorf("The file %s starts at offset %d and ends at offset %d of the same chunk %d",
|
||||
entry.Path, entry.StartOffset, entry.EndOffset, entry.StartChunk)
|
||||
return false
|
||||
}
|
||||
|
||||
fileSize := int64(0)
|
||||
@@ -2558,22 +2559,13 @@ func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) {
|
||||
}
|
||||
|
||||
if entry.Size != fileSize {
|
||||
return fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d",
|
||||
err = fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d",
|
||||
entry.Path, entry.Size, fileSize)
|
||||
return false
|
||||
}
|
||||
|
||||
lastChunk = entry.EndChunk
|
||||
lastOffset = entry.EndOffset
|
||||
}
|
||||
|
||||
if len(entries) > 0 && entries[0].StartChunk != 0 {
|
||||
return fmt.Errorf("The first file starts at chunk %d", entries[0].StartChunk)
|
||||
}
|
||||
|
||||
// There may be a last chunk whose size is 0 so we allow this to happen
|
||||
if lastChunk < numberOfChunks-2 {
|
||||
return fmt.Errorf("The last file ends at chunk %d but the number of chunks is %d", lastChunk, numberOfChunks)
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -116,19 +116,18 @@ func createTestSnapshotManager(testDir string) *SnapshotManager {
|
||||
|
||||
func uploadTestChunk(manager *SnapshotManager, content []byte) string {
|
||||
|
||||
completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
|
||||
chunkOperator := CreateChunkOperator(manager.config, manager.storage, nil, false, testThreads, false)
|
||||
chunkOperator.UploadCompletionFunc = func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
|
||||
LOG_INFO("UPLOAD_CHUNK", "Chunk %s size %d uploaded", chunk.GetID(), chunkSize)
|
||||
}
|
||||
|
||||
chunkUploader := CreateChunkUploader(manager.config, manager.storage, nil, testThreads, nil)
|
||||
chunkUploader.completionFunc = completionFunc
|
||||
chunkUploader.Start()
|
||||
|
||||
chunk := CreateChunk(manager.config, true)
|
||||
chunk.Reset(true)
|
||||
chunk.Write(content)
|
||||
chunkUploader.StartChunk(chunk, 0)
|
||||
chunkUploader.Stop()
|
||||
|
||||
chunkOperator.Upload(chunk, 0, false)
|
||||
chunkOperator.WaitForCompletion()
|
||||
chunkOperator.Stop()
|
||||
|
||||
return chunk.GetHash()
|
||||
}
|
||||
@@ -180,6 +179,12 @@ func createTestSnapshot(manager *SnapshotManager, snapshotID string, revision in
|
||||
|
||||
func checkTestSnapshots(manager *SnapshotManager, expectedSnapshots int, expectedFossils int) {
|
||||
|
||||
manager.CreateChunkOperator(false, 1, false)
|
||||
defer func() {
|
||||
manager.chunkOperator.Stop()
|
||||
manager.chunkOperator = nil
|
||||
}()
|
||||
|
||||
var snapshotIDs []string
|
||||
var err error
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"runtime"
|
||||
|
||||
"github.com/gilbertchen/gopass"
|
||||
"golang.org/x/crypto/pbkdf2"
|
||||
@@ -460,3 +461,16 @@ func AtoSize(sizeString string) int {
|
||||
|
||||
return size
|
||||
}
|
||||
|
||||
func PrintMemoryUsage() {
|
||||
|
||||
for {
|
||||
var m runtime.MemStats
|
||||
runtime.ReadMemStats(&m)
|
||||
|
||||
LOG_INFO("MEMORY_STATS", "Currently allocated: %s, total allocated: %s, system memory: %s, number of GCs: %d",
|
||||
PrettySize(int64(m.Alloc)), PrettySize(int64(m.TotalAlloc)), PrettySize(int64(m.Sys)), m.NumGC)
|
||||
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
}
|
||||
@@ -52,11 +52,11 @@ func (entry *Entry) ReadAttributes(top string) {
|
||||
fullPath := filepath.Join(top, entry.Path)
|
||||
attributes, _ := xattr.List(fullPath)
|
||||
if len(attributes) > 0 {
|
||||
entry.Attributes = make(map[string][]byte)
|
||||
entry.Attributes = &map[string][]byte{}
|
||||
for _, name := range attributes {
|
||||
attribute, err := xattr.Get(fullPath, name)
|
||||
if err == nil {
|
||||
entry.Attributes[name] = attribute
|
||||
(*entry.Attributes)[name] = attribute
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -68,19 +68,19 @@ func (entry *Entry) SetAttributesToFile(fullPath string) {
|
||||
for _, name := range names {
|
||||
|
||||
|
||||
newAttribute, found := entry.Attributes[name]
|
||||
newAttribute, found := (*entry.Attributes)[name]
|
||||
if found {
|
||||
oldAttribute, _ := xattr.Get(fullPath, name)
|
||||
if !bytes.Equal(oldAttribute, newAttribute) {
|
||||
xattr.Set(fullPath, name, newAttribute)
|
||||
}
|
||||
delete(entry.Attributes, name)
|
||||
delete(*entry.Attributes, name)
|
||||
} else {
|
||||
xattr.Remove(fullPath, name)
|
||||
}
|
||||
}
|
||||
|
||||
for name, attribute := range entry.Attributes {
|
||||
for name, attribute := range *entry.Attributes {
|
||||
xattr.Set(fullPath, name, attribute)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user