1
0
mirror of https://github.com/gilbertchen/duplicacy synced 2025-12-06 00:03:38 +00:00

Rewrite the backup procedure to reduce memory usage

Main changes:

* Change the listing order of files/directories so that the local and remote
  snapshots can be compared on-the-fly.

* Introduce a new struct called EntryList that maintains a list of
  files/directories, which are kept in memory when their number is small, and
  serialized to a file when there are too many.

* EntryList can also be turned into an on-disk incomplete snapshot quickly,
  to support fast-resume on next run.

* ChunkOperator can now download and upload chunks, thus replacing the original
  ChunkDownloader and ChunkUploader.  The new ChunkDownloader is only used
  to prefetch chunks during the restore operation.
This commit is contained in:
Gilbert Chen
2021-10-24 23:34:49 -04:00
parent f83e4f3c44
commit d9f6545d63
20 changed files with 2762 additions and 1749 deletions

View File

@@ -147,6 +147,10 @@ func setGlobalOptions(context *cli.Context) {
duplicacy.SetLoggingLevel(duplicacy.DEBUG)
}
if context.GlobalBool("print-memory-usage") {
go duplicacy.PrintMemoryUsage()
}
ScriptEnabled = true
if context.GlobalBool("no-script") {
ScriptEnabled = false
@@ -781,7 +785,10 @@ func backupRepository(context *cli.Context) {
backupManager.SetupSnapshotCache(preference.Name)
backupManager.SetDryRun(dryRun)
backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS, vssTimeout, enumOnly)
metadataChunkSize := context.Int("metadata-chunk-size")
maximumInMemoryEntries := context.Int("max-in-memory-entries")
backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS, vssTimeout, enumOnly, metadataChunkSize, maximumInMemoryEntries)
runScript(context, preference.Name, "post")
}
@@ -1506,6 +1513,19 @@ func main() {
Name: "enum-only",
Usage: "enumerate the repository recursively and then exit",
},
cli.IntFlag{
Name: "metadata-chunk-size",
Value: 1024 * 1024,
Usage: "the average size of metadata chunks (defaults to 1M)",
Argument: "<size>",
},
cli.IntFlag{
Name: "max-in-memory-entries",
Value: 1024 * 1024,
Usage: "the maximum number of entries kept in memory (defaults to 1M)",
Argument: "<number>",
},
},
Usage: "Save a snapshot of the repository to the storage",
ArgsUsage: " ",
@@ -2180,6 +2200,10 @@ func main() {
Usage: "suppress logs with the specified id",
Argument: "<id>",
},
cli.BoolFlag{
Name: "print-memory-usage",
Usage: "print memory usage every second",
},
}
app.HideVersion = true

File diff suppressed because it is too large Load Diff

View File

@@ -257,7 +257,7 @@ func TestBackupManager(t *testing.T) {
backupManager.SetupSnapshotCache("default")
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false)
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false, 1024, 1024)
time.Sleep(time.Duration(delay) * time.Second)
SetDuplicacyPreferencePath(testDir + "/repository2/.duplicacy")
failedFiles := backupManager.Restore(testDir+"/repository2", threads /*inPlace=*/, false /*quickMode=*/, false, threads /*overwrite=*/, true,
@@ -282,7 +282,7 @@ func TestBackupManager(t *testing.T) {
modifyFile(testDir+"/repository1/dir1/file3", 0.3)
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "second", false, false, 0, false)
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "second", false, false, 0, false, 1024, 1024)
time.Sleep(time.Duration(delay) * time.Second)
SetDuplicacyPreferencePath(testDir + "/repository2/.duplicacy")
failedFiles = backupManager.Restore(testDir+"/repository2", 2 /*inPlace=*/, true /*quickMode=*/, true, threads /*overwrite=*/, true,
@@ -303,7 +303,7 @@ func TestBackupManager(t *testing.T) {
os.Mkdir(testDir+"/repository1/dir2/dir3", 0700)
os.Mkdir(testDir+"/repository1/dir4", 0700)
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "third", false, false, 0, false)
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "third", false, false, 0, false, 1024, 1024)
time.Sleep(time.Duration(delay) * time.Second)
// Create some directories and files under repository2 that will be deleted during restore
@@ -368,7 +368,7 @@ func TestBackupManager(t *testing.T) {
}
backupManager.SnapshotManager.CheckSnapshots( /*snapshotID*/ "host1" /*revisions*/, []int{2, 3} /*tag*/, "",
/*showStatistics*/ false /*showTabular*/, false /*checkFiles*/, false /*checkChunks*/, false /*searchFossils*/, false /*resurrect*/, false, 1 /*allowFailures*/, false)
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "fourth", false, false, 0, false)
backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "fourth", false, false, 0, false, 1024, 1024)
backupManager.SnapshotManager.PruneSnapshots("host1", "host1" /*revisions*/, nil /*tags*/, nil /*retentions*/, nil,
/*exhaustive*/ false /*exclusive=*/, true /*ignoredIDs*/, nil /*dryRun*/, false /*deleteOnly*/, false /*collectOnly*/, false, 1)
numberOfSnapshots = backupManager.SnapshotManager.ListSnapshots( /*snapshotID*/ "host1" /*revisionsToList*/, nil /*tag*/, "" /*showFiles*/, false /*showChunks*/, false)
@@ -533,7 +533,7 @@ func TestPersistRestore(t *testing.T) {
unencBackupManager.SetupSnapshotCache("default")
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
unencBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false)
unencBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false, 1024, 1024)
time.Sleep(time.Duration(delay) * time.Second)
@@ -543,7 +543,7 @@ func TestPersistRestore(t *testing.T) {
encBackupManager.SetupSnapshotCache("default")
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
encBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false)
encBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false, 1024, 1024)
time.Sleep(time.Duration(delay) * time.Second)

View File

@@ -29,29 +29,29 @@ func benchmarkSplit(reader *bytes.Reader, fileSize int64, chunkSize int, compres
config.HashKey = DEFAULT_KEY
config.IDKey = DEFAULT_KEY
maker := CreateChunkMaker(config, false)
maker := CreateFileChunkMaker(config, false)
startTime := float64(time.Now().UnixNano()) / 1e9
numberOfChunks := 0
reader.Seek(0, os.SEEK_SET)
maker.ForEachChunk(reader,
func(chunk *Chunk, final bool) {
if compression {
key := ""
if encryption {
key = "0123456789abcdef0123456789abcdef"
}
err := chunk.Encrypt([]byte(key), "", false)
if err != nil {
LOG_ERROR("BENCHMARK_ENCRYPT", "Failed to encrypt the chunk: %v", err)
}
chunkFunc := func(chunk *Chunk) {
if compression {
key := ""
if encryption {
key = "0123456789abcdef0123456789abcdef"
}
config.PutChunk(chunk)
numberOfChunks++
},
func(size int64, hash string) (io.Reader, bool) {
return nil, false
})
err := chunk.Encrypt([]byte(key), "", false)
if err != nil {
LOG_ERROR("BENCHMARK_ENCRYPT", "Failed to encrypt the chunk: %v", err)
}
}
config.PutChunk(chunk)
numberOfChunks++
}
maker.AddData(reader, chunkFunc)
maker.AddData(nil, chunkFunc)
runningTime := float64(time.Now().UnixNano())/1e9 - startTime
speed := int64(float64(fileSize) / runningTime)

View File

@@ -65,8 +65,8 @@ type Chunk struct {
config *Config // Every chunk is associated with a Config object. Which hashing algorithm to use is determined
// by the config
isSnapshot bool // Indicates if the chunk is a snapshot chunk (instead of a file chunk). This is only used by RSA
// encryption, where a snapshot chunk is not encrypted by RSA
isMetadata bool // Indicates if the chunk is a metadata chunk (instead of a file chunk). This is primarily used by RSA
// encryption, where a metadata chunk is not encrypted by RSA
isBroken bool // Indicates the chunk did not download correctly. This is only used for -persist (allowFailures) mode
}
@@ -127,7 +127,7 @@ func (chunk *Chunk) Reset(hashNeeded bool) {
chunk.hash = nil
chunk.id = ""
chunk.size = 0
chunk.isSnapshot = false
chunk.isMetadata = false
chunk.isBroken = false
}
@@ -186,7 +186,7 @@ func (chunk *Chunk) VerifyID() {
// Encrypt encrypts the plain data stored in the chunk buffer. If derivationKey is not nil, the actual
// encryption key will be HMAC-SHA256(encryptionKey, derivationKey).
func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isSnapshot bool) (err error) {
func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isMetadata bool) (err error) {
var aesBlock cipher.Block
var gcm cipher.AEAD
@@ -203,8 +203,8 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isSnapsh
key := encryptionKey
usingRSA := false
// Enable RSA encryption only when the chunk is not a snapshot chunk
if chunk.config.rsaPublicKey != nil && !isSnapshot && !chunk.isSnapshot {
// Enable RSA encryption only when the chunk is not a metadata chunk
if chunk.config.rsaPublicKey != nil && !isMetadata && !chunk.isMetadata {
randomKey := make([]byte, 32)
_, err := rand.Read(randomKey)
if err != nil {

View File

@@ -5,7 +5,6 @@
package duplicacy
import (
"io"
"sync/atomic"
"time"
)
@@ -20,78 +19,47 @@ type ChunkDownloadTask struct {
isDownloading bool // 'true' means the chunk has been downloaded or is being downloaded
}
// ChunkDownloadCompletion represents the notification when a chunk has been downloaded.
type ChunkDownloadCompletion struct {
chunkIndex int // The index of this chunk in the chunk list
chunk *Chunk // The chunk that has been downloaded
chunk *Chunk
chunkIndex int
}
// ChunkDownloader is capable of performing multi-threaded downloading. Chunks to be downloaded are first organized
// ChunkDownloader is a wrapper of ChunkOperator and is only used by the restore procedure. It is capable of performing multi-threaded downloading. Chunks to be downloaded are first organized
// as a list of ChunkDownloadTasks, with only the chunkHash field initialized. When a chunk is needed, the
// corresponding ChunkDownloadTask is sent to the downloading goroutine. Once a chunk is downloaded, it will be
// inserted in the completed task list.
type ChunkDownloader struct {
operator *ChunkOperator
totalChunkSize int64 // Total chunk size
downloadedChunkSize int64 // Downloaded chunk size
config *Config // Associated config
storage Storage // Download from this storage
snapshotCache *FileStorage // Used as cache if not nil; usually for downloading snapshot chunks
showStatistics bool // Show a stats log for each chunk if true
threads int // Number of threads
allowFailures bool // Whether to failfast on download error, or continue
taskList []ChunkDownloadTask // The list of chunks to be downloaded
completedTasks map[int]bool // Store downloaded chunks
lastChunkIndex int // a monotonically increasing number indicating the last chunk to be downloaded
taskQueue chan ChunkDownloadTask // Downloading goroutines are waiting on this channel for input
stopChannel chan bool // Used to stop the dowloading goroutines
completionChannel chan ChunkDownloadCompletion // A downloading goroutine sends back the chunk via this channel after downloading
startTime int64 // The time it starts downloading
numberOfDownloadedChunks int // The number of chunks that have been downloaded
numberOfDownloadingChunks int // The number of chunks still being downloaded
numberOfActiveChunks int // The number of chunks that is being downloaded or has been downloaded but not reclaimed
NumberOfFailedChunks int // The number of chunks that can't be downloaded
}
func CreateChunkDownloader(config *Config, storage Storage, snapshotCache *FileStorage, showStatistics bool, threads int, allowFailures bool) *ChunkDownloader {
func CreateChunkDownloader(operator *ChunkOperator) *ChunkDownloader {
downloader := &ChunkDownloader{
config: config,
storage: storage,
snapshotCache: snapshotCache,
showStatistics: showStatistics,
threads: threads,
allowFailures: allowFailures,
operator: operator,
taskList: nil,
completedTasks: make(map[int]bool),
lastChunkIndex: 0,
taskQueue: make(chan ChunkDownloadTask, threads),
stopChannel: make(chan bool),
completionChannel: make(chan ChunkDownloadCompletion),
startTime: time.Now().Unix(),
}
// Start the downloading goroutines
for i := 0; i < downloader.threads; i++ {
go func(threadIndex int) {
defer CatchLogException()
for {
select {
case task := <-downloader.taskQueue:
downloader.Download(threadIndex, task)
case <-downloader.stopChannel:
return
}
}
}(i)
}
return downloader
}
@@ -129,26 +97,6 @@ func (downloader *ChunkDownloader) AddFiles(snapshot *Snapshot, files []*Entry)
}
}
// AddChunk appends a single chunk, identified by its hash, to the download list and returns the index of the
// new task in that list. If fewer than 'threads' chunks are currently active, the task is handed to the
// downloading goroutines right away and marked as downloading.
func (downloader *ChunkDownloader) AddChunk(chunkHash string) int {
	index := len(downloader.taskList)

	// chunkLength and isDownloading are left at their zero values (0 and false).
	newTask := ChunkDownloadTask{
		chunkIndex: index,
		chunkHash:  chunkHash,
		needed:     true,
	}
	downloader.taskList = append(downloader.taskList, newTask)

	// All download slots are taken; the task stays queued in the list only.
	if downloader.numberOfActiveChunks >= downloader.threads {
		return index
	}

	downloader.taskQueue <- newTask
	downloader.numberOfDownloadingChunks++
	downloader.numberOfActiveChunks++
	downloader.taskList[index].isDownloading = true
	return index
}
// Prefetch adds up to 'threads' chunks needed by a file to the download list
func (downloader *ChunkDownloader) Prefetch(file *Entry) {
@@ -159,20 +107,22 @@ func (downloader *ChunkDownloader) Prefetch(file *Entry) {
task := &downloader.taskList[i]
if task.needed {
if !task.isDownloading {
if downloader.numberOfActiveChunks >= downloader.threads {
if downloader.numberOfActiveChunks >= downloader.operator.threads {
return
}
LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching %s chunk %s", file.Path,
downloader.config.GetChunkIDFromHash(task.chunkHash))
downloader.taskQueue <- *task
downloader.operator.config.GetChunkIDFromHash(task.chunkHash))
downloader.operator.DownloadAsync(task.chunkHash, i, false, func (chunk *Chunk, chunkIndex int) {
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
})
task.isDownloading = true
downloader.numberOfDownloadingChunks++
downloader.numberOfActiveChunks++
}
} else {
LOG_DEBUG("DOWNLOAD_PREFETCH", "%s chunk %s is not needed", file.Path,
downloader.config.GetChunkIDFromHash(task.chunkHash))
downloader.operator.config.GetChunkIDFromHash(task.chunkHash))
}
}
}
@@ -186,7 +136,7 @@ func (downloader *ChunkDownloader) Reclaim(chunkIndex int) {
for i := range downloader.completedTasks {
if i < chunkIndex && downloader.taskList[i].chunk != nil {
downloader.config.PutChunk(downloader.taskList[i].chunk)
downloader.operator.config.PutChunk(downloader.taskList[i].chunk)
downloader.taskList[i].chunk = nil
delete(downloader.completedTasks, i)
downloader.numberOfActiveChunks--
@@ -222,8 +172,10 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
// If we haven't started download the specified chunk, download it now
if !downloader.taskList[chunkIndex].isDownloading {
LOG_DEBUG("DOWNLOAD_FETCH", "Fetching chunk %s",
downloader.config.GetChunkIDFromHash(downloader.taskList[chunkIndex].chunkHash))
downloader.taskQueue <- downloader.taskList[chunkIndex]
downloader.operator.config.GetChunkIDFromHash(downloader.taskList[chunkIndex].chunkHash))
downloader.operator.DownloadAsync(downloader.taskList[chunkIndex].chunkHash, chunkIndex, false, func (chunk *Chunk, chunkIndex int) {
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
})
downloader.taskList[chunkIndex].isDownloading = true
downloader.numberOfDownloadingChunks++
downloader.numberOfActiveChunks++
@@ -231,7 +183,7 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
// We also need to look ahead and prefetch other chunks as many as permitted by the number of threads
for i := chunkIndex + 1; i < len(downloader.taskList); i++ {
if downloader.numberOfActiveChunks >= downloader.threads {
if downloader.numberOfActiveChunks >= downloader.operator.threads {
break
}
task := &downloader.taskList[i]
@@ -240,8 +192,10 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
}
if !task.isDownloading {
LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching chunk %s", downloader.config.GetChunkIDFromHash(task.chunkHash))
downloader.taskQueue <- *task
LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching chunk %s", downloader.operator.config.GetChunkIDFromHash(task.chunkHash))
downloader.operator.DownloadAsync(task.chunkHash, task.chunkIndex, false, func (chunk *Chunk, chunkIndex int) {
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
})
task.isDownloading = true
downloader.numberOfDownloadingChunks++
downloader.numberOfActiveChunks++
@@ -255,9 +209,6 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) {
downloader.taskList[completion.chunkIndex].chunk = completion.chunk
downloader.numberOfDownloadedChunks++
downloader.numberOfDownloadingChunks--
if completion.chunk.isBroken {
downloader.NumberOfFailedChunks++
}
}
return downloader.taskList[chunkIndex].chunk
}
@@ -281,13 +232,10 @@ func (downloader *ChunkDownloader) WaitForCompletion() {
// Wait for a completion event first
if downloader.numberOfActiveChunks > 0 {
completion := <-downloader.completionChannel
downloader.config.PutChunk(completion.chunk)
downloader.operator.config.PutChunk(completion.chunk)
downloader.numberOfActiveChunks--
downloader.numberOfDownloadedChunks++
downloader.numberOfDownloadingChunks--
if completion.chunk.isBroken {
downloader.NumberOfFailedChunks++
}
}
// Pass the tasks one by one to the download queue
@@ -297,7 +245,9 @@ func (downloader *ChunkDownloader) WaitForCompletion() {
downloader.lastChunkIndex++
continue
}
downloader.taskQueue <- *task
downloader.operator.DownloadAsync(task.chunkHash, task.chunkIndex, false, func (chunk *Chunk, chunkIndex int) {
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex }
})
task.isDownloading = true
downloader.numberOfDownloadingChunks++
downloader.numberOfActiveChunks++
@@ -306,213 +256,3 @@ func (downloader *ChunkDownloader) WaitForCompletion() {
}
}
// Stop terminates all downloading goroutines. It first waits for every in-flight download to report its
// completion, then returns all completed chunks to the config's chunk pool, and finally sends one stop signal
// per downloading thread.
func (downloader *ChunkDownloader) Stop() {
	// Drain the completion channel until no download is still in progress.
	for downloader.numberOfDownloadingChunks > 0 {
		done := <-downloader.completionChannel
		index := done.chunkIndex
		downloader.completedTasks[index] = true
		downloader.taskList[index].chunk = done.chunk
		downloader.numberOfDownloadedChunks++
		downloader.numberOfDownloadingChunks--
		if done.chunk.isBroken {
			downloader.NumberOfFailedChunks++
		}
	}

	// Give back every chunk held by a completed task.
	for index := range downloader.completedTasks {
		finished := &downloader.taskList[index]
		downloader.config.PutChunk(finished.chunk)
		finished.chunk = nil
		downloader.numberOfActiveChunks--
	}

	// One stop message for each downloading goroutine.
	for sent := 0; sent < downloader.threads; sent++ {
		downloader.stopChannel <- true
	}
}
// Download downloads a chunk from the storage.
//
// When a snapshot cache is configured and the storage reports that a cache is needed, the cache is consulted
// first; a verified cache hit is sent to the completion channel and the function returns false. Otherwise the
// chunk is located in the storage (falling back to the fossil pool and resurrecting the fossil if found),
// downloaded, decrypted and verified against the expected chunk id. Several failure modes are retried while
// downloadAttempt < MaxDownloadAttempts. On success the chunk is copied to the snapshot cache (if a cache path
// was determined), progress is logged, the chunk is sent to the completion channel and the function returns
// true. On failure the function returns false; the chunk is sent to the completion channel (marked as broken)
// only when allowFailures is set.
//
// 'threadIndex' is passed through to the storage calls; 'task' supplies the chunk hash and the chunk index that
// is reported back in the completion.
func (downloader *ChunkDownloader) Download(threadIndex int, task ChunkDownloadTask) bool {
cachedPath := ""
chunk := downloader.config.GetChunk()
chunkID := downloader.config.GetChunkIDFromHash(task.chunkHash)
if downloader.snapshotCache != nil && downloader.storage.IsCacheNeeded() {
var exist bool
var err error
// Reset the chunk with a hasher -- we're reading from the cache where chunks are not encrypted or compressed
chunk.Reset(true)
cachedPath, exist, _, err = downloader.snapshotCache.FindChunk(threadIndex, chunkID, false)
if err != nil {
LOG_WARN("DOWNLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
} else if exist {
// NOTE(review): the cache read uses thread index 0 rather than threadIndex -- confirm this is intended
err = downloader.snapshotCache.DownloadFile(0, cachedPath, chunk)
if err != nil {
LOG_WARN("DOWNLOAD_CACHE", "Failed to load the chunk %s from the snapshot cache: %v", chunkID, err)
} else {
actualChunkID := chunk.GetID()
if actualChunkID != chunkID {
LOG_WARN("DOWNLOAD_CACHE_CORRUPTED",
"The chunk %s load from the snapshot cache has a hash id of %s", chunkID, actualChunkID)
} else {
LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been loaded from the snapshot cache", chunkID)
downloader.completionChannel <- ChunkDownloadCompletion{chunk: chunk, chunkIndex: task.chunkIndex}
// NOTE(review): a cache hit completes the task but returns false, unlike a storage download
return false
}
}
}
}
// Reset the chunk without a hasher -- the downloaded content will be encrypted and/or compressed and the hasher
// will be set up before the encryption
chunk.Reset(false)
// If failures are allowed, complete the task properly
completeFailedChunk := func(chunk *Chunk) {
if downloader.allowFailures {
chunk.isBroken = true
downloader.completionChannel <- ChunkDownloadCompletion{chunk: chunk, chunkIndex: task.chunkIndex}
}
}
// Retries are permitted while downloadAttempt < MaxDownloadAttempts; every 'continue' below (including a
// fossil resurrection) advances downloadAttempt.
const MaxDownloadAttempts = 3
for downloadAttempt := 0; ; downloadAttempt++ {
// Find the chunk by ID first.
chunkPath, exist, _, err := downloader.storage.FindChunk(threadIndex, chunkID, false)
if err != nil {
completeFailedChunk(chunk)
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
return false
}
if !exist {
// No chunk is found. Have to find it in the fossil pool again.
// Note that 'exist' and 'err' are redeclared here and shadow the outer ones for the rest of this branch.
fossilPath, exist, _, err := downloader.storage.FindChunk(threadIndex, chunkID, true)
if err != nil {
completeFailedChunk(chunk)
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
return false
}
if !exist {
retry := false
// Retry for Hubic or WebDAV as it may return 404 even when the chunk exists
if _, ok := downloader.storage.(*HubicStorage); ok {
retry = true
}
if _, ok := downloader.storage.(*WebDAVStorage); ok {
retry = true
}
if retry && downloadAttempt < MaxDownloadAttempts {
LOG_WARN("DOWNLOAD_RETRY", "Failed to find the chunk %s; retrying", chunkID)
continue
}
completeFailedChunk(chunk)
// A chunk is not found. This is a serious error and hopefully it will never happen.
// NOTE(review): err was already checked to be nil after the fossil lookup above, so the first branch
// below looks unreachable.
if err != nil {
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found: %v", chunkID, err)
} else {
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found", chunkID)
}
return false
}
// We can't download the fossil directly. We have to turn it back into a regular chunk and try
// downloading again.
err = downloader.storage.MoveFile(threadIndex, fossilPath, chunkPath)
if err != nil {
completeFailedChunk(chunk)
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to resurrect chunk %s: %v", chunkID, err)
return false
}
LOG_WARN("DOWNLOAD_RESURRECT", "Fossil %s has been resurrected", chunkID)
continue
}
err = downloader.storage.DownloadFile(threadIndex, chunkPath, chunk)
if err != nil {
_, isHubic := downloader.storage.(*HubicStorage)
// Retry on EOF or if it is a Hubic backend as it may return 404 even when the chunk exists
if (err == io.ErrUnexpectedEOF || isHubic) && downloadAttempt < MaxDownloadAttempts {
LOG_WARN("DOWNLOAD_RETRY", "Failed to download the chunk %s: %v; retrying", chunkID, err)
chunk.Reset(false)
continue
} else {
completeFailedChunk(chunk)
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to download the chunk %s: %v", chunkID, err)
return false
}
}
// Decrypt with the config's chunk key, using the chunk hash as the derivation key.
err = chunk.Decrypt(downloader.config.ChunkKey, task.chunkHash)
if err != nil {
if downloadAttempt < MaxDownloadAttempts {
LOG_WARN("DOWNLOAD_RETRY", "Failed to decrypt the chunk %s: %v; retrying", chunkID, err)
chunk.Reset(false)
continue
} else {
completeFailedChunk(chunk)
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_DECRYPT", "Failed to decrypt the chunk %s: %v", chunkID, err)
return false
}
}
// Verify that the id of the decrypted content matches the id derived from the requested hash.
actualChunkID := chunk.GetID()
if actualChunkID != chunkID {
if downloadAttempt < MaxDownloadAttempts {
LOG_WARN("DOWNLOAD_RETRY", "The chunk %s has a hash id of %s; retrying", chunkID, actualChunkID)
chunk.Reset(false)
continue
} else {
completeFailedChunk(chunk)
LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CORRUPTED", "The chunk %s has a hash id of %s", chunkID, actualChunkID)
return false
}
}
break
}
// cachedPath is non-empty only if the snapshot cache lookup at the top produced a path.
if len(cachedPath) > 0 {
// Save a copy to the local snapshot cache
err := downloader.snapshotCache.UploadFile(threadIndex, cachedPath, chunk.GetBytes())
if err != nil {
LOG_WARN("DOWNLOAD_CACHE", "Failed to add the chunk %s to the snapshot cache: %v", chunkID, err)
}
}
// The downloaded size is updated atomically.
downloadedChunkSize := atomic.AddInt64(&downloader.downloadedChunkSize, int64(chunk.GetLength()))
if (downloader.showStatistics || IsTracing()) && downloader.totalChunkSize > 0 {
now := time.Now().Unix()
// Make the elapsed time at least one second to avoid a division by zero below.
if now <= downloader.startTime {
now = downloader.startTime + 1
}
speed := downloadedChunkSize / (now - downloader.startTime)
remainingTime := int64(0)
if speed > 0 {
remainingTime = (downloader.totalChunkSize-downloadedChunkSize)/speed + 1
}
// The percentage is computed in tenths of a percent and divided by 10 when printed.
percentage := float32(downloadedChunkSize * 1000 / downloader.totalChunkSize)
LOG_INFO("DOWNLOAD_PROGRESS", "Downloaded chunk %d size %d, %sB/s %s %.1f%%",
task.chunkIndex+1, chunk.GetLength(),
PrettySize(speed), PrettyTime(remainingTime), percentage/10)
} else {
LOG_DEBUG("CHUNK_DOWNLOAD", "Chunk %s has been downloaded", chunkID)
}
downloader.completionChannel <- ChunkDownloadCompletion{chunk: chunk, chunkIndex: task.chunkIndex}
return true
}

View File

@@ -25,15 +25,20 @@ type ChunkMaker struct {
bufferSize int
bufferStart int
minimumReached bool
hashSum uint64
chunk *Chunk
config *Config
hashOnly bool
hashOnlyChunk *Chunk
}
// CreateChunkMaker creates a chunk maker. 'randomSeed' is used to generate the character-to-integer table needed by
// buzhash.
func CreateChunkMaker(config *Config, hashOnly bool) *ChunkMaker {
func CreateFileChunkMaker(config *Config, hashOnly bool) *ChunkMaker {
size := 1
for size*2 <= config.AverageChunkSize {
size *= 2
@@ -67,6 +72,33 @@ func CreateChunkMaker(config *Config, hashOnly bool) *ChunkMaker {
}
maker.buffer = make([]byte, 2*config.MinimumChunkSize)
maker.bufferStart = 0
maker.bufferSize = 0
maker.startNewChunk()
return maker
}
// CreateMetaDataChunkMaker builds a chunk maker for metadata on top of CreateFileChunkMaker (with hashOnly
// disabled) and overrides its sizing: 'chunkSize' must be a power of 2 and determines the hash mask
// (chunkSize - 1), the minimum chunk size (chunkSize / 4) and the maximum chunk size (chunkSize * 4). The
// internal buffer is reallocated to twice the minimum chunk size. If 'chunkSize' is not a power of 2, a fatal
// error is logged and nil is returned.
func CreateMetaDataChunkMaker(config *Config, chunkSize int) *ChunkMaker {
	// Find the largest power of 2 that does not exceed chunkSize; it must equal chunkSize itself.
	powerOfTwo := 1
	for powerOfTwo*2 <= chunkSize {
		powerOfTwo *= 2
	}
	if powerOfTwo != chunkSize {
		LOG_FATAL("CHUNK_SIZE", "Invalid metadata chunk size: %d is not a power of 2", chunkSize)
		return nil
	}

	metadataMaker := CreateFileChunkMaker(config, false)

	// Replace the file-chunk sizing with sizes derived from the requested metadata chunk size.
	metadataMaker.hashMask = uint64(chunkSize - 1)
	metadataMaker.minimumChunkSize = chunkSize / 4
	metadataMaker.maximumChunkSize = chunkSize * 4

	metadataMaker.bufferCapacity = 2 * metadataMaker.minimumChunkSize
	metadataMaker.buffer = make([]byte, metadataMaker.bufferCapacity)

	return metadataMaker
}
@@ -90,62 +122,50 @@ func (maker *ChunkMaker) buzhashUpdate(sum uint64, out byte, in byte, length int
return rotateLeftByOne(sum) ^ rotateLeft(maker.randomTable[out], uint(length)) ^ maker.randomTable[in]
}
// ForEachChunk reads data from 'reader'. If EOF is encountered, it will call 'nextReader' to ask for next file. If
// 'nextReader' returns false, it will process remaining data in the buffer and then quit. When a chunk is identified,
// it will call 'endOfChunk' to return the chunk size and a boolean flag indicating if it is the last chunk.
func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *Chunk, final bool),
nextReader func(size int64, hash string) (io.Reader, bool)) {
// startNewChunk resets the rolling hash state (hashSum and minimumReached) and makes maker.chunk point to a
// chunk that has been reset with a hasher: the shared hashOnlyChunk in hash-only mode, or a chunk obtained from
// the config otherwise.
//
// The named result is never assigned, so this function always returns nil; the new chunk is available as
// maker.chunk.
func (maker *ChunkMaker) startNewChunk() (chunk *Chunk) {
	maker.hashSum = 0
	maker.minimumReached = false

	// Pick the chunk to fill next; only ask the config for one when real chunk data is needed.
	next := maker.hashOnlyChunk
	if !maker.hashOnly {
		next = maker.config.GetChunk()
	}
	maker.chunk = next
	maker.chunk.Reset(true)

	return chunk
}
maker.bufferStart = 0
maker.bufferSize = 0
var minimumReached bool
var hashSum uint64
var chunk *Chunk
func (maker *ChunkMaker) AddData(reader io.Reader, sendChunk func(*Chunk)) (int64, string) {
isEOF := false
fileSize := int64(0)
fileHasher := maker.config.NewFileHasher()
// Start a new chunk.
startNewChunk := func() {
hashSum = 0
minimumReached = false
if maker.hashOnly {
chunk = maker.hashOnlyChunk
chunk.Reset(true)
} else {
chunk = maker.config.GetChunk()
chunk.Reset(true)
}
}
// Move data from the buffer to the chunk.
fill := func(count int) {
if maker.bufferStart+count < maker.bufferCapacity {
chunk.Write(maker.buffer[maker.bufferStart : maker.bufferStart+count])
maker.chunk.Write(maker.buffer[maker.bufferStart : maker.bufferStart+count])
maker.bufferStart += count
maker.bufferSize -= count
} else {
chunk.Write(maker.buffer[maker.bufferStart:])
chunk.Write(maker.buffer[:count-(maker.bufferCapacity-maker.bufferStart)])
maker.chunk.Write(maker.buffer[maker.bufferStart:])
maker.chunk.Write(maker.buffer[:count-(maker.bufferCapacity-maker.bufferStart)])
maker.bufferStart = count - (maker.bufferCapacity - maker.bufferStart)
maker.bufferSize -= count
}
}
startNewChunk()
var err error
isEOF := false
if maker.minimumChunkSize == maker.maximumChunkSize {
if maker.bufferCapacity < maker.minimumChunkSize {
maker.buffer = make([]byte, maker.minimumChunkSize)
if reader == nil {
return 0, ""
}
for {
maker.startNewChunk()
maker.bufferStart = 0
for maker.bufferStart < maker.minimumChunkSize && !isEOF {
count, err := reader.Read(maker.buffer[maker.bufferStart:maker.minimumChunkSize])
@@ -153,7 +173,7 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
if err != nil {
if err != io.EOF {
LOG_ERROR("CHUNK_MAKER", "Failed to read %d bytes: %s", count, err.Error())
return
return 0, ""
} else {
isEOF = true
}
@@ -161,26 +181,15 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
maker.bufferStart += count
}
fileHasher.Write(maker.buffer[:maker.bufferStart])
fileSize += int64(maker.bufferStart)
chunk.Write(maker.buffer[:maker.bufferStart])
if maker.bufferStart > 0 {
fileHasher.Write(maker.buffer[:maker.bufferStart])
fileSize += int64(maker.bufferStart)
maker.chunk.Write(maker.buffer[:maker.bufferStart])
sendChunk(maker.chunk)
}
if isEOF {
var ok bool
reader, ok = nextReader(fileSize, hex.EncodeToString(fileHasher.Sum(nil)))
if !ok {
endOfChunk(chunk, true)
return
} else {
endOfChunk(chunk, false)
startNewChunk()
fileSize = 0
fileHasher = maker.config.NewFileHasher()
isEOF = false
}
} else {
endOfChunk(chunk, false)
startNewChunk()
return fileSize, hex.EncodeToString(fileHasher.Sum(nil))
}
}
@@ -189,7 +198,7 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
for {
// If the buffer still has some space left and EOF is not seen, read more data.
for maker.bufferSize < maker.bufferCapacity && !isEOF {
for maker.bufferSize < maker.bufferCapacity && !isEOF && reader != nil {
start := maker.bufferStart + maker.bufferSize
count := maker.bufferCapacity - start
if start >= maker.bufferCapacity {
@@ -201,7 +210,7 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
if err != nil && err != io.EOF {
LOG_ERROR("CHUNK_MAKER", "Failed to read %d bytes: %s", count, err.Error())
return
return 0, ""
}
maker.bufferSize += count
@@ -210,54 +219,55 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
// if EOF is seen, try to switch to next file and continue
if err == io.EOF {
var ok bool
reader, ok = nextReader(fileSize, hex.EncodeToString(fileHasher.Sum(nil)))
if !ok {
isEOF = true
} else {
fileSize = 0
fileHasher = maker.config.NewFileHasher()
isEOF = false
}
isEOF = true
break
}
}
// Not enough data to meet the minimum chunk size requirement, so just return as a chunk.
if maker.bufferSize < maker.minimumChunkSize {
fill(maker.bufferSize)
endOfChunk(chunk, true)
return
if reader == nil {
fill(maker.bufferSize)
if maker.chunk.GetLength() > 0 {
sendChunk(maker.chunk)
}
return 0, ""
} else if isEOF {
return fileSize, hex.EncodeToString(fileHasher.Sum(nil))
} else {
continue
}
}
// Minimum chunk size has been reached. Calculate the buzhash for the minimum size chunk.
if !minimumReached {
if !maker.minimumReached {
bytes := maker.minimumChunkSize
if maker.bufferStart+bytes < maker.bufferCapacity {
hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:maker.bufferStart+bytes])
maker.hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:maker.bufferStart+bytes])
} else {
hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:])
hashSum = maker.buzhashSum(hashSum,
maker.hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:])
maker.hashSum = maker.buzhashSum(maker.hashSum,
maker.buffer[:bytes-(maker.bufferCapacity-maker.bufferStart)])
}
if (hashSum & maker.hashMask) == 0 {
if (maker.hashSum & maker.hashMask) == 0 {
// This is a minimum size chunk
fill(bytes)
endOfChunk(chunk, false)
startNewChunk()
sendChunk(maker.chunk)
maker.startNewChunk()
continue
}
minimumReached = true
maker.minimumReached = true
}
// Now check the buzhash of the data in the buffer, shifting one byte at a time.
bytes := maker.bufferSize - maker.minimumChunkSize
isEOC := false
maxSize := maker.maximumChunkSize - chunk.GetLength()
for i := 0; i < maker.bufferSize-maker.minimumChunkSize; i++ {
isEOC := false // chunk boundary found
maxSize := maker.maximumChunkSize - maker.chunk.GetLength()
for i := 0; i < bytes; i++ {
out := maker.bufferStart + i
if out >= maker.bufferCapacity {
out -= maker.bufferCapacity
@@ -267,8 +277,8 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
in -= maker.bufferCapacity
}
hashSum = maker.buzhashUpdate(hashSum, maker.buffer[out], maker.buffer[in], maker.minimumChunkSize)
if (hashSum&maker.hashMask) == 0 || i == maxSize-maker.minimumChunkSize-1 {
maker.hashSum = maker.buzhashUpdate(maker.hashSum, maker.buffer[out], maker.buffer[in], maker.minimumChunkSize)
if (maker.hashSum&maker.hashMask) == 0 || i == maxSize-maker.minimumChunkSize-1 {
// A chunk is completed.
bytes = i + 1 + maker.minimumChunkSize
isEOC = true
@@ -277,21 +287,20 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C
}
fill(bytes)
if isEOC {
if isEOF && maker.bufferSize == 0 {
endOfChunk(chunk, true)
return
sendChunk(maker.chunk)
maker.startNewChunk()
} else {
if reader == nil {
fill(maker.minimumChunkSize)
sendChunk(maker.chunk)
maker.startNewChunk()
return 0, ""
}
endOfChunk(chunk, false)
startNewChunk()
continue
}
if isEOF {
fill(maker.bufferSize)
endOfChunk(chunk, true)
return
return fileSize, hex.EncodeToString(fileHasher.Sum(nil))
}
}
}

View File

@@ -7,14 +7,12 @@ package duplicacy
import (
"bytes"
crypto_rand "crypto/rand"
"io"
"math/rand"
"sort"
"testing"
)
func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunkSize,
bufferCapacity int) ([]string, int) {
func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunkSize int) ([]string, int) {
config := CreateConfig()
@@ -27,14 +25,12 @@ func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunk
config.HashKey = DEFAULT_KEY
config.IDKey = DEFAULT_KEY
maker := CreateChunkMaker(config, false)
maker := CreateFileChunkMaker(config, false)
var chunks []string
totalChunkSize := 0
totalFileSize := int64(0)
//LOG_INFO("CHUNK_SPLIT", "bufferCapacity: %d", bufferCapacity)
buffers := make([]*bytes.Buffer, n)
sizes := make([]int, n)
sizes[0] = 0
@@ -42,7 +38,7 @@ func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunk
same := true
for same {
same = false
sizes[i] = rand.Int() % n
sizes[i] = rand.Int() % len(content)
for j := 0; j < i; j++ {
if sizes[i] == sizes[j] {
same = true
@@ -59,22 +55,17 @@ func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunk
}
buffers[n-1] = bytes.NewBuffer(content[sizes[n-1]:])
i := 0
chunkFunc := func(chunk *Chunk) {
chunks = append(chunks, chunk.GetHash())
totalChunkSize += chunk.GetLength()
config.PutChunk(chunk)
}
maker.ForEachChunk(buffers[0],
func(chunk *Chunk, final bool) {
//LOG_INFO("CHUNK_SPLIT", "i: %d, chunk: %s, size: %d", i, chunk.GetHash(), size)
chunks = append(chunks, chunk.GetHash())
totalChunkSize += chunk.GetLength()
},
func(size int64, hash string) (io.Reader, bool) {
totalFileSize += size
i++
if i >= len(buffers) {
return nil, false
}
return buffers[i], true
})
for _, buffer := range buffers {
fileSize, _ := maker.AddData(buffer, chunkFunc)
totalFileSize += fileSize
}
maker.AddData(nil, chunkFunc)
if totalFileSize != int64(totalChunkSize) {
LOG_ERROR("CHUNK_SPLIT", "total chunk size: %d, total file size: %d", totalChunkSize, totalFileSize)
@@ -96,35 +87,28 @@ func TestChunkMaker(t *testing.T) {
continue
}
chunkArray1, totalSize1 := splitIntoChunks(content, 10, 32, 64, 16, 32)
chunkArray1, totalSize1 := splitIntoChunks(content, 10, 32, 64, 16)
capacities := [...]int{32, 33, 34, 61, 62, 63, 64, 65, 66, 126, 127, 128, 129, 130,
255, 256, 257, 511, 512, 513, 1023, 1024, 1025,
32, 48, 64, 128, 256, 512, 1024, 2048}
//capacities := [...]int { 32 }
for _, n := range [...]int{6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} {
chunkArray2, totalSize2 := splitIntoChunks(content, n, 32, 64, 16)
for _, capacity := range capacities {
if totalSize1 != totalSize2 {
t.Errorf("[size %d] total size is %d instead of %d",
size, totalSize2, totalSize1)
}
for _, n := range [...]int{6, 7, 8, 9, 10} {
chunkArray2, totalSize2 := splitIntoChunks(content, n, 32, 64, 16, capacity)
if totalSize1 != totalSize2 {
t.Errorf("[size %d, capacity %d] total size is %d instead of %d",
size, capacity, totalSize2, totalSize1)
}
if len(chunkArray1) != len(chunkArray2) {
t.Errorf("[size %d, capacity %d] number of chunks is %d instead of %d",
size, capacity, len(chunkArray2), len(chunkArray1))
} else {
for i := 0; i < len(chunkArray1); i++ {
if chunkArray1[i] != chunkArray2[i] {
t.Errorf("[size %d, capacity %d, chunk %d] chunk is different", size, capacity, i)
}
if len(chunkArray1) != len(chunkArray2) {
t.Errorf("[size %d] number of chunks is %d instead of %d",
size, len(chunkArray2), len(chunkArray1))
} else {
for i := 0; i < len(chunkArray1); i++ {
if chunkArray1[i] != chunkArray2[i] {
t.Errorf("[size %d, chunk %d] chunk is different", size, i)
}
}
}
}
}

View File

@@ -5,6 +5,7 @@
package duplicacy
import (
"io"
"sync"
"sync/atomic"
"time"
@@ -12,42 +13,69 @@ import (
// These are operations that ChunkOperator will perform.
const (
ChunkOperationFind = 0
ChunkOperationDelete = 1
ChunkOperationFossilize = 2
ChunkOperationResurrect = 3
ChunkOperationDownload = 0
ChunkOperationUpload = 1
ChunkOperationDelete = 2
ChunkOperationFossilize = 3
ChunkOperationResurrect = 4
ChunkOperationFind = 5
)
// ChunkOperatorTask is used to pass parameters for different kinds of chunk operations.
type ChunkOperatorTask struct {
operation int // The type of operation
chunkID string // The chunk id
filePath string // The path of the chunk file; it may be empty
// ChunkTask is used to pass parameters for different kinds of chunk operations.
type ChunkTask struct {
operation int // The type of operation
chunkID string // The chunk id
chunkHash string // The chunk hash
chunkIndex int // The chunk index
filePath string // The path of the chunk file; it may be empty
isMetadata bool
chunk *Chunk
completionFunc func(chunk *Chunk, chunkIndex int)
}
// ChunkOperator is capable of performing multi-threaded operations on chunks.
type ChunkOperator struct {
numberOfActiveTasks int64 // The number of chunks that are being operated on
storage Storage // This storage
threads int // Number of threads
taskQueue chan ChunkOperatorTask // Operating goroutines are waiting on this channel for input
stopChannel chan bool // Used to stop all the goroutines
config *Config // Associated config
storage Storage // This storage
snapshotCache *FileStorage
showStatistics bool
threads int // Number of threads
taskQueue chan ChunkTask // Operating goroutines are waiting on this channel for input
stopChannel chan bool // Used to stop all the goroutines
fossils []string // For fossilize operation, the paths of the fossils are stored in this slice
fossilsLock *sync.Mutex // The lock for 'fossils'
numberOfActiveTasks int64 // The number of chunks that are being operated on
fossils []string // For fossilize operation, the paths of the fossils are stored in this slice
collectionLock *sync.Mutex // The lock for accessing 'fossils'
startTime int64 // The time it starts downloading
totalChunkSize int64 // Total chunk size
downloadedChunkSize int64 // Downloaded chunk size
allowFailures bool // Whether to fail on download error, or continue
NumberOfFailedChunks int64 // The number of chunks that can't be downloaded
UploadCompletionFunc func(chunk *Chunk, chunkIndex int, inCache bool, chunkSize int, uploadSize int)
}
// CreateChunkOperator creates a new ChunkOperator.
func CreateChunkOperator(storage Storage, threads int) *ChunkOperator {
func CreateChunkOperator(config *Config, storage Storage, snapshotCache *FileStorage, showStatistics bool, threads int, allowFailures bool) *ChunkOperator {
operator := &ChunkOperator{
config: config,
storage: storage,
snapshotCache: snapshotCache,
showStatistics: showStatistics,
threads: threads,
taskQueue: make(chan ChunkOperatorTask, threads*4),
taskQueue: make(chan ChunkTask, threads),
stopChannel: make(chan bool),
fossils: make([]string, 0),
fossilsLock: &sync.Mutex{},
collectionLock: &sync.Mutex{},
allowFailures: allowFailures,
}
// Start the operator goroutines
@@ -84,38 +112,78 @@ func (operator *ChunkOperator) Stop() {
atomic.AddInt64(&operator.numberOfActiveTasks, int64(-1))
}
func (operator *ChunkOperator) AddTask(operation int, chunkID string, filePath string) {
func (operator *ChunkOperator) WaitForCompletion() {
task := ChunkOperatorTask{
operation: operation,
chunkID: chunkID,
filePath: filePath,
for atomic.LoadInt64(&operator.numberOfActiveTasks) > 0 {
time.Sleep(100 * time.Millisecond)
}
operator.taskQueue <- task
atomic.AddInt64(&operator.numberOfActiveTasks, int64(1))
}
func (operator *ChunkOperator) Find(chunkID string) {
operator.AddTask(ChunkOperationFind, chunkID, "")
func (operator *ChunkOperator) AddTask(operation int, chunkID string, chunkHash string, filePath string, chunkIndex int, chunk *Chunk, isMetadata bool, completionFunc func(*Chunk, int)) {
task := ChunkTask {
operation: operation,
chunkID: chunkID,
chunkHash: chunkHash,
chunkIndex: chunkIndex,
filePath: filePath,
chunk: chunk,
isMetadata: isMetadata,
completionFunc: completionFunc,
}
operator.taskQueue <- task
atomic.AddInt64(&operator.numberOfActiveTasks, int64(1))
return
}
// Download queues a download task for the chunk with the given hash and blocks until a worker
// goroutine hands the resulting chunk to the completion function, then returns that chunk.
func (operator *ChunkOperator) Download(chunkHash string, chunkIndex int, isMetadata bool) *Chunk {
	id := operator.config.GetChunkIDFromHash(chunkHash)

	// Unbuffered: the worker's completion call rendezvous with the receive below.
	done := make(chan *Chunk)
	onCompletion := func(downloaded *Chunk, _ int) {
		done <- downloaded
	}

	operator.AddTask(ChunkOperationDownload, id, chunkHash, "", chunkIndex, nil, isMetadata, onCompletion)

	result := <-done
	return result
}
// DownloadAsync queues a download task for the chunk with the given hash and returns once the task
// has been queued. 'completionFunc' is stored in the task for the worker goroutine to invoke.
func (operator *ChunkOperator) DownloadAsync(chunkHash string, chunkIndex int, isMetadata bool, completionFunc func(*Chunk, int)) {
	operator.AddTask(
		ChunkOperationDownload,
		operator.config.GetChunkIDFromHash(chunkHash),
		chunkHash,
		"",
		chunkIndex,
		nil,
		isMetadata,
		completionFunc)
}
// Upload queues an upload task for 'chunk'. The chunk id is derived from the chunk's hash. No
// per-task completion function is attached.
func (operator *ChunkOperator) Upload(chunk *Chunk, chunkIndex int, isMetadata bool) {
	hash := chunk.GetHash()
	operator.AddTask(
		ChunkOperationUpload,
		operator.config.GetChunkIDFromHash(hash),
		hash,
		"",
		chunkIndex,
		chunk,
		isMetadata,
		nil)
}
func (operator *ChunkOperator) Delete(chunkID string, filePath string) {
operator.AddTask(ChunkOperationDelete, chunkID, filePath)
operator.AddTask(ChunkOperationDelete, chunkID, "", filePath, 0, nil, false, nil)
}
func (operator *ChunkOperator) Fossilize(chunkID string, filePath string) {
operator.AddTask(ChunkOperationFossilize, chunkID, filePath)
operator.AddTask(ChunkOperationFossilize, chunkID, "", filePath, 0, nil, false, nil)
}
func (operator *ChunkOperator) Resurrect(chunkID string, filePath string) {
operator.AddTask(ChunkOperationResurrect, chunkID, filePath)
operator.AddTask(ChunkOperationResurrect, chunkID, "", filePath, 0, nil, false, nil)
}
func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
func (operator *ChunkOperator) Run(threadIndex int, task ChunkTask) {
defer func() {
atomic.AddInt64(&operator.numberOfActiveTasks, int64(-1))
}()
if task.operation == ChunkOperationDownload {
operator.DownloadChunk(threadIndex, task)
return
} else if task.operation == ChunkOperationUpload {
operator.UploadChunk(threadIndex, task)
return
}
// task.filePath may be empty. If so, find the chunk first.
if task.operation == ChunkOperationDelete || task.operation == ChunkOperationFossilize {
if task.filePath == "" {
@@ -132,9 +200,9 @@ func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
fossilPath, exist, _, _ := operator.storage.FindChunk(threadIndex, task.chunkID, true)
if exist {
LOG_WARN("CHUNK_FOSSILIZE", "Chunk %s is already a fossil", task.chunkID)
operator.fossilsLock.Lock()
operator.collectionLock.Lock()
operator.fossils = append(operator.fossils, fossilPath)
operator.fossilsLock.Unlock()
operator.collectionLock.Unlock()
} else {
LOG_ERROR("CHUNK_FIND", "Chunk %s does not exist in the storage", task.chunkID)
}
@@ -175,17 +243,17 @@ func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
if err == nil {
LOG_TRACE("CHUNK_DELETE", "Deleted chunk file %s as the fossil already exists", task.chunkID)
}
operator.fossilsLock.Lock()
operator.collectionLock.Lock()
operator.fossils = append(operator.fossils, fossilPath)
operator.fossilsLock.Unlock()
operator.collectionLock.Unlock()
} else {
LOG_ERROR("CHUNK_DELETE", "Failed to fossilize the chunk %s: %v", task.chunkID, err)
}
} else {
LOG_TRACE("CHUNK_FOSSILIZE", "The chunk %s has been marked as a fossil", task.chunkID)
operator.fossilsLock.Lock()
operator.collectionLock.Lock()
operator.fossils = append(operator.fossils, fossilPath)
operator.fossilsLock.Unlock()
operator.collectionLock.Unlock()
}
} else if task.operation == ChunkOperationResurrect {
chunkPath, exist, _, err := operator.storage.FindChunk(threadIndex, task.chunkID, false)
@@ -207,3 +275,267 @@ func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) {
}
}
}
// DownloadChunk is called by the task goroutines to download the chunk described by 'task'.
//
// For a metadata chunk, the local snapshot cache (if there is one) is tried first. Otherwise, or if
// the cached copy is missing or unusable, the chunk is located in the storage (resurrecting it from
// the fossil pool if necessary), downloaded, decrypted and verified against the expected chunk id.
// Recoverable failures are retried while 'downloadAttempt' is below MaxDownloadAttempts.
//
// On success the chunk is handed to task.completionFunc, which then owns it. On failure the number
// of failed chunks is incremented and the error is reported via LOG_WERROR; if failures are allowed
// the chunk is still handed to task.completionFunc so that the task is completed.
func (operator *ChunkOperator) DownloadChunk(threadIndex int, task ChunkTask) {

	cachedPath := ""
	chunk := operator.config.GetChunk()
	chunk.isMetadata = task.isMetadata
	chunkID := task.chunkID

	// Return the chunk to the pool on every exit path where it is still owned by this function.
	// 'chunk' is set to nil once it has been handed to the completion function.
	defer func() {
		if chunk != nil {
			operator.config.PutChunk(chunk)
		}
	}()

	if task.isMetadata && operator.snapshotCache != nil {

		var exist bool
		var err error

		// Reset the chunk with a hasher -- we're reading from the cache where chunk are not encrypted or compressed
		chunk.Reset(true)

		cachedPath, exist, _, err = operator.snapshotCache.FindChunk(threadIndex, chunkID, false)
		if err != nil {
			LOG_WARN("DOWNLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
		} else if exist {
			err = operator.snapshotCache.DownloadFile(0, cachedPath, chunk)
			if err != nil {
				LOG_WARN("DOWNLOAD_CACHE", "Failed to load the chunk %s from the snapshot cache: %v", chunkID, err)
			} else {
				actualChunkID := chunk.GetID()
				if actualChunkID != chunkID {
					LOG_WARN("DOWNLOAD_CACHE_CORRUPTED",
						"The chunk %s load from the snapshot cache has a hash id of %s", chunkID, actualChunkID)
				} else {
					LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been loaded from the snapshot cache", chunkID)

					task.completionFunc(chunk, task.chunkIndex)
					chunk = nil
					return
				}
			}
		}
	}

	// Reset the chunk without a hasher -- the downloaded content will be encrypted and/or compressed and the hasher
	// will be set up before the encryption
	chunk.Reset(false)
	chunk.isMetadata = task.isMetadata

	// If failures are allowed, complete the task properly. The chunk is handed to the completion
	// function in that case, so ownership is given up here; otherwise the deferred PutChunk above
	// would recycle a chunk that the receiver may still be holding.
	completeFailedChunk := func() {
		atomic.AddInt64(&operator.NumberOfFailedChunks, 1)
		if operator.allowFailures {
			task.completionFunc(chunk, task.chunkIndex)
			chunk = nil
		}
	}

	const MaxDownloadAttempts = 3
	// Note that every 'continue' below (including the one after a fossil is resurrected) counts
	// as an attempt.
	for downloadAttempt := 0; ; downloadAttempt++ {

		// Find the chunk by ID first.
		chunkPath, exist, _, err := operator.storage.FindChunk(threadIndex, chunkID, false)
		if err != nil {
			completeFailedChunk()
			LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
			return
		}

		if !exist {
			// No chunk is found. Have to find it in the fossil pool again.
			fossilPath, exist, _, err := operator.storage.FindChunk(threadIndex, chunkID, true)
			if err != nil {
				completeFailedChunk()
				LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err)
				return
			}

			if !exist {

				retry := false

				// Retry for Hubic or WebDAV as it may return 404 even when the chunk exists
				if _, ok := operator.storage.(*HubicStorage); ok {
					retry = true
				}

				if _, ok := operator.storage.(*WebDAVStorage); ok {
					retry = true
				}

				if retry && downloadAttempt < MaxDownloadAttempts {
					LOG_WARN("DOWNLOAD_RETRY", "Failed to find the chunk %s; retrying", chunkID)
					continue
				}

				// A chunk is not found. This is a serious error and hopefully it will never happen.
				// 'err' is known to be nil at this point, so there is no underlying error to report.
				completeFailedChunk()
				LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found", chunkID)
				return
			}

			// We can't download the fossil directly. We have to turn it back into a regular chunk and try
			// downloading again.
			err = operator.storage.MoveFile(threadIndex, fossilPath, chunkPath)
			if err != nil {
				completeFailedChunk()
				LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to resurrect chunk %s: %v", chunkID, err)
				return
			}

			LOG_WARN("DOWNLOAD_RESURRECT", "Fossil %s has been resurrected", chunkID)
			continue
		}

		err = operator.storage.DownloadFile(threadIndex, chunkPath, chunk)
		if err != nil {
			_, isHubic := operator.storage.(*HubicStorage)
			// Retry on EOF or if it is a Hubic backend as it may return 404 even when the chunk exists
			if (err == io.ErrUnexpectedEOF || isHubic) && downloadAttempt < MaxDownloadAttempts {
				LOG_WARN("DOWNLOAD_RETRY", "Failed to download the chunk %s: %v; retrying", chunkID, err)
				chunk.Reset(false)
				chunk.isMetadata = task.isMetadata
				continue
			} else {
				completeFailedChunk()
				LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to download the chunk %s: %v", chunkID, err)
				return
			}
		}

		err = chunk.Decrypt(operator.config.ChunkKey, task.chunkHash)
		if err != nil {
			if downloadAttempt < MaxDownloadAttempts {
				LOG_WARN("DOWNLOAD_RETRY", "Failed to decrypt the chunk %s: %v; retrying", chunkID, err)
				chunk.Reset(false)
				chunk.isMetadata = task.isMetadata
				continue
			} else {
				completeFailedChunk()
				LOG_WERROR(operator.allowFailures, "DOWNLOAD_DECRYPT", "Failed to decrypt the chunk %s: %v", chunkID, err)
				return
			}
		}

		actualChunkID := chunk.GetID()
		if actualChunkID != chunkID {
			if downloadAttempt < MaxDownloadAttempts {
				LOG_WARN("DOWNLOAD_RETRY", "The chunk %s has a hash id of %s; retrying", chunkID, actualChunkID)
				chunk.Reset(false)
				chunk.isMetadata = task.isMetadata
				continue
			} else {
				completeFailedChunk()
				LOG_WERROR(operator.allowFailures, "DOWNLOAD_CORRUPTED", "The chunk %s has a hash id of %s", chunkID, actualChunkID)
				return
			}
		}

		break
	}

	// 'cachedPath' is only non-empty when the snapshot cache lookup above returned a path.
	if chunk.isMetadata && len(cachedPath) > 0 {
		// Save a copy to the local snapshot cache
		err := operator.snapshotCache.UploadFile(threadIndex, cachedPath, chunk.GetBytes())
		if err != nil {
			LOG_WARN("DOWNLOAD_CACHE", "Failed to add the chunk %s to the snapshot cache: %v", chunkID, err)
		}
	}

	downloadedChunkSize := atomic.AddInt64(&operator.downloadedChunkSize, int64(chunk.GetLength()))

	if (operator.showStatistics || IsTracing()) && operator.totalChunkSize > 0 {

		// Make the elapsed time at least one second to avoid a division by zero.
		now := time.Now().Unix()
		if now <= operator.startTime {
			now = operator.startTime + 1
		}
		speed := downloadedChunkSize / (now - operator.startTime)
		remainingTime := int64(0)
		if speed > 0 {
			remainingTime = (operator.totalChunkSize-downloadedChunkSize)/speed + 1
		}
		// Computed in tenths of a percent and divided by 10 when printed.
		percentage := float32(downloadedChunkSize * 1000 / operator.totalChunkSize)
		LOG_INFO("DOWNLOAD_PROGRESS", "Downloaded chunk %d size %d, %sB/s %s %.1f%%",
			task.chunkIndex+1, chunk.GetLength(),
			PrettySize(speed), PrettyTime(remainingTime), percentage/10)
	} else {
		LOG_DEBUG("CHUNK_DOWNLOAD", "Chunk %s has been downloaded", chunkID)
	}

	// The completion function now owns the chunk.
	task.completionFunc(chunk, task.chunkIndex)
	chunk = nil
	return
}
// UploadChunk is called by the task goroutines to perform the actual uploading of task.chunk.
//
// A metadata chunk is first verified and, if there is a snapshot cache and the storage needs one,
// saved to the snapshot cache. The chunk is then encrypted and uploaded unless it already exists in
// the storage or this is a dry run. operator.UploadCompletionFunc is called when the chunk already
// exists (with an upload size of 0) and after the upload step.
//
// It returns true only if the upload step was reached (including a dry run); false is returned if
// the chunk already exists or an error occurred.
func (operator *ChunkOperator) UploadChunk(threadIndex int, task ChunkTask) bool {

	chunk := task.chunk
	chunkID := task.chunkID
	chunkSize := chunk.GetLength()

	// For a snapshot chunk, verify that its chunk id is correct
	if task.isMetadata {
		chunk.VerifyID()
	}

	// The snapshot cache is optional (an operator may be created with a nil cache), so it must be
	// checked before use.
	if task.isMetadata && operator.snapshotCache != nil && operator.storage.IsCacheNeeded() {
		// Save a copy to the local snapshot.
		chunkPath, exist, _, err := operator.snapshotCache.FindChunk(threadIndex, chunkID, false)
		if err != nil {
			LOG_WARN("UPLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
		} else if exist {
			LOG_DEBUG("CHUNK_CACHE", "Chunk %s already exists in the snapshot cache", chunkID)
		} else if err = operator.snapshotCache.UploadFile(threadIndex, chunkPath, chunk.GetBytes()); err != nil {
			LOG_WARN("UPLOAD_CACHE", "Failed to save the chunk %s to the snapshot cache: %v", chunkID, err)
		} else {
			LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been saved to the snapshot cache", chunkID)
		}
	}

	// This returns the path the chunk file should be at.
	chunkPath, exist, _, err := operator.storage.FindChunk(threadIndex, chunkID, false)
	if err != nil {
		LOG_ERROR("UPLOAD_CHUNK", "Failed to find the path for the chunk %s: %v", chunkID, err)
		return false
	}

	if exist {
		// Chunk deduplication by name in effect here.
		LOG_DEBUG("CHUNK_DUPLICATE", "Chunk %s already exists", chunkID)

		operator.UploadCompletionFunc(chunk, task.chunkIndex, false, chunkSize, 0)
		return false
	}

	// Encrypt the chunk only after we know that it must be uploaded.
	err = chunk.Encrypt(operator.config.ChunkKey, chunk.GetHash(), task.isMetadata)
	if err != nil {
		LOG_ERROR("UPLOAD_CHUNK", "Failed to encrypt the chunk %s: %v", chunkID, err)
		return false
	}

	if !operator.config.dryRun {
		err = operator.storage.UploadFile(threadIndex, chunkPath, chunk.GetBytes())
		if err != nil {
			LOG_ERROR("UPLOAD_CHUNK", "Failed to upload the chunk %s: %v", chunkID, err)
			return false
		}
		LOG_DEBUG("CHUNK_UPLOAD", "Chunk %s has been uploaded", chunkID)
	} else {
		LOG_DEBUG("CHUNK_UPLOAD", "Uploading was skipped for chunk %s", chunkID)
	}

	// 'chunkSize' was taken before the encryption; chunk.GetLength() is the size after it.
	operator.UploadCompletionFunc(chunk, task.chunkIndex, false, chunkSize, chunk.GetLength())
	return true
}

View File

@@ -15,11 +15,11 @@ import (
"math/rand"
)
func TestUploaderAndDownloader(t *testing.T) {
func TestChunkOperator(t *testing.T) {
rand.Seed(time.Now().UnixNano())
setTestingT(t)
SetLoggingLevel(INFO)
SetLoggingLevel(DEBUG)
defer func() {
if r := recover(); r != nil {
@@ -87,35 +87,25 @@ func TestUploaderAndDownloader(t *testing.T) {
totalFileSize += chunk.GetLength()
}
completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
chunkOperator := CreateChunkOperator(config, storage, nil, false, testThreads, false)
chunkOperator.UploadCompletionFunc = func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
t.Logf("Chunk %s size %d (%d/%d) uploaded", chunk.GetID(), chunkSize, chunkIndex, len(chunks))
}
chunkUploader := CreateChunkUploader(config, storage, nil, testThreads, nil)
chunkUploader.completionFunc = completionFunc
chunkUploader.Start()
for i, chunk := range chunks {
chunkUploader.StartChunk(chunk, i)
chunkOperator.Upload(chunk, i, false)
}
chunkUploader.Stop()
chunkDownloader := CreateChunkDownloader(config, storage, nil, true, testThreads, false)
chunkDownloader.totalChunkSize = int64(totalFileSize)
for _, chunk := range chunks {
chunkDownloader.AddChunk(chunk.GetHash())
}
chunkOperator.WaitForCompletion()
for i, chunk := range chunks {
downloaded := chunkDownloader.WaitForChunk(i)
downloaded := chunkOperator.Download(chunk.GetHash(), i, false)
if downloaded.GetID() != chunk.GetID() {
t.Errorf("Uploaded: %s, downloaded: %s", chunk.GetID(), downloaded.GetID())
}
}
chunkDownloader.Stop()
chunkOperator.Stop()
for _, file := range listChunks(storage) {
err = storage.DeleteFile(0, "chunks/"+file)

View File

@@ -1,151 +0,0 @@
// Copyright (c) Acrosync LLC. All rights reserved.
// Free for personal use and commercial trial
// Commercial use requires per-user licenses available from https://duplicacy.com
package duplicacy
import (
"sync/atomic"
"time"
)
// ChunkUploadTask represents a chunk to be uploaded.
type ChunkUploadTask struct {
	chunk *Chunk // The chunk to be uploaded
	chunkIndex int // The index of the chunk; passed to the completion function along with the chunk
}
// ChunkUploader uploads chunks to the storage using one or more uploading goroutines. Chunks are added
// by the call to StartChunk(), and then passed to the uploading goroutines. The completion function is
// called when the uploading is completed. Note that ChunkUploader does not release chunks to the
// chunk pool; instead, each chunk is handed to the completion function.
type ChunkUploader struct {
	config *Config // Associated config
	storage Storage // Upload to this storage
	snapshotCache *FileStorage // Used as cache if not nil; usually for uploading snapshot chunks
	threads int // Number of uploading goroutines
	taskQueue chan ChunkUploadTask // Uploading goroutines are listening on this channel for upload jobs
	stopChannel chan bool // Used to terminate uploading goroutines
	numberOfUploadingTasks int32 // The number of uploading tasks
	// Uploading goroutines call this function after a chunk has been uploaded or skipped
	completionFunc func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int)
}
// CreateChunkUploader builds a ChunkUploader from the given settings. No goroutine is started
// here; that is done by Start().
func CreateChunkUploader(config *Config, storage Storage, snapshotCache *FileStorage, threads int,
	completionFunc func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int)) *ChunkUploader {

	created := new(ChunkUploader)

	created.config = config
	created.storage = storage
	created.snapshotCache = snapshotCache
	created.threads = threads
	created.completionFunc = completionFunc

	// The task queue buffers a single task; the stop channel is unbuffered.
	created.taskQueue = make(chan ChunkUploadTask, 1)
	created.stopChannel = make(chan bool)

	return created
}
// Start launches 'threads' uploading goroutines. Each one keeps taking tasks from the task queue
// and uploading them until it receives a value from the stop channel.
func (uploader *ChunkUploader) Start() {

	worker := func(threadIndex int) {
		defer CatchLogException()
		for {
			select {
			case <-uploader.stopChannel:
				return
			case next := <-uploader.taskQueue:
				uploader.Upload(threadIndex, next)
			}
		}
	}

	for threadIndex := 0; threadIndex < uploader.threads; threadIndex++ {
		go worker(threadIndex)
	}
}
// StartChunk counts the chunk as an uploading task and then sends it to the task queue for the
// uploading goroutines. The send may block if all uploading goroutines are busy.
func (uploader *ChunkUploader) StartChunk(chunk *Chunk, chunkIndex int) {
	// The counter is incremented before the task is queued.
	atomic.AddInt32(&uploader.numberOfUploadingTasks, 1)

	var task ChunkUploadTask
	task.chunk = chunk
	task.chunkIndex = chunkIndex
	uploader.taskQueue <- task
}
// Stop waits, polling every 100 milliseconds, until the number of uploading tasks drops to zero,
// and then sends one stop signal per uploading goroutine.
func (uploader *ChunkUploader) Stop() {
	for {
		if atomic.LoadInt32(&uploader.numberOfUploadingTasks) <= 0 {
			break
		}
		time.Sleep(100 * time.Millisecond)
	}

	for sent := 0; sent != uploader.threads; sent++ {
		uploader.stopChannel <- false
	}
}
// Upload is called by the uploading goroutines to perform the actual uploading of task.chunk.
//
// If a snapshot cache is set, the chunk id is verified and, if the storage needs a cache, a copy of
// the chunk is saved to the snapshot cache. The chunk is then encrypted and uploaded unless it
// already exists in the storage or this is a dry run. The completion function is called when the
// chunk is skipped because it already exists and after the upload step.
//
// It returns true only if the upload step was reached (including a dry run); false is returned if
// the chunk already exists or an error occurred.
func (uploader *ChunkUploader) Upload(threadIndex int, task ChunkUploadTask) bool {

	// Balance the increment made by StartChunk() on every exit path, including the error returns;
	// otherwise Stop() would wait forever for the counter to reach zero. Being deferred, the
	// decrement still happens after the completion function has been called.
	defer atomic.AddInt32(&uploader.numberOfUploadingTasks, -1)

	chunk := task.chunk
	chunkSize := chunk.GetLength()
	chunkID := chunk.GetID()

	// For a snapshot chunk, verify that its chunk id is correct
	if uploader.snapshotCache != nil {
		chunk.VerifyID()
	}

	if uploader.snapshotCache != nil && uploader.storage.IsCacheNeeded() {
		// Save a copy to the local snapshot.
		chunkPath, exist, _, err := uploader.snapshotCache.FindChunk(threadIndex, chunkID, false)
		if err != nil {
			LOG_WARN("UPLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err)
		} else if exist {
			LOG_DEBUG("CHUNK_CACHE", "Chunk %s already exists in the snapshot cache", chunkID)
		} else if err = uploader.snapshotCache.UploadFile(threadIndex, chunkPath, chunk.GetBytes()); err != nil {
			LOG_WARN("UPLOAD_CACHE", "Failed to save the chunk %s to the snapshot cache: %v", chunkID, err)
		} else {
			LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been saved to the snapshot cache", chunkID)
		}
	}

	// This returns the path the chunk file should be at.
	chunkPath, exist, _, err := uploader.storage.FindChunk(threadIndex, chunkID, false)
	if err != nil {
		LOG_ERROR("UPLOAD_CHUNK", "Failed to find the path for the chunk %s: %v", chunkID, err)
		return false
	}

	if exist {
		// Chunk deduplication by name in effect here.
		LOG_DEBUG("CHUNK_DUPLICATE", "Chunk %s already exists", chunkID)

		uploader.completionFunc(chunk, task.chunkIndex, true, chunkSize, 0)
		return false
	}

	// Encrypt the chunk only after we know that it must be uploaded.
	err = chunk.Encrypt(uploader.config.ChunkKey, chunk.GetHash(), uploader.snapshotCache != nil)
	if err != nil {
		LOG_ERROR("UPLOAD_CHUNK", "Failed to encrypt the chunk %s: %v", chunkID, err)
		return false
	}

	if !uploader.config.dryRun {
		err = uploader.storage.UploadFile(threadIndex, chunkPath, chunk.GetBytes())
		if err != nil {
			LOG_ERROR("UPLOAD_CHUNK", "Failed to upload the chunk %s: %v", chunkID, err)
			return false
		}
		LOG_DEBUG("CHUNK_UPLOAD", "Chunk %s has been uploaded", chunkID)
	} else {
		LOG_DEBUG("CHUNK_UPLOAD", "Uploading was skipped for chunk %s", chunkID)
	}

	// 'chunkSize' was taken before the encryption; chunk.GetLength() is the size after it.
	uploader.completionFunc(chunk, task.chunkIndex, false, chunkSize, chunk.GetLength())
	return true
}

View File

@@ -16,6 +16,11 @@ import (
"strconv"
"strings"
"time"
"bytes"
"crypto/sha256"
"github.com/vmihailenco/msgpack"
)
// This is the hidden directory in the repository for storing various files.
@@ -45,7 +50,7 @@ type Entry struct {
EndChunk int
EndOffset int
Attributes map[string][]byte
Attributes *map[string][]byte
}
// CreateEntry creates an entry from file properties.
@@ -93,6 +98,27 @@ func CreateEntryFromFileInfo(fileInfo os.FileInfo, directory string) *Entry {
return entry
}
// Copy returns a new Entry carrying the same field values as 'entry'. The copy is shallow:
// Attributes is a pointer, so the returned entry refers to the same attribute map.
func (entry *Entry) Copy() *Entry {
	duplicate := new(Entry)

	duplicate.Path = entry.Path
	duplicate.Size = entry.Size
	duplicate.Time = entry.Time
	duplicate.Mode = entry.Mode
	duplicate.Link = entry.Link
	duplicate.Hash = entry.Hash

	duplicate.UID = entry.UID
	duplicate.GID = entry.GID

	duplicate.StartChunk = entry.StartChunk
	duplicate.StartOffset = entry.StartOffset
	duplicate.EndChunk = entry.EndChunk
	duplicate.EndOffset = entry.EndOffset

	duplicate.Attributes = entry.Attributes

	return duplicate
}
// CreateEntryFromJSON creates an entry from a json description.
func (entry *Entry) UnmarshalJSON(description []byte) (err error) {
@@ -175,17 +201,17 @@ func (entry *Entry) UnmarshalJSON(description []byte) (err error) {
if attributes, ok := value.(map[string]interface{}); !ok {
return fmt.Errorf("Attributes are invalid for file '%s' in the snapshot", entry.Path)
} else {
entry.Attributes = make(map[string][]byte)
entry.Attributes = &map[string][]byte{}
for name, object := range attributes {
if object == nil {
entry.Attributes[name] = []byte("")
(*entry.Attributes)[name] = []byte("")
} else if attributeInBase64, ok := object.(string); !ok {
return fmt.Errorf("Attribute '%s' is invalid for file '%s' in the snapshot", name, entry.Path)
} else if attribute, err := base64.StdEncoding.DecodeString(attributeInBase64); err != nil {
return fmt.Errorf("Failed to decode attribute '%s' for file '%s' in the snapshot: %v",
name, entry.Path, err)
} else {
entry.Attributes[name] = attribute
(*entry.Attributes)[name] = attribute
}
}
}
@@ -244,7 +270,7 @@ func (entry *Entry) convertToObject(encodeName bool) map[string]interface{} {
object["gid"] = entry.GID
}
if len(entry.Attributes) > 0 {
if entry.Attributes != nil && len(*entry.Attributes) > 0 {
object["attributes"] = entry.Attributes
}
@@ -259,6 +285,197 @@ func (entry *Entry) MarshalJSON() ([]byte, error) {
return description, err
}
var _ msgpack.CustomEncoder = (*Entry)(nil)
var _ msgpack.CustomDecoder = (*Entry)(nil)
// EncodeMsgpack writes the entry to 'encoder' as a flat sequence of values: Path, Size, Time, Mode, Link,
// Hash, StartChunk, StartOffset, EndChunk, EndOffset, UID, GID, then the number of attributes followed by
// each attribute name and value. Attribute names are sorted so that the output is deterministic.
// The first encoding error encountered is returned.
func (entry *Entry) EncodeMsgpack(encoder *msgpack.Encoder) error {
	if err := encoder.EncodeString(entry.Path); err != nil {
		return err
	}
	if err := encoder.EncodeInt(entry.Size); err != nil {
		return err
	}
	if err := encoder.EncodeInt(entry.Time); err != nil {
		return err
	}
	if err := encoder.EncodeInt(int64(entry.Mode)); err != nil {
		return err
	}
	if err := encoder.EncodeString(entry.Link); err != nil {
		return err
	}
	if err := encoder.EncodeString(entry.Hash); err != nil {
		return err
	}
	if err := encoder.EncodeInt(int64(entry.StartChunk)); err != nil {
		return err
	}
	if err := encoder.EncodeInt(int64(entry.StartOffset)); err != nil {
		return err
	}
	if err := encoder.EncodeInt(int64(entry.EndChunk)); err != nil {
		return err
	}
	if err := encoder.EncodeInt(int64(entry.EndOffset)); err != nil {
		return err
	}
	if err := encoder.EncodeInt(int64(entry.UID)); err != nil {
		return err
	}
	if err := encoder.EncodeInt(int64(entry.GID)); err != nil {
		return err
	}

	// A nil attribute map is encoded as a count of zero.
	attributeCount := int64(0)
	if entry.Attributes != nil {
		attributeCount = int64(len(*entry.Attributes))
	}
	if err := encoder.EncodeInt(attributeCount); err != nil {
		return err
	}
	if entry.Attributes == nil {
		return nil
	}

	names := make([]string, 0, attributeCount)
	for name := range *entry.Attributes {
		names = append(names, name)
	}
	sort.Strings(names)

	for _, name := range names {
		if err := encoder.EncodeString(name); err != nil {
			return err
		}
		if err := encoder.EncodeString(string((*entry.Attributes)[name])); err != nil {
			return err
		}
	}
	return nil
}
// DecodeMsgpack reads the fields of the entry from 'decoder' in the order Path, Size, Time, Mode, Link,
// Hash, StartChunk, StartOffset, EndChunk, EndOffset, UID, GID, followed by the attribute count and
// that many name/value pairs. Attributes is only assigned when the count is positive.
// The first decoding error encountered is returned.
func (entry *Entry) DecodeMsgpack(decoder *msgpack.Decoder) error {
	var err error

	if entry.Path, err = decoder.DecodeString(); err != nil {
		return err
	}
	if entry.Size, err = decoder.DecodeInt64(); err != nil {
		return err
	}
	if entry.Time, err = decoder.DecodeInt64(); err != nil {
		return err
	}

	mode, err := decoder.DecodeInt64()
	if err != nil {
		return err
	}
	entry.Mode = uint32(mode)

	if entry.Link, err = decoder.DecodeString(); err != nil {
		return err
	}
	if entry.Hash, err = decoder.DecodeString(); err != nil {
		return err
	}

	// The six plain integer fields, listed in the order they appear in the stream. A field is only
	// assigned once its value has been decoded successfully.
	integerFields := []*int{
		&entry.StartChunk,
		&entry.StartOffset,
		&entry.EndChunk,
		&entry.EndOffset,
		&entry.UID,
		&entry.GID,
	}
	for _, field := range integerFields {
		value, err := decoder.DecodeInt()
		if err != nil {
			return err
		}
		*field = int(value)
	}

	attributeCount, err := decoder.DecodeInt()
	if err != nil {
		return err
	}
	if attributeCount <= 0 {
		return nil
	}

	attributes := map[string][]byte{}
	entry.Attributes = &attributes
	for remaining := attributeCount; remaining > 0; remaining-- {
		name, err := decoder.DecodeString()
		if err != nil {
			return err
		}
		value, err := decoder.DecodeString()
		if err != nil {
			return err
		}
		attributes[name] = []byte(value)
	}
	return nil
}
// IsFile reports whether none of the os.ModeType bits are set in the entry's mode.
func (entry *Entry) IsFile() bool {
	typeBits := entry.Mode & uint32(os.ModeType)
	return typeBits == 0
}
@@ -271,10 +488,27 @@ func (entry *Entry) IsLink() bool {
return entry.Mode&uint32(os.ModeSymlink) != 0
}
// IsComplete reports whether the entry has a non-negative size; a negative size marks an entry
// as incomplete.
func (entry *Entry) IsComplete() bool {
	if entry.Size < 0 {
		return false
	}
	return true
}
// GetPermissions returns the entry's mode as an os.FileMode restricted to the bits in fileModeMask.
func (entry *Entry) GetPermissions() os.FileMode {
	mode := os.FileMode(entry.Mode)
	return mode & fileModeMask
}
// GetParent returns the path of the directory containing the entry: one trailing '/' (if any) is dropped
// from the entry's path and everything before the last remaining '/' is returned. An empty string is
// returned when no '/' remains.
func (entry *Entry) GetParent() string {
	trimmed := strings.TrimSuffix(entry.Path, "/")
	if slash := strings.LastIndex(trimmed, "/"); slash >= 0 {
		return trimmed[:slash]
	}
	return ""
}
// IsSameAs reports whether the two entries have the same size and timestamps that differ by at most 1.
func (entry *Entry) IsSameAs(other *Entry) bool {
	if entry.Size != other.Size {
		return false
	}
	earliest, latest := other.Time-1, other.Time+1
	return entry.Time <= latest && entry.Time >= earliest
}
@@ -326,7 +560,7 @@ func (entry *Entry) RestoreMetadata(fullPath string, fileInfo *os.FileInfo, setO
}
}
if len(entry.Attributes) > 0 {
if entry.Attributes != nil && len(*entry.Attributes) > 0 {
entry.SetAttributesToFile(fullPath)
}
@@ -335,47 +569,62 @@ func (entry *Entry) RestoreMetadata(fullPath string, fileInfo *os.FileInfo, setO
// Return -1 if 'left' should appear before 'right', 1 if opposite, and 0 if they are the same.
// Files are always arranged before subdirectories under the same parent directory.
func (left *Entry) Compare(right *Entry) int {
path1 := left.Path
path2 := right.Path
func ComparePaths(left string, right string) int {
p := 0
for ; p < len(path1) && p < len(path2); p++ {
if path1[p] != path2[p] {
for ; p < len(left) && p < len(right); p++ {
if left[p] != right[p] {
break
}
}
// c1, c2 is the first byte that differs
// c1, c2 are the first bytes that differ
var c1, c2 byte
if p < len(path1) {
c1 = path1[p]
if p < len(left) {
c1 = left[p]
}
if p < len(path2) {
c2 = path2[p]
if p < len(right) {
c2 = right[p]
}
// c3, c4 indicates how the current component ends
// c3 == '/': the current component is a directory
// c3 != '/': the current component is the last one
// c3, c4 indicate how the current component ends
// c3 == '/': the current component is a directory; c3 != '/': the current component is the last one
c3 := c1
for i := p; c3 != '/' && i < len(path1); i++ {
c3 = path1[i]
// last1, last2 indicate whether the current component is the last component of the path
last1 := true
for i := p; i < len(left); i++ {
c3 = left[i]
if c3 == '/' {
last1 = i == len(left) - 1
break
}
}
c4 := c2
for i := p; c4 != '/' && i < len(path2); i++ {
c4 = path2[i]
last2 := true
for i := p; i < len(right); i++ {
c4 = right[i]
if c4 == '/' {
last2 = i == len(right) - 1
break
}
}
if last1 != last2 {
if last1 {
return -1
} else {
return 1
}
}
if c3 == '/' {
if c4 == '/' {
// We are comparing two directory components
if c1 == '/' {
// left is shorter
// Note that c2 maybe smaller than c1 but c1 is '/' which is counted
// as 0
// left is shorter; note that c2 may be smaller than c1, but c1 should be treated as 0, so
// this is a special case that must be handled separately
return -1
} else if c2 == '/' {
// right is shorter
@@ -397,6 +646,10 @@ func (left *Entry) Compare(right *Entry) int {
}
}
// Compare orders two entries by passing their Path fields to ComparePaths and returning its result.
func (left *Entry) Compare(right *Entry) int {
return ComparePaths(left.Path, right.Path)
}
// ByName is a slice of entry pointers that is used to sort entries by their names.
type ByName []*Entry
@@ -443,7 +696,7 @@ func (files FileInfoCompare) Less(i, j int) bool {
// ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths
// are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files.
func ListEntries(top string, path string, fileList *[]*Entry, patterns []string, nobackupFile string, discardAttributes bool, excludeByAttribute bool) (directoryList []*Entry,
func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool, listingChannel chan *Entry) (directoryList []*Entry,
skippedFiles []string, err error) {
LOG_DEBUG("LIST_ENTRIES", "Listing %s", path)
@@ -478,8 +731,6 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns []string,
sort.Sort(FileInfoCompare(files))
entries := make([]*Entry, 0, 4)
for _, f := range files {
if f.Name() == DUPLICACY_DIRECTORY {
continue
@@ -520,11 +771,9 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns []string,
}
}
if !discardAttributes {
entry.ReadAttributes(top)
}
entry.ReadAttributes(top)
if excludeByAttribute && excludedByAttribute(entry.Attributes) {
if excludeByAttribute && entry.Attributes != nil && excludedByAttribute(*entry.Attributes) {
LOG_DEBUG("LIST_EXCLUDE", "%s is excluded by attribute", entry.Path)
continue
}
@@ -535,20 +784,20 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns []string,
continue
}
entries = append(entries, entry)
if entry.IsDir() {
directoryList = append(directoryList, entry)
} else {
listingChannel <- entry
}
}
// For top level directory we need to sort again because symlinks may have been changed
if path == "" {
sort.Sort(ByName(entries))
sort.Sort(ByName(directoryList))
}
for _, entry := range entries {
if entry.IsDir() {
directoryList = append(directoryList, entry)
} else {
*fileList = append(*fileList, entry)
}
for _, entry := range directoryList {
listingChannel <- entry
}
for i, j := 0, len(directoryList)-1; i < j; i, j = i+1, j-1 {
@@ -597,3 +846,100 @@ func (entry *Entry) Diff(chunkHashes []string, chunkLengths []int,
return modifiedLength
}
// EncodeWithHash serializes the entry with msgpack and writes two byte strings to 'encoder': the
// serialized entry followed by its SHA-256 digest. DecodeEntryWithHash reads the same pair back.
func (entry *Entry) EncodeWithHash(encoder *msgpack.Encoder) error {
	serialized, err := msgpack.Marshal(entry)
	if err != nil {
		return err
	}
	digest := sha256.Sum256(serialized)

	if err = encoder.EncodeBytes(serialized); err != nil {
		return err
	}
	return encoder.EncodeBytes(digest[:])
}
// DecodeEntryWithHash reads a serialized entry and its stored SHA-256 digest from 'decoder' (the pair
// written by EncodeWithHash), verifies that the digest matches the serialized bytes, and returns the
// decoded entry. An error is returned if either byte string cannot be read, if the digest does not
// match ("corrupted file metadata"), or if the entry cannot be unmarshaled.
func DecodeEntryWithHash(decoder *msgpack.Decoder) (*Entry, error) {
	entryBytes, err := decoder.DecodeBytes()
	if err != nil {
		return nil, err
	}
	storedHash, err := decoder.DecodeBytes()
	if err != nil {
		return nil, err
	}

	// Recompute the digest over the bytes actually read and compare it with the stored one.
	computedHash := sha256.Sum256(entryBytes)
	if !bytes.Equal(computedHash[:], storedHash) {
		return nil, fmt.Errorf("corrupted file metadata")
	}

	var entry Entry
	if err = msgpack.Unmarshal(entryBytes, &entry); err != nil {
		return nil, err
	}
	return &entry, nil
}
// check is a sanity check that validates the chunk range recorded in the entry against 'chunkLengths',
// where chunkLengths[i] is the length of the i-th chunk.
//
// A negative size is always an error. Entries that are not regular files, or that are empty, are not
// checked any further. Otherwise the start/end chunk indices must lie within 'chunkLengths', the offsets
// must be non-negative and lie within their chunks, and the number of bytes covered by the range must be
// equal to the entry's size.
func (entry *Entry) check(chunkLengths []int) error {
	if entry.Size < 0 {
		return fmt.Errorf("The file %s has an invalid size (%d)", entry.Path, entry.Size)
	}

	if !entry.IsFile() || entry.Size == 0 {
		return nil
	}

	if entry.StartChunk < 0 {
		return fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk)
	}

	if entry.EndChunk >= len(chunkLengths) {
		return fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d",
			entry.Path, entry.EndChunk, len(chunkLengths))
	}

	if entry.EndChunk < entry.StartChunk {
		return fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d",
			entry.Path, entry.StartChunk, entry.EndChunk)
	}

	// Negative offsets must be rejected explicitly: they would otherwise pass the upper-bound checks and
	// could cancel out in the size computation below.
	if entry.StartOffset < 0 || entry.StartOffset >= chunkLengths[entry.StartChunk] {
		return fmt.Errorf("The file %s starts at offset %d of chunk %d of length %d",
			entry.Path, entry.StartOffset, entry.StartChunk, chunkLengths[entry.StartChunk])
	}

	if entry.EndOffset < 0 || entry.EndOffset > chunkLengths[entry.EndChunk] {
		return fmt.Errorf("The file %s ends at offset %d of chunk %d of length %d",
			entry.Path, entry.EndOffset, entry.EndChunk, chunkLengths[entry.EndChunk])
	}

	// Add up the bytes contributed by each chunk in the range.
	fileSize := int64(0)
	for i := entry.StartChunk; i <= entry.EndChunk; i++ {
		start := 0
		if i == entry.StartChunk {
			start = entry.StartOffset
		}
		end := chunkLengths[i]
		if i == entry.EndChunk {
			end = entry.EndOffset
		}
		fileSize += int64(end - start)
	}

	if entry.Size != fileSize {
		return fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d",
			entry.Path, entry.Size, fileSize)
	}

	return nil
}

View File

@@ -13,8 +13,11 @@ import (
"sort"
"strings"
"testing"
"bytes"
"encoding/json"
"github.com/gilbertchen/xattr"
"github.com/vmihailenco/msgpack"
)
func TestEntrySort(t *testing.T) {
@@ -27,19 +30,19 @@ func TestEntrySort(t *testing.T) {
"\xBB\xDDfile",
"\xFF\xDDfile",
"ab/",
"ab-/",
"ab0/",
"ab1/",
"ab/c",
"ab+/c-",
"ab+/c0",
"ab+/c/",
"ab+/c/d",
"ab+/c+/",
"ab+/c+/d",
"ab+/c0/",
"ab+/c/d",
"ab+/c+/d",
"ab+/c0/d",
"ab-/",
"ab-/c",
"ab0/",
"ab1/",
"ab1/c",
"ab1/\xBB\xDDfile",
"ab1/\xFF\xDDfile",
@@ -86,7 +89,7 @@ func TestEntrySort(t *testing.T) {
}
}
func TestEntryList(t *testing.T) {
func TestEntryOrder(t *testing.T) {
testDir := filepath.Join(os.TempDir(), "duplicacy_test")
os.RemoveAll(testDir)
@@ -98,16 +101,16 @@ func TestEntryList(t *testing.T) {
"ab0",
"ab1",
"ab+/",
"ab2/",
"ab3/",
"ab+/c",
"ab+/c+",
"ab+/c1",
"ab+/c-/",
"ab+/c-/d",
"ab+/c0/",
"ab+/c-/d",
"ab+/c0/d",
"ab2/",
"ab2/c",
"ab3/",
"ab3/c",
}
@@ -172,18 +175,24 @@ func TestEntryList(t *testing.T) {
directories = append(directories, CreateEntry("", 0, 0, 0))
entries := make([]*Entry, 0, 4)
entryChannel := make(chan *Entry, 1024)
entries = append(entries, CreateEntry("", 0, 0, 0))
for len(directories) > 0 {
directory := directories[len(directories)-1]
directories = directories[:len(directories)-1]
entries = append(entries, directory)
subdirectories, _, err := ListEntries(testDir, directory.Path, &entries, nil, "", false, false)
subdirectories, _, err := ListEntries(testDir, directory.Path, nil, "", false, entryChannel)
if err != nil {
t.Errorf("ListEntries(%s, %s) returned an error: %s", testDir, directory.Path, err)
}
directories = append(directories, subdirectories...)
}
close(entryChannel)
for entry := range entryChannel {
entries = append(entries, entry)
}
entries = entries[1:]
for _, entry := range entries {
@@ -274,18 +283,25 @@ func TestEntryExcludeByAttribute(t *testing.T) {
directories = append(directories, CreateEntry("", 0, 0, 0))
entries := make([]*Entry, 0, 4)
entryChannel := make(chan *Entry, 1024)
entries = append(entries, CreateEntry("", 0, 0, 0))
for len(directories) > 0 {
directory := directories[len(directories)-1]
directories = directories[:len(directories)-1]
entries = append(entries, directory)
subdirectories, _, err := ListEntries(testDir, directory.Path, &entries, nil, "", false, excludeByAttribute)
subdirectories, _, err := ListEntries(testDir, directory.Path, nil, "", excludeByAttribute, entryChannel)
if err != nil {
t.Errorf("ListEntries(%s, %s) returned an error: %s", testDir, directory.Path, err)
}
directories = append(directories, subdirectories...)
}
close(entryChannel)
for entry := range entryChannel {
entries = append(entries, entry)
}
entries = entries[1:]
for _, entry := range entries {
@@ -327,3 +343,33 @@ func TestEntryExcludeByAttribute(t *testing.T) {
}
}
func TestEntryEncoding(t *testing.T) {
buffer := new(bytes.Buffer)
encoder := msgpack.NewEncoder(buffer)
entry1 := CreateEntry("abcd", 1, 2, 0700)
err := encoder.Encode(entry1)
if err != nil {
t.Errorf("Failed to encode the entry: %v", err)
return
}
t.Logf("msgpack size: %d\n", len(buffer.Bytes()))
decoder := msgpack.NewDecoder(buffer)
description, _ := json.Marshal(entry1)
t.Logf("json size: %d\n", len(description))
var entry2 Entry
err = decoder.Decode(&entry2)
if err != nil {
t.Errorf("Failed to decode the entry: %v", err)
return
}
if entry1.Path != entry2.Path || entry1.Size != entry2.Size || entry1.Time != entry2.Time {
t.Error("Decoded entry is different than the original one")
}
}

574
src/duplicacy_entrylist.go Normal file
View File

@@ -0,0 +1,574 @@
// Copyright (c) Acrosync LLC. All rights reserved.
// Free for personal use and commercial trial
// Commercial use requires per-user licenses available from https://duplicacy.com
package duplicacy
import (
"encoding/hex"
"encoding/binary"
"fmt"
"os"
"io"
"path"
"crypto/sha256"
"crypto/rand"
"sync"
"github.com/vmihailenco/msgpack"
)
// ModifiedEntry stores information about a file entry that has been modified. EntryList.AddEntry creates
// one with a size of -1 for every incomplete regular file; the size and hash are copied back onto the
// entry when the entry list is read.
type ModifiedEntry struct {
Path string
Size int64
Hash string
}
// EntryList is basically a list of entries, which can be kept in memory, or serialized to a disk file,
// depending on whether maximumInMemoryEntries has been reached.
//
// The idea behind the on-disk entry list is that entries are written to a disk file as they come in.
// Entries that have been modified and thus need to be uploaded are marked as incomplete (i.e., they
// have a size of -1). When the limit is reached, entries are moved to a disk file but ModifiedEntries and
// UploadedChunk* are still kept in memory. When entries are later read from the entry list, incomplete
// entries are back-annotated with info from ModifiedEntries and UploadedChunk* before they are sent out.
type EntryList struct {
onDiskFile *os.File // the file to store entries
encoder *msgpack.Encoder // msgpack encoder for entry serialization
entries []*Entry // in-memory entry list
SnapshotID string // the snapshot id
Token string // this unique random token makes sure we read/write
// the same entry list
ModifiedEntries []ModifiedEntry // entries that will be uploaded
UploadedChunkHashes []string // chunks from entries that have been uploaded
UploadedChunkLengths []int // chunk lengths from entries that have been uploaded
uploadedChunkLock sync.Mutex // lock for UploadedChunkHashes and UploadedChunkLengths
PreservedChunkHashes []string // chunks from entries not changed
PreservedChunkLengths []int // chunk lengths from entries not changed
Checksum string // checksum of all entries to detect disk corruption
maximumInMemoryEntries int // max in-memory entries
NumberOfEntries int64 // number of entries (not including directories and links)
cachePath string // the directory for the on-disk file
// These 3 variables are used in entry information back-annotation
modifiedEntryIndex int // points to the current modified entry
uploadedChunkIndex int // counter for upload chunks
uploadedChunkOffset int // the start offset for the current modified entry
}
// CreateEntryList creates an empty entry list for the given snapshot id. 'cachePath' is the directory
// where the on-disk file is created and 'maximumInMemoryEntries' is the in-memory entry limit. A token made
// of 16 random bytes is generated to identify the list; an error is returned if that fails.
func CreateEntryList(snapshotID string, cachePath string, maximumInMemoryEntries int) (*EntryList, error) {
	var token [16]byte
	if _, err := rand.Read(token[:]); err != nil {
		return nil, fmt.Errorf("Failed to create a random token: %v", err)
	}

	return &EntryList{
		SnapshotID:             snapshotID,
		Token:                  string(token[:]),
		cachePath:              cachePath,
		maximumInMemoryEntries: maximumInMemoryEntries,
	}, nil
}
// createOnDiskFile creates (or truncates) the file 'incomplete_files' under the cache directory and writes
// the token followed by every entry currently held in memory to it.
//
// The file and its encoder are only stored in the entry list once everything has been written
// successfully. On failure the file is closed and an error is returned, so that no file handle is leaked
// and the entry list is not left pointing at a partially written file.
func (entryList *EntryList) createOnDiskFile() error {
	file, err := os.OpenFile(path.Join(entryList.cachePath, "incomplete_files"), os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600)
	if err != nil {
		return fmt.Errorf("Failed to create on disk entry list: %v", err)
	}

	encoder := msgpack.NewEncoder(file)

	if err = encoder.EncodeString(entryList.Token); err != nil {
		file.Close()
		return fmt.Errorf("Failed to create on disk entry list: %v", err)
	}

	for _, entry := range entryList.entries {
		if err = entry.EncodeWithHash(encoder); err != nil {
			file.Close()
			return err
		}
	}

	entryList.onDiskFile = file
	entryList.encoder = encoder
	return nil
}
// AddEntry appends an entry to the entry list.
//
// Entries that are neither directories nor links are counted in NumberOfEntries. An incomplete directory
// or link gets its size reset to 0, while an incomplete file is recorded in ModifiedEntries with a size of
// -1. The entry is then written to the on-disk file if there is one; otherwise it is kept in memory, and
// the on-disk file is created as soon as the number of in-memory entries exceeds a non-negative
// maximumInMemoryEntries.
func (entryList *EntryList) AddEntry(entry *Entry) error {
	isDirectoryOrLink := entry.IsDir() || entry.IsLink()

	if !isDirectoryOrLink {
		entryList.NumberOfEntries++
	}

	switch {
	case entry.IsComplete():
		// nothing to adjust
	case isDirectoryOrLink:
		entry.Size = 0
	default:
		entryList.ModifiedEntries = append(entryList.ModifiedEntries, ModifiedEntry{
			Path: entry.Path,
			Size: -1,
		})
	}

	if entryList.onDiskFile != nil {
		return entry.EncodeWithHash(entryList.encoder)
	}

	entryList.entries = append(entryList.entries, entry)

	limit := entryList.maximumInMemoryEntries
	if limit < 0 || len(entryList.entries) <= limit {
		return nil
	}
	return entryList.createOnDiskFile()
}
// AddPreservedChunk records the hash and size of a chunk that belongs to files that have not been modified.
func (entryList *EntryList) AddPreservedChunk(chunkHash string, chunkSize int) {
	hashes, lengths := entryList.PreservedChunkHashes, entryList.PreservedChunkLengths
	entryList.PreservedChunkHashes = append(hashes, chunkHash)
	entryList.PreservedChunkLengths = append(lengths, chunkSize)
}
// AddUploadedChunk records the hash and size of a chunk just uploaded (one that belongs to files that have
// been modified) at position 'chunkIndex'. Both slices are first extended with placeholders (an empty
// hash and a zero length) as needed, so chunks may be added in any order.
//
// The lock is released with defer so that it is not left held if indexing panics, which happens
// when 'chunkIndex' is negative.
func (entryList *EntryList) AddUploadedChunk(chunkIndex int, chunkHash string, chunkSize int) {
	entryList.uploadedChunkLock.Lock()
	defer entryList.uploadedChunkLock.Unlock()

	for len(entryList.UploadedChunkHashes) <= chunkIndex {
		entryList.UploadedChunkHashes = append(entryList.UploadedChunkHashes, "")
	}
	for len(entryList.UploadedChunkLengths) <= chunkIndex {
		entryList.UploadedChunkLengths = append(entryList.UploadedChunkLengths, 0)
	}

	entryList.UploadedChunkHashes[chunkIndex] = chunkHash
	entryList.UploadedChunkLengths[chunkIndex] = chunkSize
}
// CloseOnDiskFile syncs and closes the on-disk file and then clears the reference to it. It does nothing
// if there is no on-disk file. If either the sync or the close fails, the error is returned and the
// reference is kept.
func (entryList *EntryList) CloseOnDiskFile() error {
	file := entryList.onDiskFile
	if file == nil {
		return nil
	}

	if err := file.Sync(); err != nil {
		return err
	}
	if err := file.Close(); err != nil {
		return err
	}

	entryList.onDiskFile = nil
	return nil
}
// getChunkLength returns the length of the `index`th chunk, where the preserved chunks are numbered first
// and the uploaded chunks follow them.
func (entryList *EntryList) getChunkLength(index int) int {
	preserved := len(entryList.PreservedChunkLengths)
	if index >= preserved {
		return entryList.UploadedChunkLengths[index-preserved]
	}
	return entryList.PreservedChunkLengths[index]
}
// checkEntry is a sanity check that validates the chunk range recorded in 'entry' against the chunks known
// to this entry list (the preserved chunks followed by the uploaded chunks).
//
// A negative size is always an error. Entries that are not regular files, or that are empty, are not
// checked any further. Otherwise the start/end chunk indices must refer to known chunks, the offsets must
// be non-negative and lie within their chunks, and the number of bytes covered by the range must be equal
// to the entry's size.
func (entryList *EntryList) checkEntry(entry *Entry) error {
	if entry.Size < 0 {
		return fmt.Errorf("the file %s has an invalid size (%d)", entry.Path, entry.Size)
	}

	if !entry.IsFile() || entry.Size == 0 {
		return nil
	}

	numberOfChunks := len(entryList.PreservedChunkLengths) + len(entryList.UploadedChunkLengths)

	if entry.StartChunk < 0 {
		return fmt.Errorf("the file %s starts at chunk %d", entry.Path, entry.StartChunk)
	}

	if entry.EndChunk >= numberOfChunks {
		return fmt.Errorf("the file %s ends at chunk %d while the number of chunks is %d",
			entry.Path, entry.EndChunk, numberOfChunks)
	}

	if entry.EndChunk < entry.StartChunk {
		return fmt.Errorf("the file %s starts at chunk %d and ends at chunk %d",
			entry.Path, entry.StartChunk, entry.EndChunk)
	}

	// Negative offsets must be rejected explicitly: they would otherwise pass the upper-bound checks and
	// could cancel out in the size computation below.
	if entry.StartOffset < 0 || entry.StartOffset >= entryList.getChunkLength(entry.StartChunk) {
		return fmt.Errorf("the file %s starts at offset %d of chunk %d with a length of %d",
			entry.Path, entry.StartOffset, entry.StartChunk, entryList.getChunkLength(entry.StartChunk))
	}

	if entry.EndOffset < 0 || entry.EndOffset > entryList.getChunkLength(entry.EndChunk) {
		return fmt.Errorf("the file %s ends at offset %d of chunk %d with a length of %d",
			entry.Path, entry.EndOffset, entry.EndChunk, entryList.getChunkLength(entry.EndChunk))
	}

	// Add up the bytes contributed by each chunk in the range.
	fileSize := int64(0)
	for i := entry.StartChunk; i <= entry.EndChunk; i++ {
		start := 0
		if i == entry.StartChunk {
			start = entry.StartOffset
		}
		end := entryList.getChunkLength(i)
		if i == entry.EndChunk {
			end = entry.EndOffset
		}
		fileSize += int64(end - start)
	}

	if entry.Size != fileSize {
		return fmt.Errorf("the file %s has a size of %d but the total size of chunks is %d",
			entry.Path, entry.Size, fileSize)
	}

	return nil
}
// fillAndSendEntry passes 'entry' to 'entryOut', first filling in the missing information if the entry is
// incomplete. An incomplete entry (with a size of -1) does not have 'startChunk', 'startOffset',
// 'endChunk', and 'endOffset'; these are derived from the next item in ModifiedEntries and from the
// uploaded chunk lengths, using the cursors modifiedEntryIndex, uploadedChunkIndex and uploadedChunkOffset.
//
// 'skipped' is true when the matching modified entry has a size that is not positive; in that case
// 'entryOut' is not called. An error is returned if the entry fails the sanity check, if the modified
// entry is missing or has a different path, if the uploaded chunks run out before the end of the file
// is reached, or if 'entryOut' itself returns an error.
func (entryList *EntryList) fillAndSendEntry(entry *Entry, entryOut func(*Entry)error) (skipped bool, err error) {
// A complete entry already carries its chunk range and only needs to be validated.
if entry.IsComplete() {
err := entryList.checkEntry(entry)
if err != nil {
return false, err
}
return false, entryOut(entry)
}
// Every incomplete entry consumes exactly one modified entry, in the same order.
if entryList.modifiedEntryIndex >= len(entryList.ModifiedEntries) {
return false, fmt.Errorf("Unexpected file index %d (%d modified files)", entryList.modifiedEntryIndex, len(entryList.ModifiedEntries))
}
modifiedEntry := &entryList.ModifiedEntries[entryList.modifiedEntryIndex]
entryList.modifiedEntryIndex++
if modifiedEntry.Path != entry.Path {
return false, fmt.Errorf("Unexpected file path %s when expecting %s", modifiedEntry.Path, entry.Path)
}
// A modified entry without a positive size is not sent out.
if modifiedEntry.Size <= 0 {
return true, nil
}
entry.Size = modifiedEntry.Size
entry.Hash = modifiedEntry.Hash
// Uploaded chunks are numbered after the preserved chunks.
entry.StartChunk = entryList.uploadedChunkIndex + len(entryList.PreservedChunkHashes)
entry.StartOffset = entryList.uploadedChunkOffset
entry.EndChunk = entry.StartChunk
// Walk over the uploaded chunks until the remaining bytes fit into the current chunk.
endOffset := int64(entry.StartOffset) + entry.Size
for entryList.uploadedChunkIndex < len(entryList.UploadedChunkLengths) && endOffset > int64(entryList.UploadedChunkLengths[entryList.uploadedChunkIndex]) {
endOffset -= int64(entryList.UploadedChunkLengths[entryList.uploadedChunkIndex])
entry.EndChunk++
entryList.uploadedChunkIndex++
}
if entryList.uploadedChunkIndex >= len(entryList.UploadedChunkLengths) {
return false, fmt.Errorf("File %s has not been completely uploaded", entry.Path)
}
entry.EndOffset = int(endOffset)
entryList.uploadedChunkOffset = entry.EndOffset
// If the file ends exactly at the end of a chunk, the next file starts at the beginning of the next chunk.
if entry.EndOffset == entryList.UploadedChunkLengths[entryList.uploadedChunkIndex] {
entryList.uploadedChunkIndex++
entryList.uploadedChunkOffset = 0
}
err = entryList.checkEntry(entry)
if err != nil {
return false, err
}
return false, entryOut(entry)
}
// ReadEntries iterates through the entries in this entry list, passing each one to 'entryOut' after
// incomplete entries have been filled in by fillAndSendEntry. The back-annotation cursors are reset
// first, so the list can be read from the beginning every time.
//
// Entries come from the in-memory list (as copies) if there is no on-disk file; otherwise they are decoded
// from the on-disk file after skipping the leading token. Iteration stops at the first error, which is
// returned; reaching the end of the on-disk file is not an error.
func (entryList *EntryList) ReadEntries(entryOut func(*Entry) error) error {
	entryList.modifiedEntryIndex = 0
	entryList.uploadedChunkIndex = 0
	entryList.uploadedChunkOffset = 0

	if entryList.onDiskFile == nil {
		for _, entry := range entryList.entries {
			// Skipped entries need no further handling, so only the error matters here.
			if _, err := entryList.fillAndSendEntry(entry.Copy(), entryOut); err != nil {
				return err
			}
		}
		return nil
	}

	if _, err := entryList.onDiskFile.Seek(0, io.SeekStart); err != nil {
		return err
	}

	decoder := msgpack.NewDecoder(entryList.onDiskFile)

	// The file starts with the token, which is not needed here.
	if _, err := decoder.DecodeString(); err != nil {
		return err
	}

	for {
		// Peek to tell the end of the file apart from a truncated entry.
		if _, err := decoder.PeekCode(); err != nil {
			if err == io.EOF {
				return nil
			}
			return err
		}

		entry, err := DecodeEntryWithHash(decoder)
		if err != nil {
			return err
		}
		if _, err = entryList.fillAndSendEntry(entry, entryOut); err != nil {
			return err
		}
	}
}
// SaveIncompleteSnapshot saves the entry list as an incomplete snapshot. The on-disk entry list
// ('incomplete_files') is renamed to 'incomplete_snapshot', and this EntryList struct is saved as
// 'incomplete_chunks'. If the entries are still held in memory, the on-disk file is created first.
//
// Failures are reported with LOG_WARN.
func (entryList *EntryList) SaveIncompleteSnapshot() {
	entryList.uploadedChunkLock.Lock()
	defer entryList.uploadedChunkLock.Unlock()

	if entryList.onDiskFile == nil {
		// createOnDiskFile writes the token followed by every in-memory entry, so the entries must not be
		// encoded again here; doing so would store each entry twice in the incomplete snapshot.
		err := entryList.createOnDiskFile()
		if err != nil {
			LOG_WARN("INCOMPLETE_SAVE", "Failed to create the incomplete snapshot file: %v", err)
			return
		}
	}

	err := entryList.onDiskFile.Close()
	if err != nil {
		LOG_WARN("INCOMPLETE_SAVE", "Failed to close the on-disk file: %v", err)
		return
	}

	filePath := path.Join(entryList.cachePath, "incomplete_snapshot")
	if _, err := os.Stat(filePath); err == nil {
		err = os.Remove(filePath)
		if err != nil {
			LOG_WARN("INCOMPLETE_REMOVE", "Failed to remove previous incomplete snapshot: %v", err)
		}
	}

	err = os.Rename(path.Join(entryList.cachePath, "incomplete_files"), filePath)
	if err != nil {
		LOG_WARN("INCOMPLETE_SAVE", "Failed to rename the incomplete snapshot file: %v", err)
		return
	}

	chunkFile := path.Join(entryList.cachePath, "incomplete_chunks")
	file, err := os.OpenFile(chunkFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
	if err != nil {
		LOG_WARN("INCOMPLETE_SAVE", "Failed to create the incomplete chunk file: %v", err)
		return
	}
	defer file.Close()

	encoder := msgpack.NewEncoder(file)

	// The checksum is stored in the struct so that it is saved together with the data it covers.
	entryList.Checksum = entryList.CalculateChecksum()

	err = encoder.Encode(entryList)
	if err != nil {
		LOG_WARN("INCOMPLETE_SAVE", "Failed to save the incomplete snapshot: %v", err)
		return
	}

	LOG_INFO("INCOMPLETE_SAVE", "Incomplete snapshot saved to %s", filePath)
}
// CalculateChecksum returns the hex-encoded SHA-256 digest of, in this order: the uploaded chunk hashes,
// the uploaded chunk lengths, the preserved chunk hashes, the preserved chunk lengths, and the size and
// hash of every modified entry. Integers are hashed as 8 little-endian bytes.
func (entryList *EntryList) CalculateChecksum() string {
	hasher := sha256.New()

	var scratch [8]byte
	addString := func(value string) {
		hasher.Write([]byte(value))
	}
	addInteger := func(value uint64) {
		binary.LittleEndian.PutUint64(scratch[:], value)
		hasher.Write(scratch[:])
	}

	for _, hash := range entryList.UploadedChunkHashes {
		addString(hash)
	}
	for _, length := range entryList.UploadedChunkLengths {
		addInteger(uint64(length))
	}

	for _, hash := range entryList.PreservedChunkHashes {
		addString(hash)
	}
	for _, length := range entryList.PreservedChunkLengths {
		addInteger(uint64(length))
	}

	for i := range entryList.ModifiedEntries {
		modified := &entryList.ModifiedEntries[i]
		addInteger(uint64(modified.Size))
		addString(modified.Hash)
	}

	return hex.EncodeToString(hasher.Sum(nil))
}
// CheckChunks reports whether every uploaded and every preserved chunk is present in 'chunkCache'. Each
// chunk hash is converted to a chunk id with 'config' and the id must be a key of the map, regardless of
// the value stored under it.
func (entryList *EntryList) CheckChunks(config *Config, chunkCache map[string]bool) bool {
	hashLists := [][]string{entryList.UploadedChunkHashes, entryList.PreservedChunkHashes}
	for _, hashes := range hashLists {
		for _, chunkHash := range hashes {
			chunkID := config.GetChunkIDFromHash(chunkHash)
			_, found := chunkCache[chunkID]
			if !found {
				return false
			}
		}
	}
	return true
}
// loadIncompleteSnapshot recovers the on disk file from 'incomplete_snapshot', and restores the EntryList
// struct from 'incomplete_chunks', both located under 'cachePath'.
//
// nil is returned if either file does not exist, if the struct cannot be decoded, if its checksum does
// not match, or if the token stored in the on disk file is different from the one in the struct. On
// success the on disk file is left open and attached to the returned entry list.
func loadIncompleteSnapshot(snapshotID string, cachePath string) *EntryList {
	onDiskFilePath := path.Join(cachePath, "incomplete_snapshot")
	entryListFilePath := path.Join(cachePath, "incomplete_chunks")

	if _, err := os.Stat(onDiskFilePath); os.IsNotExist(err) {
		return nil
	}

	if _, err := os.Stat(entryListFilePath); os.IsNotExist(err) {
		return nil
	}

	entryList := &EntryList{}
	entryListFile, err := os.OpenFile(entryListFilePath, os.O_RDONLY, 0600)
	if err != nil {
		LOG_WARN("INCOMPLETE_LOAD", "Failed to open the incomplete snapshot: %v", err)
		return nil
	}
	defer entryListFile.Close()

	decoder := msgpack.NewDecoder(entryListFile)
	err = decoder.Decode(&entryList)
	if err != nil {
		LOG_WARN("INCOMPLETE_LOAD", "Failed to load the incomplete snapshot: %v", err)
		return nil
	}

	checksum := entryList.CalculateChecksum()
	if checksum != entryList.Checksum {
		LOG_WARN("INCOMPLETE_LOAD", "Failed to load the incomplete snapshot: checksum mismatched")
		return nil
	}

	onDiskFile, err := os.OpenFile(onDiskFilePath, os.O_RDONLY, 0600)
	if err != nil {
		LOG_WARN("INCOMPLETE_LOAD", "Failed to open the on disk file for the incomplete snapshot: %v", err)
		return nil
	}

	// The token ties the on disk file to the struct that was saved along with it.
	decoder = msgpack.NewDecoder(onDiskFile)
	token, err := decoder.DecodeString()
	if err != nil {
		LOG_WARN("INCOMPLETE_LOAD", "Failed to read the token for the incomplete snapshot: %v", err)
		onDiskFile.Close()
		return nil
	}

	if token != entryList.Token {
		LOG_WARN("INCOMPLETE_LOAD", "Mismatched tokens in the incomplete snapshot")
		onDiskFile.Close()
		return nil
	}

	entryList.onDiskFile = onDiskFile

	for i, hash := range entryList.UploadedChunkHashes {
		if len(hash) == 0 {
			// An empty hash means the chunk has not been uploaded in previous run
			entryList.UploadedChunkHashes = entryList.UploadedChunkHashes[0:i]
			entryList.UploadedChunkLengths = entryList.UploadedChunkLengths[0:i]
			break
		}
	}

	LOG_INFO("INCOMPLETE_LOAD", "Previous incomplete backup contains %d files and %d chunks",
		entryList.NumberOfEntries, len(entryList.PreservedChunkLengths)+len(entryList.UploadedChunkHashes))

	return entryList
}
// deleteIncompleteSnapshot removes 'incomplete_snapshot' and 'incomplete_chunks' from 'cachePath'. A file
// that cannot be stat'ed is left alone. If a removal fails, a warning is logged with LOG_WARN and the
// remaining file is not processed.
func deleteIncompleteSnapshot(cachePath string) {
	names := [...]string{"incomplete_snapshot", "incomplete_chunks"}

	for i := range names {
		filePath := path.Join(cachePath, names[i])

		if _, statErr := os.Stat(filePath); statErr != nil {
			continue
		}

		if removeErr := os.Remove(filePath); removeErr != nil {
			LOG_WARN("INCOMPLETE_REMOVE", "Failed to remove the incomplete snapshot: %v", removeErr)
			return
		}
	}
}

View File

@@ -0,0 +1,179 @@
// Copyright (c) Acrosync LLC. All rights reserved.
// Free for personal use and commercial trial
// Commercial use requires per-user licenses available from https://duplicacy.com
package duplicacy
import (
"os"
"path"
"time"
"testing"
"math/rand"
)
// generateRandomString returns a string of 'length' characters, each picked from the lowercase letters
// 'a' to 'z' with rand.Intn.
func generateRandomString(length int) string {
	const alphabet = "abcdefghijklmnopqrstuvwxyz"

	buffer := make([]byte, length)
	for position := range buffer {
		buffer[position] = alphabet[rand.Intn(len(alphabet))]
	}
	return string(buffer)
}
// fileSizeGenerator draws Zipf-distributed values (s = 1.2, v = 1.0, imax = 1024) from a source seeded
// with the current time.
var fileSizeGenerator = func() *rand.Zipf {
	source := rand.NewSource(time.Now().UnixNano())
	return rand.NewZipf(rand.New(source), 1.2, 1.0, 1024)
}()

// generateRandomFileSize returns the next value from fileSizeGenerator plus one.
func generateRandomFileSize() int64 {
	drawn := fileSizeGenerator.Uint64()
	return int64(drawn + 1)
}
// generateRandomChunks generates random chunks whose lengths add up to exactly 'totalFileSize'. Every chunk
// gets a random 64-character hash and a random length between 1 and 64, except that the last chunk is
// shortened so that the total is not exceeded. Nothing is generated if 'totalFileSize' is not positive.
func generateRandomChunks(totalFileSize int64) (chunks []string, lengths []int) {
	for remaining := totalFileSize; remaining > 0; {
		chunks = append(chunks, generateRandomString(64))

		size := int64(1 + (rand.Int() % 64))
		if size > remaining {
			size = remaining
		}

		lengths = append(lengths, int(size))
		remaining -= size
	}
	return chunks, lengths
}
// getPreservedChunks collects the chunks referenced by the complete entries (those with a non-negative
// size) in 'entries', in order, returning their hashes and lengths taken from 'chunks' and 'lengths'.
//
// A chunk shared by two consecutive complete entries (the end chunk of one being the start chunk of the
// next) is only added once. Incomplete entries are skipped.
func getPreservedChunks(entries []*Entry, chunks []string, lengths []int) (preservedChunks []string, preservedChunkLengths []int) {
	lastPreservedChunk := -1
	for _, entry := range entries {
		if entry.Size < 0 {
			continue
		}

		// The start chunk has already been added if the previous complete entry ended in it.
		if lastPreservedChunk != entry.StartChunk {
			lastPreservedChunk = entry.StartChunk
			preservedChunks = append(preservedChunks, chunks[entry.StartChunk])
			preservedChunkLengths = append(preservedChunkLengths, lengths[entry.StartChunk])
		}

		// Add the remaining chunks of this entry. The loop must advance the chunk index 'j', not the
		// index of the entry.
		for j := entry.StartChunk + 1; j <= entry.EndChunk; j++ {
			preservedChunks = append(preservedChunks, chunks[j])
			preservedChunkLengths = append(preservedChunkLengths, lengths[j])
			lastPreservedChunk = j
		}
	}
	return preservedChunks, preservedChunkLengths
}
// testEntryList creates 'numberOfEntries' random entries and feeds them to two entry lists created
// with the given 'maximumInMemoryEntries'.  After each list has been populated with entries and
// chunks, ReadEntries is called to verify that the list returns exactly 'numberOfEntries' entries.
//
// The entry lists are stored under the directory 'duplicacy_test' in the system temporary directory,
// which is wiped at the beginning of each call.
func testEntryList(t *testing.T, numberOfEntries int, maximumInMemoryEntries int) {

	entries := make([]*Entry, 0, numberOfEntries)
	entrySizes := make([]int64, 0, numberOfEntries)
	totalFileSize := int64(0)

	for i := 0; i < numberOfEntries; i++ {
		// A size of -1 marks the entry as new
		entries = append(entries, CreateEntry(generateRandomString(16), -1, 0, 0700))
		size := generateRandomFileSize()
		entrySizes = append(entrySizes, size)
		totalFileSize += size
	}

	testDir := path.Join(os.TempDir(), "duplicacy_test")
	os.RemoveAll(testDir)
	for _, name := range []string{"list1", "list2", "list3"} {
		listDir := path.Join(testDir, name)
		if err := os.MkdirAll(listDir, 0700); err != nil {
			t.Errorf("Failed to create the test directory %s: %v", listDir, err)
			return
		}
	}

	// For the first entry list, all entries are new
	entryList, err := CreateEntryList("test", path.Join(testDir, "list1"), maximumInMemoryEntries)
	if err != nil {
		t.Errorf("CreateEntryList returned an error: %v", err)
		return
	}

	// checkEntryCount reads back the current 'entryList' and reports whether it contains the
	// expected number of entries; any failure is recorded on 't'.
	checkEntryCount := func() bool {
		totalEntries := 0
		err := entryList.ReadEntries(func(entry *Entry) error {
			totalEntries++
			return nil
		})
		if err != nil {
			t.Errorf("ReadEntries returned an error: %s", err)
			return false
		}
		if totalEntries != numberOfEntries {
			t.Errorf("EntryList contains %d entries instead of %d", totalEntries, numberOfEntries)
			return false
		}
		return true
	}

	for _, entry := range entries {
		entryList.AddEntry(entry)
	}

	uploadedChunks, uploadedChunksLengths := generateRandomChunks(totalFileSize)
	for i, chunk := range uploadedChunks {
		entryList.AddUploadedChunk(i, chunk, uploadedChunksLengths[i])
	}

	for i := range entryList.ModifiedEntries {
		entryList.ModifiedEntries[i].Size = entrySizes[i]
	}

	if !checkEntryCount() {
		return
	}

	// For the second entry list, the intention is that half of the entries are new.
	// NOTE(review): 'rand.Int() % 1' is always 0, so every entry is marked as new here and
	// getPreservedChunks never sees an entry to preserve.  '% 2' was presumably intended; it is
	// left unchanged because switching it changes what the rest of this test exercises -- confirm
	// before changing.
	for i := range entries {
		if rand.Int() % 1 == 0 {
			entries[i].Size = -1
		} else {
			entries[i].Size = entrySizes[i]
		}
	}

	preservedChunks, preservedChunkLengths := getPreservedChunks(entries, uploadedChunks, uploadedChunksLengths)

	entryList, err = CreateEntryList("test", path.Join(testDir, "list2"), maximumInMemoryEntries)
	if err != nil {
		t.Errorf("CreateEntryList returned an error: %v", err)
		return
	}

	for _, entry := range entries {
		entryList.AddEntry(entry)
	}

	for i, chunk := range preservedChunks {
		entryList.AddPreservedChunk(chunk, preservedChunkLengths[i])
	}

	// Assign new sizes to the modified entries and generate just enough chunks to cover them
	totalFileSize = 0
	for i := range entryList.ModifiedEntries {
		fileSize := generateRandomFileSize()
		entryList.ModifiedEntries[i].Size = fileSize
		totalFileSize += fileSize
	}

	uploadedChunks, uploadedChunksLengths = generateRandomChunks(totalFileSize)
	for i, chunk := range uploadedChunks {
		entryList.AddUploadedChunk(i, chunk, uploadedChunksLengths[i])
	}

	checkEntryCount()
}
// TestEntryList runs testEntryList on 1024 entries with three different in-memory limits: one equal
// to the number of entries, one that is half of it, and zero.
func TestEntryList(t *testing.T) {
	const numberOfEntries = 1024

	for _, maximumInMemoryEntries := range []int{1024, 512, 0} {
		testEntryList(t, numberOfEntries, maximumInMemoryEntries)
	}
}

View File

@@ -8,17 +8,22 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"time"
"sort"
"bytes"
"github.com/vmihailenco/msgpack"
)
// Snapshot represents a backup of the repository.
type Snapshot struct {
Version int
ID string // the snapshot id; must be different for different repositories
Revision int // the revision number
Options string // options used to create this snapshot (some not included)
@@ -37,14 +42,11 @@ type Snapshot struct {
// A sequence of chunks whose aggregated content is the json representation of 'ChunkLengths'.
LengthSequence []string
Files []*Entry // list of files and subdirectories
ChunkHashes []string // a sequence of chunks representing the file content
ChunkLengths []int // the length of each chunk
Flag bool // used to mark certain snapshots for deletion or copy
discardAttributes bool
}
// CreateEmptySnapshot creates an empty snapshot.
@@ -56,16 +58,14 @@ func CreateEmptySnapshot(id string) (snapshto *Snapshot) {
}
}
// CreateSnapshotFromDirectory creates a snapshot from the local directory 'top'. Only 'Files'
// will be constructed, while 'ChunkHashes' and 'ChunkLengths' can only be populated after uploading.
func CreateSnapshotFromDirectory(id string, top string, nobackupFile string, filtersFile string, excludeByAttribute bool) (snapshot *Snapshot, skippedDirectories []string,
skippedFiles []string, err error) {
type DirectoryListing struct {
directory string
files *[]Entry
}
snapshot = &Snapshot{
ID: id,
Revision: 0,
StartTime: time.Now().Unix(),
}
func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string,
filtersFile string, excludeByAttribute bool, listingChannel chan *Entry,
skippedDirectories *[]string, skippedFiles *[]string) {
var patterns []string
@@ -77,45 +77,128 @@ func CreateSnapshotFromDirectory(id string, top string, nobackupFile string, fil
directories := make([]*Entry, 0, 256)
directories = append(directories, CreateEntry("", 0, 0, 0))
snapshot.Files = make([]*Entry, 0, 256)
attributeThreshold := 1024 * 1024
if attributeThresholdValue, found := os.LookupEnv("DUPLICACY_ATTRIBUTE_THRESHOLD"); found && attributeThresholdValue != "" {
attributeThreshold, _ = strconv.Atoi(attributeThresholdValue)
}
for len(directories) > 0 {
directory := directories[len(directories)-1]
directories = directories[:len(directories)-1]
snapshot.Files = append(snapshot.Files, directory)
subdirectories, skipped, err := ListEntries(top, directory.Path, &snapshot.Files, patterns, nobackupFile, snapshot.discardAttributes, excludeByAttribute)
subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingChannel)
if err != nil {
if directory.Path == "" {
LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err)
return nil, nil, nil, err
return
}
LOG_WARN("LIST_FAILURE", "Failed to list subdirectory %s: %v", directory.Path, err)
skippedDirectories = append(skippedDirectories, directory.Path)
if skippedDirectories != nil {
*skippedDirectories = append(*skippedDirectories, directory.Path)
}
continue
}
directories = append(directories, subdirectories...)
skippedFiles = append(skippedFiles, skipped...)
if !snapshot.discardAttributes && len(snapshot.Files) > attributeThreshold {
LOG_INFO("LIST_ATTRIBUTES", "Discarding file attributes")
snapshot.discardAttributes = true
for _, file := range snapshot.Files {
file.Attributes = nil
}
if skippedFiles != nil {
*skippedFiles = append(*skippedFiles, skipped...)
}
}
close(listingChannel)
}
func (snapshot *Snapshot)ListRemoteFiles(config *Config, chunkOperator *ChunkOperator, entryOut func(*Entry) bool) {
var chunks []string
for _, chunkHash := range snapshot.FileSequence {
chunks = append(chunks, chunkOperator.config.GetChunkIDFromHash(chunkHash))
}
// Remove the root entry
snapshot.Files = snapshot.Files[1:]
var chunk *Chunk
reader := sequenceReader{
sequence: snapshot.FileSequence,
buffer: new(bytes.Buffer),
refillFunc: func(chunkHash string) []byte {
if chunk != nil {
config.PutChunk(chunk)
}
chunk = chunkOperator.Download(chunkHash, 0, true)
return chunk.GetBytes()
},
}
if snapshot.Version == 0 {
LOG_INFO("SNAPSHOT_VERSION", "snapshot %s at revision %d is encoded in an old version format", snapshot.ID, snapshot.Revision)
files := make([]*Entry, 0)
decoder := json.NewDecoder(&reader)
// read open bracket
_, err := decoder.Token()
if err != nil {
LOG_ERROR("SNAPSHOT_PARSE", "Failed to open the snapshot %s at revision %d: not a list of entries",
snapshot.ID, snapshot.Revision)
return
}
for decoder.More() {
var entry Entry
err = decoder.Decode(&entry)
if err != nil {
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: %v",
snapshot.ID, snapshot.Revision, err)
return
}
files = append(files, &entry)
}
sort.Sort(ByName(files))
for _, file := range files {
if !entryOut(file) {
return
}
}
} else if snapshot.Version == 1 {
decoder := msgpack.NewDecoder(&reader)
lastEndChunk := 0
// while the array contains values
for _, err := decoder.PeekCode(); err != io.EOF; _, err = decoder.PeekCode() {
if err != nil {
LOG_ERROR("SNAPSHOT_PARSE", "Failed to parse the snapshot %s at revision %d: %v",
snapshot.ID, snapshot.Revision, err)
return
}
var entry Entry
err = decoder.Decode(&entry)
if err != nil {
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load the snapshot %s at revision %d: %v",
snapshot.ID, snapshot.Revision, err)
return
}
if entry.IsFile() {
entry.StartChunk += lastEndChunk
entry.EndChunk += entry.StartChunk
lastEndChunk = entry.EndChunk
}
err = entry.check(snapshot.ChunkLengths)
if err != nil {
LOG_ERROR("SNAPSHOT_ENTRY", "Failed to load the snapshot %s at revision %d: %v",
snapshot.ID, snapshot.Revision, err)
return
}
if !entryOut(&entry) {
return
}
}
} else {
LOG_ERROR("SNAPSHOT_VERSION", "snapshot %s at revision %d is encoded in unsupported version %d format",
snapshot.ID, snapshot.Revision, snapshot.Version)
return
}
return snapshot, skippedDirectories, skippedFiles, nil
}
func AppendPattern(patterns []string, new_pattern string) (new_patterns []string) {
@@ -215,100 +298,6 @@ func ProcessFilterLines(patternFileLines []string, includedFiles []string) (patt
return patterns
}
// IncompleteSnapshot is the struct used to save/load incomplete snapshots.  It is serialized as json
// to the file 'incomplete' under the preference directory by SaveIncompleteSnapshot and read back by
// LoadIncompleteSnapshot.
type IncompleteSnapshot struct {
// The leading entries of the snapshot that have a non-negative size (i.e., already processed),
// saved with their attributes cleared
Files []*Entry
// The chunk hashes of the snapshot, hex-encoded on save and decoded on load
ChunkHashes []string
// The chunk lengths, copied as is from the snapshot
ChunkLengths []int
}
// LoadIncompleteSnapshot reads the file 'incomplete' under the preference directory and turns it
// into a snapshot that only has 'Files', 'ChunkHashes', and 'ChunkLengths' populated.  The chunk
// hashes are stored in hex in the file and are decoded here.  It returns nil if the file can't be
// read, parsed, or decoded.
func LoadIncompleteSnapshot() (snapshot *Snapshot) {

	incompletePath := path.Join(GetDuplicacyPreferencePath(), "incomplete")

	content, readErr := ioutil.ReadFile(incompletePath)
	if readErr != nil {
		LOG_DEBUG("INCOMPLETE_LOCATE", "Failed to locate incomplete snapshot: %v", readErr)
		return nil
	}

	var saved IncompleteSnapshot
	if parseErr := json.Unmarshal(content, &saved); parseErr != nil {
		LOG_DEBUG("INCOMPLETE_PARSE", "Failed to parse incomplete snapshot: %v", parseErr)
		return nil
	}

	// Convert each hex-encoded hash back to its binary form
	var decodedHashes []string
	for _, encodedHash := range saved.ChunkHashes {
		rawHash, decodeErr := hex.DecodeString(encodedHash)
		if decodeErr != nil {
			LOG_DEBUG("INCOMPLETE_DECODE", "Failed to decode incomplete snapshot: %v", decodeErr)
			return nil
		}
		decodedHashes = append(decodedHashes, string(rawHash))
	}

	snapshot = &Snapshot{}
	snapshot.Files = saved.Files
	snapshot.ChunkHashes = decodedHashes
	snapshot.ChunkLengths = saved.ChunkLengths

	LOG_INFO("INCOMPLETE_LOAD", "Incomplete snapshot loaded from %s", incompletePath)
	return snapshot
}
// SaveIncompleteSnapshot writes the processed part of 'snapshot' as json to the file 'incomplete'
// under the preference directory.  Only the leading files with a non-negative size are saved, and
// their attributes are cleared (on the entries of 'snapshot' themselves).  Chunk hashes are saved
// in hex.  Failures are logged as warnings.
func SaveIncompleteSnapshot(snapshot *Snapshot) {

	// All unprocessed files will have a size of -1, so stop at the first one
	var processedFiles []*Entry
	for _, entry := range snapshot.Files {
		if entry.Size < 0 {
			break
		}
		entry.Attributes = nil
		processedFiles = append(processedFiles, entry)
	}

	var encodedHashes []string
	for _, rawHash := range snapshot.ChunkHashes {
		encodedHashes = append(encodedHashes, hex.EncodeToString([]byte(rawHash)))
	}

	toSave := IncompleteSnapshot{}
	toSave.Files = processedFiles
	toSave.ChunkHashes = encodedHashes
	toSave.ChunkLengths = snapshot.ChunkLengths

	content, encodeErr := json.MarshalIndent(toSave, "", " ")
	if encodeErr != nil {
		LOG_WARN("INCOMPLETE_ENCODE", "Failed to encode the incomplete snapshot: %v", encodeErr)
		return
	}

	incompletePath := path.Join(GetDuplicacyPreferencePath(), "incomplete")
	if writeErr := ioutil.WriteFile(incompletePath, content, 0644); writeErr != nil {
		LOG_WARN("INCOMPLETE_WRITE", "Failed to save the incomplete snapshot: %v", writeErr)
		return
	}

	LOG_INFO("INCOMPLETE_SAVE", "Incomplete snapshot saved to %s", incompletePath)
}
// RemoveIncompleteSnapshot deletes the file 'incomplete' under the preference directory.  Nothing is
// done if the file can't be stat'ed or is a directory.  The outcome of the removal is logged.
func RemoveIncompleteSnapshot() {
	snapshotFile := path.Join(GetDuplicacyPreferencePath(), "incomplete")

	stat, err := os.Stat(snapshotFile)
	if err != nil || stat.IsDir() {
		return
	}

	if err = os.Remove(snapshotFile); err != nil {
		LOG_INFO("INCOMPLETE_SAVE", "Failed to remove incomplete snapshot: %v", err)
		return
	}

	LOG_INFO("INCOMPLETE_SAVE", "Removed incomplete snapshot %s", snapshotFile)
}
// CreateSnapshotFromDescription creates a snapshot from its json description.
func CreateSnapshotFromDescription(description []byte) (snapshot *Snapshot, err error) {
@@ -321,6 +310,14 @@ func CreateSnapshotFromDescription(description []byte) (snapshot *Snapshot, err
snapshot = &Snapshot{}
if value, ok := root["version"]; !ok {
snapshot.Version = 0
} else if version, ok := value.(float64); !ok {
return nil, fmt.Errorf("Invalid version is specified in the snapshot")
} else {
snapshot.Version = int(version)
}
if value, ok := root["id"]; !ok {
return nil, fmt.Errorf("No id is specified in the snapshot")
} else if snapshot.ID, ok = value.(string); !ok {
@@ -437,6 +434,7 @@ func (snapshot *Snapshot) MarshalJSON() ([]byte, error) {
object := make(map[string]interface{})
object["version"] = 1
object["id"] = snapshot.ID
object["revision"] = snapshot.Revision
object["options"] = snapshot.Options
@@ -458,9 +456,7 @@ func (snapshot *Snapshot) MarshalJSON() ([]byte, error) {
// MarshalSequence creates a json representation for the specified chunk sequence.
func (snapshot *Snapshot) MarshalSequence(sequenceType string) ([]byte, error) {
if sequenceType == "files" {
return json.Marshal(snapshot.Files)
} else if sequenceType == "chunks" {
if sequenceType == "chunks" {
return json.Marshal(encodeSequence(snapshot.ChunkHashes))
} else {
return json.Marshal(snapshot.ChunkLengths)
@@ -489,3 +485,4 @@ func encodeSequence(sequence []string) []string {
return sequenceInHex
}

View File

@@ -20,6 +20,8 @@ import (
"strings"
"text/tabwriter"
"time"
"sync"
"sync/atomic"
"github.com/aryann/difflib"
)
@@ -189,7 +191,6 @@ type SnapshotManager struct {
fileChunk *Chunk
snapshotCache *FileStorage
chunkDownloader *ChunkDownloader
chunkOperator *ChunkOperator
}
@@ -268,72 +269,26 @@ func (reader *sequenceReader) Read(data []byte) (n int, err error) {
return reader.buffer.Read(data)
}
func (manager *SnapshotManager) CreateChunkDownloader() {
if manager.chunkDownloader == nil {
manager.chunkDownloader = CreateChunkDownloader(manager.config, manager.storage, manager.snapshotCache, false, 1, false)
func (manager *SnapshotManager) CreateChunkOperator(resurrect bool, threads int, allowFailures bool) {
if manager.chunkOperator == nil {
manager.chunkOperator = CreateChunkOperator(manager.config, manager.storage, manager.snapshotCache, resurrect, threads, allowFailures)
}
}
// DownloadSequence returns the content represented by a sequence of chunks.
func (manager *SnapshotManager) DownloadSequence(sequence []string) (content []byte) {
manager.CreateChunkDownloader()
manager.CreateChunkOperator(false, 1, false)
for _, chunkHash := range sequence {
i := manager.chunkDownloader.AddChunk(chunkHash)
chunk := manager.chunkDownloader.WaitForChunk(i)
chunk := manager.chunkOperator.Download(chunkHash, 0, true)
content = append(content, chunk.GetBytes()...)
manager.config.PutChunk(chunk)
}
return content
}
func (manager *SnapshotManager) DownloadSnapshotFileSequence(snapshot *Snapshot, patterns []string, attributesNeeded bool) bool {
manager.CreateChunkDownloader()
reader := sequenceReader{
sequence: snapshot.FileSequence,
buffer: new(bytes.Buffer),
refillFunc: func(chunkHash string) []byte {
i := manager.chunkDownloader.AddChunk(chunkHash)
chunk := manager.chunkDownloader.WaitForChunk(i)
return chunk.GetBytes()
},
}
files := make([]*Entry, 0)
decoder := json.NewDecoder(&reader)
// read open bracket
_, err := decoder.Token()
if err != nil {
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: not a list of entries",
snapshot.ID, snapshot.Revision)
return false
}
// while the array contains values
for decoder.More() {
var entry Entry
err = decoder.Decode(&entry)
if err != nil {
LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: %v",
snapshot.ID, snapshot.Revision, err)
return false
}
// If we don't need the attributes or the file isn't included we clear the attributes to save memory
if !attributesNeeded || (len(patterns) != 0 && !MatchPath(entry.Path, patterns)) {
entry.Attributes = nil
}
files = append(files, &entry)
}
snapshot.Files = files
return true
}
// DownloadSnapshotSequence downloads the content represented by a sequence of chunks, and then unmarshal the content
// using the specified 'loadFunction'. It purpose is to decode the chunk sequences representing chunk hashes or chunk lengths
// using the specified 'loadFunction'. Its purpose is to decode the chunk sequences representing chunk hashes or chunk lengths
// in a snapshot.
func (manager *SnapshotManager) DownloadSnapshotSequence(snapshot *Snapshot, sequenceType string) bool {
@@ -362,30 +317,21 @@ func (manager *SnapshotManager) DownloadSnapshotSequence(snapshot *Snapshot, seq
return true
}
// DownloadSnapshotContents loads all chunk sequences in a snapshot. A snapshot, when just created, only contains
// some metadata and theree sequence representing files, chunk hashes, and chunk lengths. This function must be called
// for the actual content of the snapshot to be usable.
func (manager *SnapshotManager) DownloadSnapshotContents(snapshot *Snapshot, patterns []string, attributesNeeded bool) bool {
// DownloadSnapshotSequences loads all chunk sequences in a snapshot. A snapshot, when just created, only contains
// some metadata and three sequences representing files, chunk hashes, and chunk lengths. This function must be called
// for the chunk hash sequence and chunk length sequence to be usable.
func (manager *SnapshotManager) DownloadSnapshotSequences(snapshot *Snapshot) bool {
manager.DownloadSnapshotFileSequence(snapshot, patterns, attributesNeeded)
manager.DownloadSnapshotSequence(snapshot, "chunks")
manager.DownloadSnapshotSequence(snapshot, "lengths")
err := manager.CheckSnapshot(snapshot)
if err != nil {
LOG_ERROR("SNAPSHOT_CHECK", "The snapshot %s at revision %d contains an error: %v",
snapshot.ID, snapshot.Revision, err)
return false
}
return true
}
// ClearSnapshotContents removes contents loaded by DownloadSnapshotContents
func (manager *SnapshotManager) ClearSnapshotContents(snapshot *Snapshot) {
// ClearSnapshotSequences removes sequences loaded by DownloadSnapshotSequences
func (manager *SnapshotManager) ClearSnapshotSequences(snapshot *Snapshot) {
snapshot.ChunkHashes = nil
snapshot.ChunkLengths = nil
snapshot.Files = nil
}
// CleanSnapshotCache removes all files not referenced by the specified 'snapshot' in the snapshot cache.
@@ -577,10 +523,6 @@ func (manager *SnapshotManager) downloadLatestSnapshot(snapshotID string) (remot
remote = manager.DownloadSnapshot(snapshotID, latest)
}
if remote != nil {
manager.DownloadSnapshotContents(remote, nil, false)
}
return remote
}
@@ -712,6 +654,12 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
LOG_DEBUG("LIST_PARAMETERS", "id: %s, revisions: %v, tag: %s, showFiles: %t, showChunks: %t",
snapshotID, revisionsToList, tag, showFiles, showChunks)
manager.CreateChunkOperator(false, 1, false)
defer func() {
manager.chunkOperator.Stop()
manager.chunkOperator = nil
}()
var snapshotIDs []string
var err error
@@ -749,14 +697,16 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
if len(snapshot.Tag) > 0 {
tagWithSpace = snapshot.Tag + " "
}
LOG_INFO("SNAPSHOT_INFO", "Snapshot %s revision %d created at %s %s%s",
snapshotID, revision, creationTime, tagWithSpace, snapshot.Options)
if showFiles {
manager.DownloadSnapshotFileSequence(snapshot, nil, false)
options := snapshot.Options
if snapshot.Version == 0 {
options += " (0)"
}
LOG_INFO("SNAPSHOT_INFO", "Snapshot %s revision %d created at %s %s%s",
snapshotID, revision, creationTime, tagWithSpace, options)
if showFiles {
// We need to fill in ChunkHashes and ChunkLengths to verify that each entry is valid
manager.DownloadSnapshotSequences(snapshot)
if snapshot.NumberOfFiles > 0 {
LOG_INFO("SNAPSHOT_STATS", "Files: %d", snapshot.NumberOfFiles)
@@ -768,7 +718,7 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
totalFileSize := int64(0)
lastChunk := 0
for _, file := range snapshot.Files {
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(file *Entry)bool {
if file.IsFile() {
totalFiles++
totalFileSize += file.Size
@@ -780,17 +730,18 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
lastChunk = file.EndChunk
}
}
}
return true
})
for _, file := range snapshot.Files {
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(file *Entry)bool {
if file.IsFile() {
LOG_INFO("SNAPSHOT_FILE", "%s", file.String(maxSizeDigits))
}
}
return true
})
metaChunks := len(snapshot.FileSequence) + len(snapshot.ChunkSequence) + len(snapshot.LengthSequence)
LOG_INFO("SNAPSHOT_STATS", "Files: %d, total size: %d, file chunks: %d, metadata chunks: %d",
totalFiles, totalFileSize, lastChunk+1, metaChunks)
LOG_INFO("SNAPSHOT_STATS", "Total size: %d, file chunks: %d, metadata chunks: %d", totalFileSize, lastChunk+1, metaChunks)
}
if showChunks {
@@ -807,11 +758,15 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
}
// ListSnapshots shows the information about a snapshot.
// CheckSnapshots checks if there is any problem with a snapshot.
func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToCheck []int, tag string, showStatistics bool, showTabular bool,
checkFiles bool, checkChunks, searchFossils bool, resurrect bool, threads int, allowFailures bool) bool {
manager.chunkDownloader = CreateChunkDownloader(manager.config, manager.storage, manager.snapshotCache, false, threads, allowFailures)
manager.CreateChunkOperator(resurrect, threads, allowFailures)
defer func() {
manager.chunkOperator.Stop()
manager.chunkOperator = nil
}()
LOG_DEBUG("LIST_PARAMETERS", "id: %s, revisions: %v, tag: %s, showStatistics: %t, showTabular: %t, checkFiles: %t, searchFossils: %t, resurrect: %t",
snapshotID, revisionsToCheck, tag, showStatistics, showTabular, checkFiles, searchFossils, resurrect)
@@ -911,9 +866,9 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
for _, snapshot := range snapshotMap[snapshotID] {
if checkFiles {
manager.DownloadSnapshotContents(snapshot, nil, false)
manager.DownloadSnapshotSequences(snapshot)
manager.VerifySnapshot(snapshot)
manager.ClearSnapshotContents(snapshot)
manager.ClearSnapshotSequences(snapshot)
continue
}
@@ -1026,6 +981,7 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
// .duplicacy/cache/storage/verified_chunks. Note that it contains the chunk ids not chunk
// hashes.
verifiedChunks := make(map[string]int64)
var verifiedChunksLock sync.Mutex
verifiedChunksFile := "verified_chunks"
manager.fileChunk.Reset(false)
@@ -1061,16 +1017,11 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
defer saveVerifiedChunks()
RunAtError = saveVerifiedChunks
manager.chunkDownloader.snapshotCache = nil
LOG_INFO("SNAPSHOT_VERIFY", "Verifying %d chunks", len(*allChunkHashes))
startTime := time.Now()
var chunkHashes []string
// The index of the first chunk to add to the downloader, which may have already downloaded
// some metadata chunks so the index doesn't start with 0.
chunkIndex := -1
skippedChunks := 0
for chunkHash := range *allChunkHashes {
if len(verifiedChunks) > 0 {
@@ -1081,38 +1032,65 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
}
}
chunkHashes = append(chunkHashes, chunkHash)
if chunkIndex == -1 {
chunkIndex = manager.chunkDownloader.AddChunk(chunkHash)
} else {
manager.chunkDownloader.AddChunk(chunkHash)
}
}
if skippedChunks > 0 {
LOG_INFO("SNAPSHOT_VERIFY", "Skipped %d chunks that have already been verified before", skippedChunks)
}
var downloadedChunkSize int64
totalChunks := len(chunkHashes)
for i := 0; i < totalChunks; i++ {
chunk := manager.chunkDownloader.WaitForChunk(i + chunkIndex)
chunkID := manager.config.GetChunkIDFromHash(chunkHashes[i])
if chunk.isBroken {
continue
}
verifiedChunks[chunkID] = startTime.Unix()
downloadedChunkSize += int64(chunk.GetLength())
var totalDownloadedChunkSize int64
var totalDownloadedChunks int64
totalChunks := int64(len(chunkHashes))
elapsedTime := time.Now().Sub(startTime).Seconds()
speed := int64(float64(downloadedChunkSize) / elapsedTime)
remainingTime := int64(float64(totalChunks - i - 1) / float64(i + 1) * elapsedTime)
percentage := float64(i + 1) / float64(totalChunks) * 100.0
LOG_INFO("VERIFY_PROGRESS", "Verified chunk %s (%d/%d), %sB/s %s %.1f%%",
chunkID, i + 1, totalChunks, PrettySize(speed), PrettyTime(remainingTime), percentage)
chunkChannel := make(chan int, threads)
var wg sync.WaitGroup
wg.Add(threads)
for i := 0; i < threads; i++ {
go func() {
defer CatchLogException()
for {
chunkIndex, ok := <- chunkChannel
if !ok {
wg.Done()
return
}
chunk := manager.chunkOperator.Download(chunkHashes[chunkIndex], chunkIndex, false)
if chunk == nil {
continue
}
chunkID := manager.config.GetChunkIDFromHash(chunkHashes[chunkIndex])
verifiedChunksLock.Lock()
verifiedChunks[chunkID] = startTime.Unix()
verifiedChunksLock.Unlock()
downloadedChunkSize := atomic.AddInt64(&totalDownloadedChunkSize, int64(chunk.GetLength()))
downloadedChunks := atomic.AddInt64(&totalDownloadedChunks, 1)
elapsedTime := time.Now().Sub(startTime).Seconds()
speed := int64(float64(downloadedChunkSize) / elapsedTime)
remainingTime := int64(float64(totalChunks - downloadedChunks) / float64(downloadedChunks) * elapsedTime)
percentage := float64(downloadedChunks) / float64(totalChunks) * 100.0
LOG_INFO("VERIFY_PROGRESS", "Verified chunk %s (%d/%d), %sB/s %s %.1f%%",
chunkID, downloadedChunks, totalChunks, PrettySize(speed), PrettyTime(remainingTime), percentage)
manager.config.PutChunk(chunk)
}
} ()
}
if manager.chunkDownloader.NumberOfFailedChunks > 0 {
LOG_ERROR("SNAPSHOT_VERIFY", "%d out of %d chunks are corrupted", manager.chunkDownloader.NumberOfFailedChunks, len(*allChunkHashes))
for chunkIndex := range chunkHashes {
chunkChannel <- chunkIndex
}
close(chunkChannel)
wg.Wait()
manager.chunkOperator.WaitForCompletion()
if manager.chunkOperator.NumberOfFailedChunks > 0 {
LOG_ERROR("SNAPSHOT_VERIFY", "%d out of %d chunks are corrupted", manager.chunkOperator.NumberOfFailedChunks, len(*allChunkHashes))
} else {
LOG_INFO("SNAPSHOT_VERIFY", "All %d chunks have been successfully verified", len(*allChunkHashes))
}
@@ -1280,14 +1258,6 @@ func (manager *SnapshotManager) PrintSnapshot(snapshot *Snapshot) bool {
object["chunks"] = manager.ConvertSequence(snapshot.ChunkHashes)
object["lengths"] = snapshot.ChunkLengths
// By default the json serialization of a file entry contains the path in base64 format. This is
// to convert every file entry into an object which include the path in a more readable format.
var files []map[string]interface{}
for _, file := range snapshot.Files {
files = append(files, file.convertToObject(false))
}
object["files"] = files
description, err := json.MarshalIndent(object, "", " ")
if err != nil {
@@ -1296,8 +1266,24 @@ func (manager *SnapshotManager) PrintSnapshot(snapshot *Snapshot) bool {
return false
}
fmt.Printf("%s\n", string(description))
// Don't print the ending bracket
fmt.Printf("%s", string(description[:len(description) - 2]))
fmt.Printf(",\n \"files\": [\n")
isFirstFile := true
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (file *Entry) bool {
fileDescription, _ := json.MarshalIndent(file.convertToObject(false), "", " ")
if isFirstFile {
fmt.Printf("%s", fileDescription)
isFirstFile = false
} else {
fmt.Printf(",\n%s", fileDescription)
}
return true
})
fmt.Printf(" ]\n}\n")
return true
}
@@ -1313,17 +1299,20 @@ func (manager *SnapshotManager) VerifySnapshot(snapshot *Snapshot) bool {
return false
}
files := make([]*Entry, 0, len(snapshot.Files)/2)
for _, file := range snapshot.Files {
files := make([]*Entry, 0)
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (file *Entry) bool {
if file.IsFile() && file.Size != 0 {
file.Attributes = nil
files = append(files, file)
}
}
return true
})
sort.Sort(ByChunk(files))
corruptedFiles := 0
var lastChunk *Chunk
for _, file := range files {
if !manager.RetrieveFile(snapshot, file, func([]byte) {}) {
if !manager.RetrieveFile(snapshot, file, &lastChunk, func([]byte) {}) {
corruptedFiles++
}
LOG_TRACE("SNAPSHOT_VERIFY", "%s", file.Path)
@@ -1341,21 +1330,13 @@ func (manager *SnapshotManager) VerifySnapshot(snapshot *Snapshot) bool {
}
// RetrieveFile retrieves the file in the specified snapshot.
func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, output func([]byte)) bool {
func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, lastChunk **Chunk, output func([]byte)) bool {
if file.Size == 0 {
return true
}
manager.CreateChunkDownloader()
// Temporarily disable the snapshot cache of the download so that downloaded file chunks won't be saved
// to the cache.
snapshotCache := manager.chunkDownloader.snapshotCache
manager.chunkDownloader.snapshotCache = nil
defer func() {
manager.chunkDownloader.snapshotCache = snapshotCache
}()
manager.CreateChunkOperator(false, 1, false)
fileHasher := manager.config.NewFileHasher()
alternateHash := false
@@ -1376,12 +1357,19 @@ func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, ou
}
hash := snapshot.ChunkHashes[i]
lastChunk, lastChunkHash := manager.chunkDownloader.GetLastDownloadedChunk()
if lastChunkHash != hash {
i := manager.chunkDownloader.AddChunk(hash)
chunk = manager.chunkDownloader.WaitForChunk(i)
if lastChunk == nil {
chunk = manager.chunkOperator.Download(hash, 0, false)
} else if *lastChunk == nil {
chunk = manager.chunkOperator.Download(hash, 0, false)
*lastChunk = chunk
} else {
chunk = lastChunk
if (*lastChunk).GetHash() == hash {
chunk = *lastChunk
} else {
manager.config.PutChunk(*lastChunk)
chunk = manager.chunkOperator.Download(hash, 0, false)
*lastChunk = chunk
}
}
output(chunk.GetBytes()[start:end])
@@ -1405,10 +1393,18 @@ func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, ou
// FindFile returns the file entry that has the given file name.
func (manager *SnapshotManager) FindFile(snapshot *Snapshot, filePath string, suppressError bool) *Entry {
for _, entry := range snapshot.Files {
var found *Entry
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (entry *Entry) bool {
if entry.Path == filePath {
return entry
found = entry
return false
}
return true
})
if found != nil {
return found
}
if !suppressError {
@@ -1440,13 +1436,8 @@ func (manager *SnapshotManager) PrintFile(snapshotID string, revision int, path
return false
}
patterns := []string{}
if path != "" {
patterns = []string{path}
}
// If no path is specified, we're printing the snapshot so we need all attributes
if !manager.DownloadSnapshotContents(snapshot, patterns, path == "") {
// If no path is specified, we're printing the snapshot
if !manager.DownloadSnapshotSequences(snapshot) {
return false
}
@@ -1456,7 +1447,7 @@ func (manager *SnapshotManager) PrintFile(snapshotID string, revision int, path
}
file := manager.FindFile(snapshot, path, false)
if !manager.RetrieveFile(snapshot, file, func(chunk []byte) {
if !manager.RetrieveFile(snapshot, file, nil, func(chunk []byte) {
fmt.Printf("%s", chunk)
}) {
LOG_ERROR("SNAPSHOT_RETRIEVE", "File %s is corrupted in snapshot %s at revision %d",
@@ -1474,22 +1465,38 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
LOG_DEBUG("DIFF_PARAMETERS", "top: %s, id: %s, revision: %v, path: %s, compareByHash: %t",
top, snapshotID, revisions, filePath, compareByHash)
manager.CreateChunkOperator(false, 1, false)
defer func() {
manager.chunkOperator.Stop()
manager.chunkOperator = nil
} ()
var leftSnapshot *Snapshot
var rightSnapshot *Snapshot
var err error
leftSnapshotFiles := make([]*Entry, 0, 1024)
rightSnapshotFiles := make([]*Entry, 0, 1024)
// If no or only one revision is specified, use the on-disk version for the right-hand side.
if len(revisions) <= 1 {
// Only scan the repository if filePath is not provided
if len(filePath) == 0 {
rightSnapshot, _, _, err = CreateSnapshotFromDirectory(snapshotID, top, nobackupFile, filtersFile, excludeByAttribute)
if err != nil {
LOG_ERROR("SNAPSHOT_LIST", "Failed to list the directory %s: %v", top, err)
return false
rightSnapshot = CreateEmptySnapshot(snapshotID)
localListingChannel := make(chan *Entry)
go func() {
defer CatchLogException()
rightSnapshot.ListLocalFiles(top, nobackupFile, filtersFile, excludeByAttribute, localListingChannel, nil, nil)
} ()
for entry := range localListingChannel {
entry.Attributes = nil // attributes are not compared
rightSnapshotFiles = append(rightSnapshotFiles, entry)
}
}
} else {
rightSnapshot = manager.DownloadSnapshot(snapshotID, revisions[1])
manager.DownloadSnapshotSequences(rightSnapshot)
}
// If no revision is specified, use the latest revision as the left-hand side.
@@ -1503,15 +1510,11 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
leftSnapshot = manager.DownloadSnapshot(snapshotID, revisions[0])
}
manager.DownloadSnapshotSequences(leftSnapshot)
if len(filePath) > 0 {
manager.DownloadSnapshotContents(leftSnapshot, nil, false)
if rightSnapshot != nil && rightSnapshot.Revision != 0 {
manager.DownloadSnapshotContents(rightSnapshot, nil, false)
}
var leftFile []byte
if !manager.RetrieveFile(leftSnapshot, manager.FindFile(leftSnapshot, filePath, false), func(content []byte) {
if !manager.RetrieveFile(leftSnapshot, manager.FindFile(leftSnapshot, filePath, false), nil, func(content []byte) {
leftFile = append(leftFile, content...)
}) {
LOG_ERROR("SNAPSHOT_DIFF", "File %s is corrupted in snapshot %s at revision %d",
@@ -1521,7 +1524,7 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
var rightFile []byte
if rightSnapshot != nil {
if !manager.RetrieveFile(rightSnapshot, manager.FindFile(rightSnapshot, filePath, false), func(content []byte) {
if !manager.RetrieveFile(rightSnapshot, manager.FindFile(rightSnapshot, filePath, false), nil, func(content []byte) {
rightFile = append(rightFile, content...)
}) {
LOG_ERROR("SNAPSHOT_DIFF", "File %s is corrupted in snapshot %s at revision %d",
@@ -1582,24 +1585,32 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
return true
}
// We only need to decode the 'files' sequence, not 'chunkhashes' or 'chunklengthes'
manager.DownloadSnapshotFileSequence(leftSnapshot, nil, false)
if rightSnapshot != nil && rightSnapshot.Revision != 0 {
manager.DownloadSnapshotFileSequence(rightSnapshot, nil, false)
leftSnapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(entry *Entry) bool {
entry.Attributes = nil
leftSnapshotFiles = append(leftSnapshotFiles, entry)
return true
})
if rightSnapshot.Revision != 0 {
rightSnapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(entry *Entry) bool {
entry.Attributes = nil
rightSnapshotFiles = append(rightSnapshotFiles, entry)
return true
})
}
maxSize := int64(9)
maxSizeDigits := 1
// Find the max Size value in order for pretty alignment.
for _, file := range leftSnapshot.Files {
for _, file := range leftSnapshotFiles {
for !file.IsDir() && file.Size > maxSize {
maxSize = maxSize*10 + 9
maxSizeDigits += 1
}
}
for _, file := range rightSnapshot.Files {
for _, file := range rightSnapshotFiles {
for !file.IsDir() && file.Size > maxSize {
maxSize = maxSize*10 + 9
maxSizeDigits += 1
@@ -1609,22 +1620,22 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []
buffer := make([]byte, 32*1024)
var i, j int
for i < len(leftSnapshot.Files) || j < len(rightSnapshot.Files) {
for i < len(leftSnapshotFiles) || j < len(rightSnapshotFiles) {
if i >= len(leftSnapshot.Files) {
if rightSnapshot.Files[j].IsFile() {
LOG_INFO("SNAPSHOT_DIFF", "+ %s", rightSnapshot.Files[j].String(maxSizeDigits))
if i >= len(leftSnapshotFiles) {
if rightSnapshotFiles[j].IsFile() {
LOG_INFO("SNAPSHOT_DIFF", "+ %s", rightSnapshotFiles[j].String(maxSizeDigits))
}
j++
} else if j >= len(rightSnapshot.Files) {
if leftSnapshot.Files[i].IsFile() {
LOG_INFO("SNAPSHOT_DIFF", "- %s", leftSnapshot.Files[i].String(maxSizeDigits))
} else if j >= len(rightSnapshotFiles) {
if leftSnapshotFiles[i].IsFile() {
LOG_INFO("SNAPSHOT_DIFF", "- %s", leftSnapshotFiles[i].String(maxSizeDigits))
}
i++
} else {
left := leftSnapshot.Files[i]
right := rightSnapshot.Files[j]
left := leftSnapshotFiles[i]
right := rightSnapshotFiles[j]
if !left.IsFile() {
i++
@@ -1679,6 +1690,12 @@ func (manager *SnapshotManager) ShowHistory(top string, snapshotID string, revis
LOG_DEBUG("HISTORY_PARAMETERS", "top: %s, id: %s, revisions: %v, path: %s, showLocalHash: %t",
top, snapshotID, revisions, filePath, showLocalHash)
manager.CreateChunkOperator(false, 1, false)
defer func() {
manager.chunkOperator.Stop()
manager.chunkOperator = nil
} ()
var err error
if len(revisions) == 0 {
@@ -1693,7 +1710,7 @@ func (manager *SnapshotManager) ShowHistory(top string, snapshotID string, revis
sort.Ints(revisions)
for _, revision := range revisions {
snapshot := manager.DownloadSnapshot(snapshotID, revision)
manager.DownloadSnapshotFileSequence(snapshot, nil, false)
manager.DownloadSnapshotSequences(snapshot)
file := manager.FindFile(snapshot, filePath, true)
if file != nil {
@@ -1801,8 +1818,11 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string,
LOG_WARN("DELETE_OPTIONS", "Tags or retention policy will be ignored if at least one revision is specified")
}
manager.chunkOperator = CreateChunkOperator(manager.storage, threads)
defer manager.chunkOperator.Stop()
manager.CreateChunkOperator(false, threads, false)
defer func() {
manager.chunkOperator.Stop()
manager.chunkOperator = nil
} ()
prefPath := GetDuplicacyPreferencePath()
logDir := path.Join(prefPath, "logs")
@@ -2184,7 +2204,7 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string,
return false
}
manager.chunkOperator.Stop()
manager.chunkOperator.WaitForCompletion()
for _, fossil := range manager.chunkOperator.fossils {
collection.AddFossil(fossil)
}
@@ -2265,6 +2285,7 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string,
} else {
manager.CleanSnapshotCache(nil, allSnapshots)
}
manager.chunkOperator.WaitForCompletion()
return true
}
@@ -2477,8 +2498,6 @@ func (manager *SnapshotManager) pruneSnapshotsExhaustive(referencedFossils map[s
// CheckSnapshot performs sanity checks on the given snapshot.
func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) {
lastChunk := 0
lastOffset := 0
var lastEntry *Entry
numberOfChunks := len(snapshot.ChunkHashes)
@@ -2488,57 +2507,39 @@ func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) {
numberOfChunks, len(snapshot.ChunkLengths))
}
entries := make([]*Entry, len(snapshot.Files))
copy(entries, snapshot.Files)
sort.Sort(ByChunk(entries))
snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (entry *Entry) bool {
for _, entry := range snapshot.Files {
if lastEntry != nil && lastEntry.Compare(entry) >= 0 && !strings.Contains(lastEntry.Path, "\ufffd") {
return fmt.Errorf("The entry %s appears before the entry %s", lastEntry.Path, entry.Path)
err = fmt.Errorf("The entry %s appears before the entry %s", lastEntry.Path, entry.Path)
return false
}
lastEntry = entry
}
for _, entry := range entries {
if !entry.IsFile() || entry.Size == 0 {
continue
return true
}
if entry.StartChunk < 0 {
return fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk)
err = fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk)
return false
}
if entry.EndChunk >= numberOfChunks {
return fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d",
err = fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d",
entry.Path, entry.EndChunk, numberOfChunks)
return false
}
// An inverted chunk range must be stored in the named result err, exactly like the other
// checks in this callback; a bare fmt.Errorf call builds the error and throws it away.
if entry.EndChunk < entry.StartChunk {
return fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d",
err = fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d",
entry.Path, entry.StartChunk, entry.EndChunk)
return false
}
if entry.StartOffset > 0 {
if entry.StartChunk < lastChunk {
return fmt.Errorf("The file %s starts at chunk %d while the last chunk is %d",
entry.Path, entry.StartChunk, lastChunk)
}
if entry.StartChunk > lastChunk+1 {
return fmt.Errorf("The file %s starts at chunk %d while the last chunk is %d",
entry.Path, entry.StartChunk, lastChunk)
}
if entry.StartChunk == lastChunk && entry.StartOffset < lastOffset {
return fmt.Errorf("The file %s starts at offset %d of chunk %d while the last file ends at offset %d",
entry.Path, entry.StartOffset, entry.StartChunk, lastOffset)
}
if entry.StartChunk == entry.EndChunk && entry.StartOffset > entry.EndOffset {
return fmt.Errorf("The file %s starts at offset %d and ends at offset %d of the same chunk %d",
entry.Path, entry.StartOffset, entry.EndOffset, entry.StartChunk)
}
if entry.StartChunk == entry.EndChunk && entry.StartOffset > entry.EndOffset {
err = fmt.Errorf("The file %s starts at offset %d and ends at offset %d of the same chunk %d",
entry.Path, entry.StartOffset, entry.EndOffset, entry.StartChunk)
return false
}
fileSize := int64(0)
@@ -2558,22 +2559,13 @@ func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) {
}
if entry.Size != fileSize {
return fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d",
err = fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d",
entry.Path, entry.Size, fileSize)
return false
}
lastChunk = entry.EndChunk
lastOffset = entry.EndOffset
}
if len(entries) > 0 && entries[0].StartChunk != 0 {
return fmt.Errorf("The first file starts at chunk %d", entries[0].StartChunk)
}
// There may be a last chunk whose size is 0 so we allow this to happen
if lastChunk < numberOfChunks-2 {
return fmt.Errorf("The last file ends at chunk %d but the number of chunks is %d", lastChunk, numberOfChunks)
}
return true
})
// Return the named result rather than a literal nil: the callback above reports failures by
// assigning to err, and "return nil" would overwrite that value before the caller sees it.
return err
}

View File

@@ -116,19 +116,18 @@ func createTestSnapshotManager(testDir string) *SnapshotManager {
func uploadTestChunk(manager *SnapshotManager, content []byte) string {
completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
chunkOperator := CreateChunkOperator(manager.config, manager.storage, nil, false, testThreads, false)
chunkOperator.UploadCompletionFunc = func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
LOG_INFO("UPLOAD_CHUNK", "Chunk %s size %d uploaded", chunk.GetID(), chunkSize)
}
chunkUploader := CreateChunkUploader(manager.config, manager.storage, nil, testThreads, nil)
chunkUploader.completionFunc = completionFunc
chunkUploader.Start()
chunk := CreateChunk(manager.config, true)
chunk.Reset(true)
chunk.Write(content)
chunkUploader.StartChunk(chunk, 0)
chunkUploader.Stop()
chunkOperator.Upload(chunk, 0, false)
chunkOperator.WaitForCompletion()
chunkOperator.Stop()
return chunk.GetHash()
}
@@ -180,6 +179,12 @@ func createTestSnapshot(manager *SnapshotManager, snapshotID string, revision in
func checkTestSnapshots(manager *SnapshotManager, expectedSnapshots int, expectedFossils int) {
manager.CreateChunkOperator(false, 1, false)
defer func() {
manager.chunkOperator.Stop()
manager.chunkOperator = nil
}()
var snapshotIDs []string
var err error

View File

@@ -14,6 +14,7 @@ import (
"strconv"
"strings"
"time"
"runtime"
"github.com/gilbertchen/gopass"
"golang.org/x/crypto/pbkdf2"
@@ -460,3 +461,16 @@ func AtoSize(sizeString string) int {
return size
}
func PrintMemoryUsage() {
for {
var m runtime.MemStats
runtime.ReadMemStats(&m)
LOG_INFO("MEMORY_STATS", "Currently allocated: %s, total allocated: %s, system memory: %s, number of GCs: %d",
PrettySize(int64(m.Alloc)), PrettySize(int64(m.TotalAlloc)), PrettySize(int64(m.Sys)), m.NumGC)
time.Sleep(time.Second)
}
}

View File

@@ -52,11 +52,11 @@ func (entry *Entry) ReadAttributes(top string) {
fullPath := filepath.Join(top, entry.Path)
attributes, _ := xattr.List(fullPath)
if len(attributes) > 0 {
entry.Attributes = make(map[string][]byte)
entry.Attributes = &map[string][]byte{}
for _, name := range attributes {
attribute, err := xattr.Get(fullPath, name)
if err == nil {
entry.Attributes[name] = attribute
(*entry.Attributes)[name] = attribute
}
}
}
@@ -68,19 +68,19 @@ func (entry *Entry) SetAttributesToFile(fullPath string) {
for _, name := range names {
newAttribute, found := entry.Attributes[name]
newAttribute, found := (*entry.Attributes)[name]
if found {
oldAttribute, _ := xattr.Get(fullPath, name)
if !bytes.Equal(oldAttribute, newAttribute) {
xattr.Set(fullPath, name, newAttribute)
}
delete(entry.Attributes, name)
delete(*entry.Attributes, name)
} else {
xattr.Remove(fullPath, name)
}
}
for name, attribute := range entry.Attributes {
for name, attribute := range *entry.Attributes {
xattr.Set(fullPath, name, attribute)
}