diff --git a/duplicacy_acdclient.go b/duplicacy_acdclient.go
new file mode 100644
index 0000000..49e7056
--- /dev/null
+++ b/duplicacy_acdclient.go
@@ -0,0 +1,452 @@
+// Copyright (c) Acrosync LLC. All rights reserved.
+// Licensed under the Fair Source License 0.9 (https://fair.io/)
+// User Limitation: 5 users
+
+package duplicacy
+
+import (
+    "fmt"
+    "time"
+    "bytes"
+    "sync"
+    "io/ioutil"
+    "encoding/json"
+    "io"
+    "net/http"
+    "mime/multipart"
+    "math/rand"
+
+    "golang.org/x/oauth2"
+)
+
+type ACDError struct {
+    Status  int
+    Message string `json:"message"`
+}
+
+func (err ACDError) Error() string {
+    return fmt.Sprintf("%d %s", err.Status, err.Message)
+}
+
+var ACDRefreshTokenURL = "https://duplicacy.com/acd_refresh"
+type ACDClient struct {
+    HTTPClient *http.Client
+
+    TokenFile string
+    Token     *oauth2.Token
+    TokenLock *sync.Mutex
+
+    ContentURL  string
+    MetadataURL string
+
+    TestMode bool
+}
+
+
+func NewACDClient(tokenFile string) (*ACDClient, error) {
+
+    description, err := ioutil.ReadFile(tokenFile)
+    if err != nil {
+        return nil, err
+    }
+
+    token := new(oauth2.Token)
+    if err := json.Unmarshal(description, token); err != nil {
+        return nil, err
+    }
+
+    client := &ACDClient{
+        HTTPClient: http.DefaultClient,
+        TokenFile:  tokenFile,
+        Token:      token,
+        TokenLock:  &sync.Mutex{},
+    }
+
+    client.GetEndpoint()
+
+    return client, nil
+}
+
+func (client *ACDClient) call(url string, method string, input interface{}, contentType string) (io.ReadCloser, int64, error) {
+
+    LOG_DEBUG("ACD_CALL", "Calling %s", url)
+
+    var response *http.Response
+
+    backoff := 1
+    for i := 0; i < 8; i++ {
+        var inputReader io.Reader
+
+        switch input.(type) {
+        default:
+            jsonInput, err := json.Marshal(input)
+            if err != nil {
+                return nil, 0, err
+            }
+            inputReader = bytes.NewReader(jsonInput)
+        case []byte:
+            inputReader = bytes.NewReader(input.([]byte))
+        case int:
+            inputReader = bytes.NewReader([]byte(""))
+        case *bytes.Buffer:
+            inputReader = bytes.NewReader(input.(*bytes.Buffer).Bytes())
+        case *RateLimitedReader:
+            input.(*RateLimitedReader).Reset()
+            inputReader = input.(*RateLimitedReader)
+        }
+
+        request, err := http.NewRequest(method, url, inputReader)
+        if err != nil {
+            return nil, 0, err
+        }
+
+        if reader, ok := inputReader.(*RateLimitedReader); ok {
+            request.ContentLength = reader.Length()
+        }
+
+        if url != ACDRefreshTokenURL {
+            client.TokenLock.Lock()
+            request.Header.Set("Authorization", "Bearer " + client.Token.AccessToken)
+            client.TokenLock.Unlock()
+        }
+        if contentType != "" {
+            request.Header.Set("Content-Type", contentType)
+        }
+
+        response, err = client.HTTPClient.Do(request)
+        if err != nil {
+            return nil, 0, err
+        }
+
+        if response.StatusCode < 400 {
+            return response.Body, response.ContentLength, nil
+        }
+
+        if response.StatusCode == 404 {
+            buffer := new(bytes.Buffer)
+            buffer.ReadFrom(response.Body)
+            response.Body.Close()
+            return nil, 0, ACDError { Status: response.StatusCode, Message: buffer.String()}
+        }
+
+        if response.StatusCode == 400 {
+            defer response.Body.Close()
+
+            e := &ACDError {
+                Status: response.StatusCode,
+            }
+
+            if err := json.NewDecoder(response.Body).Decode(e); err == nil {
+                return nil, 0, e
+            } else {
+                return nil, 0, ACDError { Status: response.StatusCode, Message: "Bad input parameter"}
+            }
+        }
+
+        response.Body.Close()
+
+        if response.StatusCode == 401 {
+
+            if url == ACDRefreshTokenURL {
+                return nil, 0, ACDError { Status: response.StatusCode, Message: "Unauthorized"}
+            }
+
+            err = client.RefreshToken()
+            if err != nil {
+                return nil, 0, err
+            }
+
+            continue
+        } else if response.StatusCode == 403 {
+            return nil, 0, ACDError { Status: response.StatusCode, Message: "Forbidden"}
+        } else if response.StatusCode == 404 {
+            return nil, 0, ACDError { Status: response.StatusCode, Message: "Resource not found"}
+        } else if response.StatusCode == 409 {
+            return nil, 0, ACDError { Status: response.StatusCode, Message: "Conflict"}
+        } else if response.StatusCode == 411 {
+            return nil, 0, ACDError { Status: response.StatusCode, Message: "Length required"}
+        } else if response.StatusCode == 412 {
+            return nil, 0, ACDError { Status: response.StatusCode, Message: "Precondition failed"}
+        } else if response.StatusCode == 429 || response.StatusCode == 500 {
+            reason := "Too many requests"
+            if response.StatusCode == 500 {
+                reason = "Internal server error"
+            }
+            retryAfter := time.Duration(rand.Float32() * 1000.0 * float32(backoff))
+            LOG_INFO("ACD_RETRY", "%s; retry after %d milliseconds", reason, retryAfter)
+            time.Sleep(retryAfter * time.Millisecond)
+            backoff *= 2
+            continue
+        } else if response.StatusCode == 503 {
+            return nil, 0, ACDError { Status: response.StatusCode, Message: "Service unavailable"}
+        } else {
+            return nil, 0, ACDError { Status: response.StatusCode, Message: "Unknown error"}
+        }
+    }
+
+    return nil, 0, fmt.Errorf("Maximum number of retries reached")
+}
+
+func (client *ACDClient) RefreshToken() (err error) {
+
+    client.TokenLock.Lock()
+    defer client.TokenLock.Unlock()
+
+    readCloser, _, err := client.call(ACDRefreshTokenURL, "POST", client.Token, "")
+    if err != nil {
+        return err
+    }
+
+    defer readCloser.Close()
+
+    if err = json.NewDecoder(readCloser).Decode(client.Token); err != nil {
+        return err
+    }
+
+    description, err := json.Marshal(client.Token)
+    if err != nil {
+        return err
+    }
+
+    err = ioutil.WriteFile(client.TokenFile, description, 0644)
+    if err != nil {
+        return err
+    }
+
+    return nil
+}
+
+type ACDGetEndpointOutput struct {
+    CustomerExists bool   `json:"customerExists"`
+    ContentURL     string `json:"contentUrl"`
+    MetadataURL    string `json:"metadataUrl"`
+}
+
+func (client *ACDClient) GetEndpoint() (err error) {
+
+    readCloser, _, err := client.call("https://drive.amazonaws.com/drive/v1/account/endpoint", "GET", 0, "")
+    if err != nil {
+        return err
+    }
+
+    defer readCloser.Close()
+
+    output := &ACDGetEndpointOutput {}
+
+    if err = json.NewDecoder(readCloser).Decode(&output); err != nil {
+        return err
+    }
+
+    client.ContentURL = output.ContentURL
+    client.MetadataURL = output.MetadataURL
+
+    return nil
+}
+
+type ACDEntry struct {
+    Name string `json:"name"`
+    ID   string `json:"id"`
+    Size int64  `json:"size"`
+    Kind string `json:"kind"`
+}
+
+type ACDListEntriesOutput struct {
+    Count     int        `json:"count"`
+    NextToken string     `json:"nextToken"`
+    Entries   []ACDEntry `json:"data"`
+}
+
+func (client *ACDClient) ListEntries(parentID string, listFiles bool) ([]ACDEntry, error) {
+
+    startToken := ""
+
+    entries := []ACDEntry{}
+
+    for {
+
+        url := client.MetadataURL + "nodes/" + parentID + "/children?filters="
+
+        if listFiles {
+            url += "kind:FILE"
+        } else {
+            url += "kind:FOLDER"
+        }
+
+        if startToken != "" {
+            url += "&startToken=" + startToken
+        }
+
+        if client.TestMode {
+            url += "&limit=8"
+        }
+
+        readCloser, _, err := client.call(url, "GET", 0, "")
+        if err != nil {
+            return nil, err
+        }
+
+        defer readCloser.Close()
+
+        output := &ACDListEntriesOutput {}
+
+        if err = json.NewDecoder(readCloser).Decode(&output); err != nil {
+            return nil, err
+        }
+
+        entries = append(entries, output.Entries...)
+
+        startToken = output.NextToken
+        if startToken == "" {
+            break
+        }
+    }
+
+    return entries, nil
+}
+
+func (client *ACDClient) ListByName(parentID string, name string) (string, bool, int64, error) {
+
+    url := client.MetadataURL + "nodes"
+
+    if parentID == "" {
+        url += "?filters=Kind:FOLDER+AND+isRoot:true"
+    } else {
+        url += "/" + parentID + "/children?filters=name:" + name
+    }
+
+    readCloser, _, err := client.call(url, "GET", 0, "")
+    if err != nil {
+        return "", false, 0, err
+    }
+
+    defer readCloser.Close()
+
+    output := &ACDListEntriesOutput {}
+
+    if err = json.NewDecoder(readCloser).Decode(&output); err != nil {
+        return "", false, 0, err
+    }
+
+    if len(output.Entries) == 0 {
+        return "", false, 0, nil
+    }
+
+    return output.Entries[0].ID, output.Entries[0].Kind == "FOLDER", output.Entries[0].Size, nil
+}
+
+func (client *ACDClient) DownloadFile(fileID string) (io.ReadCloser, int64, error) {
+
+    url := client.ContentURL + "nodes/" + fileID + "/content"
+
+    return client.call(url, "GET", 0, "")
+}
+
+func (client *ACDClient) UploadFile(parentID string, name string, content []byte, rateLimit int) (fileID string, err error) {
+
+    url := client.ContentURL + "nodes?suppress=deduplication"
+
+    body := &bytes.Buffer{}
+    writer := multipart.NewWriter(body)
+
+    metadata := make(map[string]interface{})
+    metadata["name"] = name
+    metadata["kind"] = "FILE"
+    metadata["parents"] = []string{ parentID }
+
+    metadataJSON, err := json.Marshal(metadata)
+    if err != nil {
+        return "", err
+    }
+
+    err = writer.WriteField("metadata", string(metadataJSON))
+    if err != nil {
+        return "", err
+    }
+
+    part, err := writer.CreateFormFile("content", name)
+    if err != nil {
+        return "", err
+    }
+
+    _, err = part.Write(content)
+    if err != nil {
+        return "", err
+    }
+
+    writer.Close()
+
+    var input interface{}
+    input = body
+    if rateLimit > 0 {
+        input = CreateRateLimitedReader(body.Bytes(), rateLimit)
+    }
+
+    readCloser, _, err := client.call(url, "POST", input, writer.FormDataContentType())
+
+    if err != nil {
+        return "", err
+    }
+
+    defer readCloser.Close()
+
+    entry := ACDEntry {}
+    if err = json.NewDecoder(readCloser).Decode(&entry); err != nil {
+        return "", err
+    }
+
+    return entry.ID, nil
+}
+
+func (client *ACDClient) DeleteFile(fileID string) error {
+
+    url := client.MetadataURL + "trash/" + fileID
+
+    readCloser, _, err := client.call(url, "PUT", 0, "")
+    if err != nil {
+        return err
+    }
+
+    readCloser.Close()
+    return nil
+}
+
+func (client *ACDClient) MoveFile(fileID string, fromParentID string, toParentID string) error {
+
+    url := client.MetadataURL + "nodes/" + toParentID + "/children"
+
+    parameters := make(map[string]string)
+    parameters["fromParent"] = fromParentID
+    parameters["childId"] = fileID
+
+    readCloser, _, err := client.call(url, "POST", parameters, "")
+    if err != nil {
+        return err
+    }
+
+    readCloser.Close()
+    return nil
+}
+
+func (client *ACDClient) CreateDirectory(parentID string, name string) (string, error) {
+
+    url := client.MetadataURL + "nodes"
+
+    parameters := make(map[string]interface{})
+    parameters["name"] = name
+    parameters["kind"] = "FOLDER"
+    parameters["parents"] = []string {parentID}
+
+    readCloser, _, err := client.call(url, "POST", parameters, "")
+    if err != nil {
+        return "", err
+    }
+
+    defer readCloser.Close()
+
+    entry := ACDEntry {}
+    if err = json.NewDecoder(readCloser).Decode(&entry); err != nil {
+        return "", err
+    }
+
+    return entry.ID, nil
+}
diff --git a/duplicacy_acdclient_test.go b/duplicacy_acdclient_test.go
new file mode 100644
index 0000000..74a08de
--- /dev/null
+++ b/duplicacy_acdclient_test.go
@@ -0,0 +1,153 @@
+// Copyright (c) Acrosync LLC. All rights reserved.
+// Licensed under the Fair Source License 0.9 (https://fair.io/)
+// User Limitation: 5 users
+
+package duplicacy
+
+import (
+    "io"
+    "fmt"
+    "testing"
+    "crypto/sha256"
+    "encoding/hex"
+
+    crypto_rand "crypto/rand"
+    "math/rand"
+)
+
+func TestACDClient(t *testing.T) {
+
+    acdClient, err := NewACDClient("acd-token.json")
+    if err != nil {
+        t.Errorf("Failed to create the ACD client: %v", err)
+        return
+    }
+
+    acdClient.TestMode = true
+
+    rootID, _, _, err := acdClient.ListByName("", "")
+    if err != nil {
+        t.Errorf("Failed to get the root node: %v", err)
+        return
+    }
+
+    if rootID == "" {
+        t.Errorf("No root node")
+        return
+    }
+
+    testID, _, _, err := acdClient.ListByName(rootID, "test")
+    if err != nil {
+        t.Errorf("Failed to list the test directory: %v", err)
+        return
+    }
+    if testID == "" {
+        testID, err = acdClient.CreateDirectory(rootID, "test")
+        if err != nil {
+            t.Errorf("Failed to create the test directory: %v", err)
+            return
+        }
+    }
+
+    test1ID, _, _, err := acdClient.ListByName(testID, "test1")
+    if err != nil {
+        t.Errorf("Failed to list the test1 directory: %v", err)
+        return
+    }
+    if test1ID == "" {
+        test1ID, err = acdClient.CreateDirectory(testID, "test1")
+        if err != nil {
+            t.Errorf("Failed to create the test1 directory: %v", err)
+            return
+        }
+    }
+
+    test2ID, _, _, err := acdClient.ListByName(testID, "test2")
+    if err != nil {
+        t.Errorf("Failed to list the test2 directory: %v", err)
+        return
+    }
+    if test2ID == "" {
+        test2ID, err = acdClient.CreateDirectory(testID, "test2")
+        if err != nil {
+            t.Errorf("Failed to create the test2 directory: %v", err)
+            return
+        }
+    }
+
+    fmt.Printf("test1: %s, test2: %s\n", test1ID, test2ID)
+
+    numberOfFiles := 20
+    maxFileSize := 64 * 1024
+
+    for i := 0; i < numberOfFiles; i++ {
+        content := make([]byte, rand.Int() % maxFileSize + 1)
+        _, err = crypto_rand.Read(content)
+        if err != nil {
+            t.Errorf("Error generating random content: %v", err)
+            return
+        }
+
+        hasher := sha256.New()
+        hasher.Write(content)
+        filename := hex.EncodeToString(hasher.Sum(nil))
+
+        fmt.Printf("file: %s\n", filename)
+
+        _, err = acdClient.UploadFile(test1ID, filename, content, 100)
+        if err != nil {
+            /*if e, ok := err.(ACDError); !ok || e.Status != 409 */ {
+                t.Errorf("Failed to upload the file %s: %v", filename, err)
+                return
+            }
+        }
+    }
+
+    entries, err := acdClient.ListEntries(test1ID, true)
+    if err != nil {
+        t.Errorf("Error list randomly generated files: %v", err)
+        return
+    }
+
+    for _, entry := range entries {
+        err = acdClient.MoveFile(entry.ID, test1ID, test2ID)
+        if err != nil {
+            t.Errorf("Failed to move %s: %v", entry.Name, err)
+            return
+        }
+    }
+
+    entries, err = acdClient.ListEntries(test2ID, true)
+    if err != nil {
+        t.Errorf("Error list randomly generated files: %v", err)
+        return
+    }
+
+    for _, entry := range entries {
+        readCloser, _, err := acdClient.DownloadFile(entry.ID)
+        if err != nil {
+            t.Errorf("Error downloading file %s: %v", entry.Name, err)
+            return
+        }
+
+        hasher := sha256.New()
+        io.Copy(hasher, readCloser)
+        hash := hex.EncodeToString(hasher.Sum(nil))
+
+        if hash != entry.Name {
+            t.Errorf("File %s, hash %s", entry.Name, hash)
+        }
+
+        readCloser.Close()
+    }
+
+    for _, entry := range entries {
+
+        err = acdClient.DeleteFile(entry.ID)
+        if err != nil {
+            t.Errorf("Failed to delete the file %s: %v", entry.Name, err)
+            return
+        }
+    }
+
+}
diff --git a/duplicacy_acdstorage.go b/duplicacy_acdstorage.go
new file mode 100644
index 0000000..4525006
--- /dev/null
+++ b/duplicacy_acdstorage.go
@@ -0,0 +1,404 @@
+// Copyright (c) Acrosync LLC. All rights reserved.
+// Licensed under the Fair Source License 0.9 (https://fair.io/)
+// User Limitation: 5 users
+
+package duplicacy
+
+import (
+    "fmt"
+    "path"
+    "strings"
+    "sync"
+)
+
+type ACDStorage struct {
+    RateLimitedStorage
+
+    client *ACDClient
+    idCache map[string]string
+    idCacheLock *sync.Mutex
+    numberOfThreads int
+}
+
+// CreateACDStorage creates an ACD storage object.
+func CreateACDStorage(tokenFile string, storagePath string, threads int) (storage *ACDStorage, err error) {
+
+    client, err := NewACDClient(tokenFile)
+    if err != nil {
+        return nil, err
+    }
+
+    storage = &ACDStorage {
+        client: client,
+        idCache: make(map[string]string),
+        idCacheLock: &sync.Mutex{},
+        numberOfThreads: threads,
+    }
+
+    storagePathID, _, _, err := storage.getIDFromPath(0, storagePath)
+    if err != nil {
+        return nil, err
+    }
+
+    storage.idCache[""] = storagePathID
+
+    for _, dir := range []string { "chunks", "fossils", "snapshots" } {
+        dirID, isDir, _, err := client.ListByName(storagePathID, dir)
+        if err != nil {
+            return nil, err
+        }
+        if dirID == "" {
+            dirID, err = client.CreateDirectory(storagePathID, dir)
+            if err != nil {
+                return nil, err
+            }
+        } else if !isDir {
+            return nil, fmt.Errorf("%s/%s is not a directory", storagePath + "/" + dir)
+        }
+        storage.idCache[dir] = dirID
+    }
+
+    return storage, nil
+
+}
+
+func (storage *ACDStorage) getPathID(path string) string {
+    storage.idCacheLock.Lock()
+    pathID := storage.idCache[path]
+    storage.idCacheLock.Unlock()
+    return pathID
+}
+
+func (storage *ACDStorage) findPathID(path string) (string, bool) {
+    storage.idCacheLock.Lock()
+    pathID, ok := storage.idCache[path]
+    storage.idCacheLock.Unlock()
+    return pathID, ok
+}
+
+func (storage *ACDStorage) savePathID(path string, pathID string) {
+    storage.idCacheLock.Lock()
+    storage.idCache[path] = pathID
+    storage.idCacheLock.Unlock()
+}
+
+func (storage *ACDStorage) deletePathID(path string) {
+    storage.idCacheLock.Lock()
+    delete(storage.idCache, path)
+    storage.idCacheLock.Unlock()
+}
+
+
+func (storage *ACDStorage) convertFilePath(filePath string) (string) {
+    if strings.HasPrefix(filePath, "chunks/") && strings.HasSuffix(filePath, ".fsl") {
+        return "fossils/" + filePath[len("chunks/"):len(filePath) - len(".fsl")]
+    }
+    return filePath
+}
+
+func (storage *ACDStorage) getIDFromPath(threadIndex int, path string) (fileID string, isDir bool, size int64, err error) {
+
+    parentID, ok := storage.findPathID("")
+    if !ok {
+        parentID, isDir, size, err = storage.client.ListByName("", "")
+        if err != nil {
+            return "", false, 0, err
+        }
+    }
+
+    names := strings.Split(path, "/")
+    for i, name := range names {
+        parentID, isDir, _, err = storage.client.ListByName(parentID, name)
+        if err != nil {
+            return "", false, 0, err
+        }
+        if parentID == "" {
+            if i == len(names) - 1 {
+                return "", false, 0, nil
+            } else {
+                return "", false, 0, fmt.Errorf("File path '%s' does not exist", path)
+            }
+        }
+        if i != len(names) - 1 && !isDir {
+            return "", false, 0, fmt.Errorf("Invalid path %s", path)
+        }
+    }
+
+    return parentID, isDir, size, err
+}
+
+// ListFiles return the list of files and subdirectories under 'dir' (non-recursively)
+func (storage *ACDStorage) ListFiles(threadIndex int, dir string) ([]string, []int64, error) {
+    var err error
+
+    for len(dir) > 0 && dir[len(dir) - 1] == '/' {
+        dir = dir[:len(dir) - 1]
+    }
+
+    if dir == "snapshots" {
+
+        entries, err := storage.client.ListEntries(storage.getPathID(dir), false)
+        if err != nil {
+            return nil, nil, err
+        }
+
+        subDirs := []string{}
+
+        for _, entry := range entries {
+            storage.savePathID(entry.Name, entry.ID)
+            subDirs = append(subDirs, entry.Name + "/")
+        }
+        return subDirs, nil, nil
+    } else if strings.HasPrefix(dir, "snapshots/") {
+        name := dir[len("snapshots/"):]
+        pathID, ok := storage.findPathID(dir)
+        if !ok {
+            pathID, _, _, err = storage.client.ListByName(storage.getPathID("snapshots"), name)
+            if err != nil {
+                return nil, nil, err
+            }
+            if pathID == "" {
+                return nil, nil, nil
+            }
+        }
+
+        entries, err := storage.client.ListEntries(pathID, true)
+        if err != nil {
+            return nil, nil, err
+        }
+
+        files := []string{}
+
+        for _, entry := range entries {
+            storage.savePathID(dir + "/" + entry.Name, entry.ID)
+            files = append(files, entry.Name)
+        }
+        return files, nil, nil
+    } else {
+        files := []string{}
+        sizes := []int64{}
+        for _, parent := range []string {"chunks", "fossils" } {
+            entries, err := storage.client.ListEntries(storage.getPathID(parent), true)
+            if err != nil {
+                return nil, nil, err
+            }
+
+
+            for _, entry := range entries {
+                name := entry.Name
+                if parent == "fossils" {
+                    name += ".fsl"
+                }
+
+                storage.savePathID(parent + "/" + entry.Name, entry.ID)
+                files = append(files, name)
+                sizes = append(sizes, entry.Size)
+            }
+        }
+        return files, sizes, nil
+    }
+
+}
+
+// DeleteFile deletes the file or directory at 'filePath'.
+func (storage *ACDStorage) DeleteFile(threadIndex int, filePath string) (err error) {
+    filePath = storage.convertFilePath(filePath)
+    fileID, ok := storage.findPathID(filePath)
+    if !ok {
+        fileID, _, _, err = storage.getIDFromPath(threadIndex, filePath)
+        if err != nil {
+            return err
+        }
+        if fileID == "" {
+            LOG_TRACE("ACD_STORAGE", "File %s has disappeared before deletion", filePath)
+            return nil
+        }
+        storage.savePathID(filePath, fileID)
+    }
+
+    err = storage.client.DeleteFile(fileID)
+    if e, ok := err.(ACDError); ok && e.Status == 409 {
+        LOG_DEBUG("ACD_DELETE", "Ignore 409 conflict error")
+        return nil
+    }
+    return err
+}
+
+// MoveFile renames the file.
+func (storage *ACDStorage) MoveFile(threadIndex int, from string, to string) (err error) {
+    from = storage.convertFilePath(from)
+    to = storage.convertFilePath(to)
+
+    fileID, ok := storage.findPathID(from)
+    if !ok {
+        return fmt.Errorf("Attempting to rename file %s with unknown id", from)
+    }
+
+    fromParentID := storage.getPathID("chunks")
+    toParentID := storage.getPathID("fossils")
+
+    if strings.HasPrefix(from, "fossils") {
+        fromParentID, toParentID = toParentID, fromParentID
+    }
+
+    err = storage.client.MoveFile(fileID, fromParentID, toParentID)
+    if err != nil {
+        if e, ok := err.(ACDError); ok && e.Status == 409 {
+            LOG_DEBUG("ACD_MOVE", "Ignore 409 conflict error")
+        } else {
+            return err
+        }
+    }
+
+    storage.savePathID(to, storage.getPathID(from))
+    storage.deletePathID(from)
+
+    return nil
+}
+
+// CreateDirectory creates a new directory.
+func (storage *ACDStorage) CreateDirectory(threadIndex int, dir string) (err error) {
+
+    for len(dir) > 0 && dir[len(dir) - 1] == '/' {
+        dir = dir[:len(dir) - 1]
+    }
+
+    if dir == "chunks" || dir == "snapshots" {
+        return nil
+    }
+
+    if strings.HasPrefix(dir, "snapshots/") {
+        name := dir[len("snapshots/"):]
+        dirID, err := storage.client.CreateDirectory(storage.getPathID("snapshots"), name)
+        if err != nil {
+            if e, ok := err.(ACDError); ok && e.Status == 409 {
+                return nil
+            } else {
+                return err
+            }
+        }
+        storage.savePathID(dir, dirID)
+        return nil
+
+    }
+
+    return nil
+}
+
+// GetFileInfo returns the information about the file or directory at 'filePath'.
+func (storage *ACDStorage) GetFileInfo(threadIndex int, filePath string) (exist bool, isDir bool, size int64, err error) {
+
+    for len(filePath) > 0 && filePath[len(filePath) - 1] == '/' {
+        filePath = filePath[:len(filePath) - 1]
+    }
+
+    filePath = storage.convertFilePath(filePath)
+    fileID := ""
+    fileID, isDir, size, err = storage.getIDFromPath(threadIndex, filePath)
+    if err != nil {
+        return false, false, 0, err
+    }
+    if fileID == "" {
+        return false, false, 0, nil
+    }
+
+    return true, isDir, size, nil
+}
+
+// FindChunk finds the chunk with the specified id. If 'isFossil' is true, it will search for chunk files with
+// the suffix '.fsl'.
+func (storage *ACDStorage) FindChunk(threadIndex int, chunkID string, isFossil bool) (filePath string, exist bool, size int64, err error) {
+    parentID := ""
+    filePath = "chunks/" + chunkID
+    realPath := filePath
+    if isFossil {
+        parentID = storage.getPathID("fossils")
+        filePath += ".fsl"
+        realPath = "fossils/" + chunkID + ".fsl"
+    } else {
+        parentID = storage.getPathID("chunks")
+    }
+
+    fileID := ""
+    fileID, _, size, err = storage.client.ListByName(parentID, chunkID)
+    if fileID != "" {
+        storage.savePathID(realPath, fileID)
+    }
+    return filePath, fileID != "", size, err
+}
+
+// DownloadFile reads the file at 'filePath' into the chunk.
+func (storage *ACDStorage) DownloadFile(threadIndex int, filePath string, chunk *Chunk) (err error) {
+    fileID, ok := storage.findPathID(filePath)
+    if !ok {
+        fileID, _, _, err = storage.getIDFromPath(threadIndex, filePath)
+        if err != nil {
+            return err
+        }
+        if fileID == "" {
+            return fmt.Errorf("File path '%s' does not exist", filePath)
+        }
+        storage.savePathID(filePath, fileID)
+    }
+
+    readCloser, _, err := storage.client.DownloadFile(fileID)
+    if err != nil {
+        return err
+    }
+
+    defer readCloser.Close()
+
+    _, err = RateLimitedCopy(chunk, readCloser, storage.DownloadRateLimit / storage.numberOfThreads)
+    return err
+}
+
+// UploadFile writes 'content' to the file at 'filePath'.
+func (storage *ACDStorage) UploadFile(threadIndex int, filePath string, content []byte) (err error) {
+    parent := path.Dir(filePath)
+
+    if parent == "." {
+        parent = ""
+    }
+
+    parentID, ok := storage.findPathID(parent)
+
+    if !ok {
+        parentID, _, _, err = storage.getIDFromPath(threadIndex, parent)
+        if err != nil {
+            return err
+        }
+        if parentID == "" {
+            return fmt.Errorf("File path '%s' does not exist", parent)
+        }
+        storage.savePathID(parent, parentID)
+    }
+
+    fileID, err := storage.client.UploadFile(parentID, path.Base(filePath), content, storage.UploadRateLimit / storage.numberOfThreads)
+    if err == nil {
+        storage.savePathID(filePath, fileID)
+        return nil
+    }
+
+    if e, ok := err.(ACDError); ok && e.Status == 409 {
+        LOG_TRACE("ACD_UPLOAD", "File %s already exists", filePath)
+        return nil
+    } else {
+        return err
+    }
+}
+
+// If a local snapshot cache is needed for the storage to avoid downloading/uploading chunks too often when
+// managing snapshots.
+func (storage *ACDStorage) IsCacheNeeded() (bool) { return true }
+
+// If the 'MoveFile' method is implemented.
+func (storage *ACDStorage) IsMoveFileImplemented() (bool) { return true }
+
+// If the storage can guarantee strong consistency.
+func (storage *ACDStorage) IsStrongConsistent() (bool) { return true }
+
+// If the storage supports fast listing of files names.
+func (storage *ACDStorage) IsFastListing() (bool) { return true }
+
+// Enable the test mode.
+func (storage *ACDStorage) EnableTestMode() {}
diff --git a/duplicacy_azurestorage.go b/duplicacy_azurestorage.go
new file mode 100644
index 0000000..a60cd8b
--- /dev/null
+++ b/duplicacy_azurestorage.go
@@ -0,0 +1,202 @@
+// Copyright (c) Acrosync LLC. All rights reserved.
+// Licensed under the Fair Source License 0.9 (https://fair.io/)
+// User Limitation: 5 users
+
+package duplicacy
+
+import (
+    "fmt"
+    "strings"
+
+    "github.com/gilbertchen/azure-sdk-for-go/storage"
+)
+
+type AzureStorage struct {
+    RateLimitedStorage
+
+    clients []*storage.BlobStorageClient
+    container string
+}
+
+func CreateAzureStorage(accountName string, accountKey string,
+                        container string, threads int) (azureStorage *AzureStorage, err error) {
+
+    var clients []*storage.BlobStorageClient
+    for i := 0; i < threads; i++ {
+
+        client, err := storage.NewBasicClient(accountName, accountKey)
+
+        if err != nil {
+            return nil, err
+        }
+
+        blobService := client.GetBlobService()
+        clients = append(clients, &blobService)
+    }
+
+    exist, err := clients[0].ContainerExists(container)
+    if err != nil {
+        return nil, err
+    }
+
+    if !exist {
+        return nil, fmt.Errorf("container %s does not exist", container)
+    }
+
+    azureStorage = &AzureStorage {
+        clients: clients,
+        container: container,
+    }
+
+    return
+}
+
+// ListFiles return the list of files and subdirectories under 'dir' (non-recursively)
+func (azureStorage *AzureStorage) ListFiles(threadIndex int, dir string) (files []string, sizes []int64, err error) {
+
+    type ListBlobsParameters struct {
+        Prefix string
+        Delimiter string
+        Marker string
+        Include string
+        MaxResults uint
+        Timeout uint
+    }
+
+    if len(dir) > 0 && dir[len(dir) - 1] != '/' {
+        dir += "/"
+    }
+    dirLength := len(dir)
+
+    parameters := storage.ListBlobsParameters {
+        Prefix: dir,
+        Delimiter: "",
+    }
+
+    subDirs := make(map[string]bool)
+
+    for {
+
+        results, err := azureStorage.clients[threadIndex].ListBlobs(azureStorage.container, parameters)
+        if err != nil {
+            return nil, nil, err
+        }
+
+        if dir == "snapshots/" {
+            for _, blob := range results.Blobs {
+                name := strings.Split(blob.Name[dirLength:], "/")[0]
+                subDirs[name + "/"] = true
+            }
+        } else {
+            for _, blob := range results.Blobs {
+                files = append(files, blob.Name[dirLength:])
+                sizes = append(sizes, blob.Properties.ContentLength)
+            }
+        }
+
+        if results.NextMarker == "" {
+            break
+        }
+
+        parameters.Marker = results.NextMarker
+    }
+
+    if dir == "snapshots/" {
+
+        for subDir, _ := range subDirs {
+            files = append(files, subDir)
+        }
+
+    }
+
+    return files, sizes, nil
+
+}
+
+// DeleteFile deletes the file or directory at 'filePath'.
+func (storage *AzureStorage) DeleteFile(threadIndex int, filePath string) (err error) {
+    _, err = storage.clients[threadIndex].DeleteBlobIfExists(storage.container, filePath)
+    return err
+}
+
+// MoveFile renames the file.
+func (storage *AzureStorage) MoveFile(threadIndex int, from string, to string) (err error) {
+    source := storage.clients[threadIndex].GetBlobURL(storage.container, from)
+    err = storage.clients[threadIndex].CopyBlob(storage.container, to, source)
+    if err != nil {
+        return err
+    }
+    return storage.DeleteFile(threadIndex, from)
+}
+
+// CreateDirectory creates a new directory.
+func (storage *AzureStorage) CreateDirectory(threadIndex int, dir string) (err error) {
+    return nil
+}
+
+// GetFileInfo returns the information about the file or directory at 'filePath'.
+func (storage *AzureStorage) GetFileInfo(threadIndex int, filePath string) (exist bool, isDir bool, size int64, err error) {
+    properties, err := storage.clients[threadIndex].GetBlobProperties(storage.container, filePath)
+    if err != nil {
+        if strings.Contains(err.Error(), "404") {
+            return false, false, 0, nil
+        } else {
+            return false, false, 0, err
+        }
+    }
+
+    return true, false, properties.ContentLength, nil
+}
+
+// FindChunk finds the chunk with the specified id. If 'isFossil' is true, it will search for chunk files with
+// the suffix '.fsl'.
+func (storage *AzureStorage) FindChunk(threadIndex int, chunkID string, isFossil bool) (filePath string, exist bool, size int64, err error) {
+    filePath = "chunks/" + chunkID
+    if isFossil {
+        filePath += ".fsl"
+    }
+
+    exist, _, size, err = storage.GetFileInfo(threadIndex, filePath)
+
+    if err != nil {
+        return "", false, 0, err
+    } else {
+        return filePath, exist, size, err
+    }
+}
+
+// DownloadFile reads the file at 'filePath' into the chunk.
+func (storage *AzureStorage) DownloadFile(threadIndex int, filePath string, chunk *Chunk) (err error) {
+    readCloser, err := storage.clients[threadIndex].GetBlob(storage.container, filePath)
+    if err != nil {
+        return err
+    }
+
+    defer readCloser.Close()
+
+    _, err = RateLimitedCopy(chunk, readCloser, storage.DownloadRateLimit / len(storage.clients))
+    return err
+}
+
+// UploadFile writes 'content' to the file at 'filePath'.
+func (storage *AzureStorage) UploadFile(threadIndex int, filePath string, content []byte) (err error) {
+    reader := CreateRateLimitedReader(content, storage.UploadRateLimit / len(storage.clients))
+    return storage.clients[threadIndex].CreateBlockBlobFromReader(storage.container, filePath, uint64(len(content)), reader, nil)
+
+}
+
+// If a local snapshot cache is needed for the storage to avoid downloading/uploading chunks too often when
+// managing snapshots.
+func (storage *AzureStorage) IsCacheNeeded() (bool) { return true }
+
+// If the 'MoveFile' method is implemented.
+func (storage *AzureStorage) IsMoveFileImplemented() (bool) { return true }
+
+// If the storage can guarantee strong consistency.
+func (storage *AzureStorage) IsStrongConsistent() (bool) { return true }
+
+// If the storage supports fast listing of files names.
+func (storage *AzureStorage) IsFastListing() (bool) { return true }
+
+// Enable the test mode.
+func (storage *AzureStorage) EnableTestMode() {}
diff --git a/duplicacy_b2client.go b/duplicacy_b2client.go
new file mode 100644
index 0000000..29f9cd6
--- /dev/null
+++ b/duplicacy_b2client.go
@@ -0,0 +1,516 @@
+// Copyright (c) Acrosync LLC. All rights reserved.
+// Licensed under the Fair Source License 0.9 (https://fair.io/)
+// User Limitation: 5 users
+
+package duplicacy
+
+import (
+    "fmt"
+    "time"
+    "bytes"
+    "strconv"
+    "io/ioutil"
+    "encoding/json"
+    "encoding/base64"
+    "encoding/hex"
+    "io"
+    "net/http"
+    "strings"
+    "crypto/sha1"
+    "math/rand"
+)
+
+type B2Error struct {
+    Status int
+    Code string
+    Message string
+}
+
+func (err *B2Error) Error() string {
+    return fmt.Sprintf("%d %s", err.Status, err.Message)
+}
+
+type B2UploadArgument struct {
+    URL string
+    Token string
+}
+
+var B2AuthorizationURL = "https://api.backblazeb2.com/b2api/v1/b2_authorize_account"
+
+type B2Client struct {
+    HTTPClient *http.Client
+    AccountID string
+    ApplicationKey string
+    AuthorizationToken string
+    APIURL string
+    DownloadURL string
+    BucketName string
+    BucketID string
+
+    UploadURL string
+    UploadToken string
+
+    TestMode bool
+
+}
+
+func NewB2Client(accountID string, applicationKey string) *B2Client {
+    client := &B2Client{
+        HTTPClient: http.DefaultClient,
+        AccountID: accountID,
+        ApplicationKey: applicationKey,
+    }
+    return client
+}
+
+func (client *B2Client) retry(backoff int, response *http.Response) int {
+    if response != nil {
+        if backoffList, found := response.Header["Retry-After"]; found && len(backoffList) > 0 {
+            retryAfter, _ := strconv.Atoi(backoffList[0])
+            if retryAfter >= 1 {
+                time.Sleep(time.Duration(retryAfter) * time.Second)
+                return 0
+            }
+        }
+    }
+    if backoff == 0 {
+        backoff = 1
+    } else {
+        backoff *= 2
+    }
+    time.Sleep(time.Duration(backoff) * time.Second)
+    return backoff
+}
+
+func (client *B2Client) call(url string, input interface{}) (io.ReadCloser, int64, error) {
+
+    var response *http.Response
+
+    backoff := 0
+    for i := 0; i < 8; i++ {
+        var inputReader *bytes.Reader
+        method := "POST"
+
+        switch input.(type) {
+        default:
+            jsonInput, err := json.Marshal(input)
+            if err != nil {
+                return nil, 0, err
+            }
+            inputReader = bytes.NewReader(jsonInput)
+        case []byte:
+            inputReader = bytes.NewReader(input.([]byte))
+        case int:
+            method = "GET"
+            inputReader = bytes.NewReader([]byte(""))
+        }
+
+        request, err := http.NewRequest(method, url, inputReader)
+        if err != nil {
+            return nil, 0, err
+        }
+
+        if url == B2AuthorizationURL {
+            request.Header.Set("Authorization", "Basic " + base64.StdEncoding.EncodeToString([]byte(client.AccountID + ":" + client.ApplicationKey)))
+        } else {
+            request.Header.Set("Authorization", client.AuthorizationToken)
+        }
+
+        if client.TestMode {
+            r := rand.Float32()
+            if r < 0.5 {
+                request.Header.Set("X-Bz-Test-Mode", "expire_some_account_authorization_tokens")
+            } else {
+                request.Header.Set("X-Bz-Test-Mode", "force_cap_exceeded")
+            }
+        }
+
+        response, err = client.HTTPClient.Do(request)
+        if err != nil {
+            if url != B2AuthorizationURL {
+                LOG_DEBUG("BACKBLAZE_CALL", "URL request '%s' returned an error: %v", url, err)
+                backoff = client.retry(backoff, response)
+                continue
+            }
+            return nil, 0, err
+        }
+
+        if response.StatusCode < 400 {
+            return response.Body, response.ContentLength, nil
+        }
+
+        LOG_DEBUG("BACKBLAZE_CALL", "URL request '%s' returned status code %d", url, response.StatusCode)
+
+        io.Copy(ioutil.Discard, response.Body)
+        response.Body.Close()
+        if response.StatusCode == 401 {
+            if url == B2AuthorizationURL {
+                return nil, 0, fmt.Errorf("Authorization failure")
+            }
+            client.AuthorizeAccount()
+            continue
+        } else if response.StatusCode == 403 {
+            if !client.TestMode {
+                return nil, 0, fmt.Errorf("B2 cap exceeded")
+            }
+            continue
+        } else if response.StatusCode == 429 || response.StatusCode == 408 {
+            backoff = client.retry(backoff, response)
+            continue
+        } else if response.StatusCode >= 500 && response.StatusCode <= 599 {
+            backoff = client.retry(backoff, response)
+            continue
+        }
+
+        defer response.Body.Close()
+
+        e := &B2Error {
+        }
+
+        if err := json.NewDecoder(response.Body).Decode(e); err != nil {
+            return nil, 0, err
+        }
+
+        return nil, 0, e
+    }
+
+    return nil, 0, fmt.Errorf("Maximum backoff reached")
+}
+
+type B2AuthorizeAccountOutput struct {
+    AccountID string
+    AuthorizationToken string
+    APIURL string
+    DownloadURL string
+}
+
+func (client *B2Client) AuthorizeAccount() (err error) {
+
+    readCloser, _, err := client.call(B2AuthorizationURL, make(map[string]string))
+    if err != nil {
+        return err
+    }
+
+    defer readCloser.Close()
+
+    output := &B2AuthorizeAccountOutput {}
+
+    if err = json.NewDecoder(readCloser).Decode(&output); err != nil {
+        return err
+    }
+
+    client.AuthorizationToken = output.AuthorizationToken
+    client.APIURL = output.APIURL
+    client.DownloadURL = output.DownloadURL
+
+    return nil
+}
+
+type ListBucketOutput struct {
+    AccoundID string
+    BucketID string
+    BucketName string
+    BucketType string
+}
+
+func (client *B2Client) FindBucket(bucketName string) (err error) {
+
+    input := make(map[string]string)
+    input["accountId"] = client.AccountID
+
+    url := client.APIURL + "/b2api/v1/b2_list_buckets"
+
+    readCloser, _, err := client.call(url, input)
+    if err != nil {
+        return err
+    }
+
+    defer readCloser.Close()
+
+    output := make(map[string][]ListBucketOutput, 0)
+
+    if err = json.NewDecoder(readCloser).Decode(&output); err != nil {
+        return err
+    }
+
+    for _, bucket := range 
output["buckets"] { + if bucket.BucketName == bucketName { + client.BucketName = bucket.BucketName + client.BucketID = bucket.BucketID + break + } + } + + if client.BucketID == "" { + return fmt.Errorf("Bucket %s not found", bucketName) + } + + return nil +} + +type B2Entry struct { + FileID string + FileName string + Action string + Size int64 + UploadTimestamp int64 +} + +type B2ListFileNamesOutput struct { + Files []*B2Entry + NextFileName string + NextFileId string +} + +func (client *B2Client) ListFileNames(startFileName string, singleFile bool, includeVersions bool) (files []*B2Entry, err error) { + + maxFileCount := 1000 + if singleFile { + if includeVersions { + maxFileCount = 4 + if client.TestMode { + maxFileCount = 1 + } + } else { + maxFileCount = 1 + } + } else if client.TestMode { + maxFileCount = 10 + } + + input := make(map[string]interface{}) + input["bucketId"] = client.BucketID + input["startFileName"] = startFileName + input["maxFileCount"] = maxFileCount + + for { + url := client.APIURL + "/b2api/v1/b2_list_file_names" + if includeVersions { + url = client.APIURL + "/b2api/v1/b2_list_file_versions" + } + readCloser, _, err := client.call(url, input) + if err != nil { + return nil, err + } + + defer readCloser.Close() + + output := B2ListFileNamesOutput { + } + + if err = json.NewDecoder(readCloser).Decode(&output); err != nil { + return nil, err + } + + ioutil.ReadAll(readCloser) + + if startFileName == "" { + files = append(files, output.Files...) 
+ } else { + for _, file := range output.Files { + if singleFile { + if file.FileName == startFileName { + files = append(files, file) + if !includeVersions { + output.NextFileName = "" + break + } + } else { + output.NextFileName = "" + break + } + } else { + if strings.HasPrefix(file.FileName, startFileName) { + files = append(files, file) + } else { + output.NextFileName = "" + break + } + } + } + + } + + if len(output.NextFileName) == 0 { + break + } + + input["startFileName"] = output.NextFileName + if includeVersions { + input["startFileId"] = output.NextFileId + } + } + + return files, nil +} + +func (client *B2Client) DeleteFile(fileName string, fileID string) (err error) { + + input := make(map[string]string) + input["fileName"] = fileName + input["fileId"] = fileID + + url := client.APIURL + "/b2api/v1/b2_delete_file_version" + readCloser, _, err := client.call(url, input) + if err != nil { + return err + } + + readCloser.Close() + return nil +} + +type B2HideFileOutput struct { + FileID string +} + +func (client *B2Client) HideFile(fileName string) (fileID string, err error) { + + input := make(map[string]string) + input["bucketId"] = client.BucketID + input["fileName"] = fileName + + url := client.APIURL + "/b2api/v1/b2_hide_file" + readCloser, _, err := client.call(url, input) + if err != nil { + return "", err + } + + defer readCloser.Close() + + output := & B2HideFileOutput {} + + if err = json.NewDecoder(readCloser).Decode(&output); err != nil { + return "", err + } + + readCloser.Close() + return output.FileID, nil +} + +func (client *B2Client) DownloadFile(filePath string) (io.ReadCloser, int64, error) { + + url := client.DownloadURL + "/file/" + client.BucketName + "/" + filePath + + return client.call(url, 0) +} + +type B2GetUploadArgumentOutput struct { + BucketID string + UploadURL string + AuthorizationToken string +} + +func (client *B2Client) getUploadURL() (error) { + input := make(map[string]string) + input["bucketId"] = client.BucketID + 
+ url := client.APIURL + "/b2api/v1/b2_get_upload_url" + readCloser, _, err := client.call(url, input) + if err != nil { + return err + } + + defer readCloser.Close() + + output := & B2GetUploadArgumentOutput {} + + if err = json.NewDecoder(readCloser).Decode(&output); err != nil { + return err + } + + client.UploadURL = output.UploadURL + client.UploadToken = output.AuthorizationToken + + return nil +} + +func (client *B2Client) UploadFile(filePath string, content []byte, rateLimit int) (err error) { + + + hasher := sha1.New() + hasher.Write(content) + hash := hex.EncodeToString(hasher.Sum(nil)) + + headers := make(map[string]string) + headers["X-Bz-File-Name"] = filePath + headers["Content-Type"] = "application/octet-stream" + headers["X-Bz-Content-Sha1"] = hash + + var response *http.Response + + backoff := 0 + for i := 0; i < 8; i++ { + + if client.UploadURL == "" || client.UploadToken == "" { + err = client.getUploadURL() + if err != nil { + return err + } + } + + request, err := http.NewRequest("POST", client.UploadURL, CreateRateLimitedReader(content, rateLimit)) + if err != nil { + return err + } + request.ContentLength = int64(len(content)) + + request.Header.Set("Authorization", client.UploadToken) + request.Header.Set("X-Bz-File-Name", filePath) + request.Header.Set("Content-Type", "application/octet-stream") + request.Header.Set("X-Bz-Content-Sha1", hash) + + for key, value := range headers { + request.Header.Set(key, value) + } + + if client.TestMode { + r := rand.Float32() + if r < 0.8 { + request.Header.Set("X-Bz-Test-Mode", "fail_some_uploads") + } else if r < 0.9 { + request.Header.Set("X-Bz-Test-Mode", "expire_some_account_authorization_tokens") + } else { + request.Header.Set("X-Bz-Test-Mode", "force_cap_exceeded") + } + } + + response, err = client.HTTPClient.Do(request) + if err != nil { + LOG_DEBUG("BACKBLAZE_UPLOAD", "URL request '%s' returned an error: %v", client.UploadURL, err) + backoff = client.retry(backoff, response) + client.UploadURL 
= "" + client.UploadToken = "" + continue + } + + io.Copy(ioutil.Discard, response.Body) + response.Body.Close() + + if response.StatusCode < 400 { + return nil + } + + LOG_DEBUG("BACKBLAZE_UPLOAD", "URL request '%s' returned status code %d", client.UploadURL, response.StatusCode) + + if response.StatusCode == 401 { + LOG_INFO("BACKBLAZE_UPLOAD", "Re-authorizatoin required") + client.UploadURL = "" + client.UploadToken = "" + continue + } else if response.StatusCode == 403 { + if !client.TestMode { + return fmt.Errorf("B2 cap exceeded") + } + continue + } else { + LOG_INFO("BACKBLAZE_UPLOAD", "URL request '%s' returned status code %d", client.UploadURL, response.StatusCode) + backoff = client.retry(backoff, response) + client.UploadURL = "" + client.UploadToken = "" + } + } + + return fmt.Errorf("Maximum backoff reached") +} + diff --git a/duplicacy_b2client_test.go b/duplicacy_b2client_test.go new file mode 100644 index 0000000..839a523 --- /dev/null +++ b/duplicacy_b2client_test.go @@ -0,0 +1,133 @@ +// Copyright (c) Acrosync LLC. All rights reserved. 
+// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "testing" + "crypto/sha256" + "encoding/hex" + "encoding/json" + + crypto_rand "crypto/rand" + "math/rand" + "io" + "io/ioutil" +) + +func createB2ClientForTest(t *testing.T) (*B2Client, string) { + config, err := ioutil.ReadFile("test_storage.conf") + if err != nil { + t.Errorf("Failed to read config file: %v", err) + return nil, "" + } + + storages := make(map[string]map[string]string) + + err = json.Unmarshal(config, &storages) + if err != nil { + t.Errorf("Failed to parse config file: %v", err) + return nil, "" + } + + b2, found := storages["b2"] + if !found { + t.Errorf("Failed to find b2 config") + return nil, "" + } + + return NewB2Client(b2["account"], b2["key"]), b2["bucket"] + +} + +func TestB2Client(t *testing.T) { + + b2Client, bucket := createB2ClientForTest(t) + if b2Client == nil { + return + } + + b2Client.TestMode = true + + err := b2Client.AuthorizeAccount() + if err != nil { + t.Errorf("Failed to authorize the b2 account: %v", err) + return + } + + err = b2Client.FindBucket(bucket) + if err != nil { + t.Errorf("Failed to find bucket '%s': %v", bucket, err) + return + } + + testDirectory := "b2client_test/" + + files, err := b2Client.ListFileNames(testDirectory, false, false) + if err != nil { + t.Errorf("Failed to list files: %v", err) + return + } + + for _, file := range files { + err = b2Client.DeleteFile(file.FileName, file.FileID) + if err != nil { + t.Errorf("Failed to delete file '%s': %v", file.FileName, err) + } + } + + maxSize := 10000 + for i := 0; i < 20; i++ { + size := rand.Int() % maxSize + 1 + content := make([]byte, size) + _, err := crypto_rand.Read(content) + if err != nil { + t.Errorf("Error generating random content: %v", err) + return + } + + hash := sha256.Sum256(content) + name := hex.EncodeToString(hash[:]) + + err = b2Client.UploadFile(testDirectory + name, content, 100) + if err != nil { + 
t.Errorf("Error uploading file '%s': %v", name, err) + return + } + } + + files, err = b2Client.ListFileNames(testDirectory, false, false) + if err != nil { + t.Errorf("Failed to list files: %v", err) + return + } + + for _, file := range files { + + readCloser, _, err := b2Client.DownloadFile(file.FileName) + if err != nil { + t.Errorf("Error downloading file '%s': %v", file.FileName, err) + return + } + + defer readCloser.Close() + + hasher := sha256.New() + _, err = io.Copy(hasher, readCloser) + + hash := hex.EncodeToString(hasher.Sum(nil)) + + if testDirectory + hash != file.FileName { + t.Errorf("File %s has hash %s", file.FileName, hash) + } + + } + + for _, file := range files { + err = b2Client.DeleteFile(file.FileName, file.FileID) + if err != nil { + t.Errorf("Failed to delete file '%s': %v", file.FileName, err) + } + } +} diff --git a/duplicacy_b2storage.go b/duplicacy_b2storage.go new file mode 100644 index 0000000..3a3dfeb --- /dev/null +++ b/duplicacy_b2storage.go @@ -0,0 +1,255 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "strings" +) + +type B2Storage struct { + RateLimitedStorage + + clients []*B2Client +} + +// CreateB2Storage creates a B2 storage object. 
func CreateB2Storage(accountID string, applicationKey string, bucket string, threads int) (storage *B2Storage, err error) {

    var clients []*B2Client

    // Each thread gets its own authorized client so uploads/downloads can
    // proceed in parallel without sharing upload URLs.
    for i := 0; i < threads; i++ {
        client := NewB2Client(accountID, applicationKey)

        err = client.AuthorizeAccount()
        if err != nil {
            return nil, err
        }

        err = client.FindBucket(bucket)
        if err != nil {
            return nil, err
        }

        clients = append(clients, client)
    }

    storage = &B2Storage {
        clients: clients,
    }
    return storage, nil
}

// ListFiles return the list of files and subdirectories under 'dir' (non-recursively)
func (storage *B2Storage) ListFiles(threadIndex int, dir string) (files []string, sizes []int64, err error) {
    // Strip trailing slashes; B2 has no real directories, only name prefixes.
    for len(dir) > 0 && dir[len(dir) - 1] == '/' {
        dir = dir[:len(dir) - 1]
    }
    length := len(dir) + 1

    // Chunk listings must include hidden versions so fossils (hidden chunks)
    // can be reported with the '.fsl' suffix.
    includeVersions := false
    if dir == "chunks" {
        includeVersions = true
    }

    entries, err := storage.clients[threadIndex].ListFileNames(dir, false, includeVersions)
    if err != nil {
        return nil, nil, err
    }

    if dir == "snapshots" {

        // Only the first-level subdirectories (snapshot ids) are returned,
        // deduplicated via a set.
        subDirs := make(map[string]bool)

        for _, entry := range entries {
            name := entry.FileName[length:]
            subDir := strings.Split(name, "/")[0]
            subDirs[subDir + "/"] = true
        }

        for subDir, _ := range subDirs {
            files = append(files, subDir)
        }
    } else if dir == "chunks" {
        // Entries are sorted by name, so consecutive versions of the same
        // chunk are adjacent; only the newest version of each is reported.
        lastFile := ""
        for _, entry := range entries {
            if entry.FileName == lastFile {
                continue
            }
            lastFile = entry.FileName
            if entry.Action == "hide" {
                // A hidden chunk is a fossil.
                files = append(files, entry.FileName[length:] + ".fsl")
            } else {
                files = append(files, entry.FileName[length:])
            }
            sizes = append(sizes, entry.Size)
        }
    } else {
        for _, entry := range entries {
            files = append(files, entry.FileName[length:])
        }
    }

    return files, sizes, nil
}

// DeleteFile deletes the file or directory at 'filePath'.
func (storage *B2Storage) DeleteFile(threadIndex int, filePath string) (err error) {

    if strings.HasSuffix(filePath, ".fsl") {
        // Deleting a fossil: remove the 'hide' marker version and every
        // version older than it, leaving any newer live version intact.
        filePath = filePath[:len(filePath) - len(".fsl")]
        entries, err := storage.clients[threadIndex].ListFileNames(filePath, true, true)
        if err != nil {
            return err
        }

        // Becomes true once the 'hide' marker is seen; versions listed after
        // it (older ones) are deleted too.
        toBeDeleted := false

        for _, entry := range entries {
            if entry.FileName != filePath || (!toBeDeleted && entry.Action != "hide" ) {
                continue
            }

            toBeDeleted = true

            err = storage.clients[threadIndex].DeleteFile(filePath, entry.FileID)
            if err != nil {
                return err
            }
        }

        return nil

    } else {
        entries, err := storage.clients[threadIndex].ListFileNames(filePath, true, false)
        if err != nil {
            return err
        }

        // Already gone; nothing to do.
        if len(entries) == 0 {
            return nil
        }
        return storage.clients[threadIndex].DeleteFile(filePath, entries[0].FileID)
    }
}

// MoveFile renames the file.
func (storage *B2Storage) MoveFile(threadIndex int, from string, to string) (err error) {

    // Only fossil transitions are supported: 'x' <-> 'x.fsl'.  'filePath'
    // is the bare chunk path, or "" if the pair is not such a transition.
    filePath := ""

    if strings.HasSuffix(from, ".fsl") {
        filePath = to
        if from != to + ".fsl" {
            filePath = ""
        }
    } else if strings.HasSuffix(to, ".fsl") {
        filePath = from
        if to != from + ".fsl" {
            filePath = ""
        }
    }

    if filePath == "" {
        LOG_FATAL("STORAGE_MOVE", "Moving file '%s' to '%s' is not supported", from, to)
        return nil
    }

    if filePath == from {
        // Turning a chunk into a fossil: hide it.
        _, err = storage.clients[threadIndex].HideFile(from)
        return err
    } else {
        // Resurrecting a fossil: delete the newest version only if it is the
        // 'hide' marker, which reveals the live version underneath.
        entries, err := storage.clients[threadIndex].ListFileNames(filePath, true, true)
        if err != nil {
            return err
        }
        if len(entries) == 0 || entries[0].FileName != filePath || entries[0].Action != "hide" {
            return nil
        }

        return storage.clients[threadIndex].DeleteFile(filePath, entries[0].FileID)
    }
}

// CreateDirectory creates a new directory.
func (storage *B2Storage) CreateDirectory(threadIndex int, dir string) (err error) {
    // B2 has no directories; creation is a no-op.
    return nil
}

// GetFileInfo returns the information about the file or directory at 'filePath'.
func (storage *B2Storage) GetFileInfo(threadIndex int, filePath string) (exist bool, isDir bool, size int64, err error) {
    isFossil := false
    if strings.HasSuffix(filePath, ".fsl") {
        isFossil = true
        filePath = filePath[:len(filePath) - len(".fsl")]
    }

    // Versions are only needed to see the 'hide' marker of a fossil.
    entries, err := storage.clients[threadIndex].ListFileNames(filePath, true, isFossil)
    if err != nil {
        return false, false, 0, err
    }

    if len(entries) == 0 || entries[0].FileName != filePath {
        return false, false, 0, nil
    }

    if isFossil {
        // A fossil exists only if the newest version is the 'hide' marker.
        if entries[0].Action == "hide" {
            return true, false, entries[0].Size, nil
        } else {
            return false, false, 0, nil
        }
    }
    return true, false, entries[0].Size, nil
}

// FindChunk finds the chunk with the specified id. If 'isFossil' is true, it will search for chunk files with
// the suffix '.fsl'.
func (storage *B2Storage) FindChunk(threadIndex int, chunkID string, isFossil bool) (filePath string, exist bool, size int64, err error) {
    filePath = "chunks/" + chunkID
    if isFossil {
        filePath += ".fsl"
    }
    exist, _, size, err = storage.GetFileInfo(threadIndex, filePath)
    return filePath, exist, size, err
}

// DownloadFile reads the file at 'filePath' into the chunk.
func (storage *B2Storage) DownloadFile(threadIndex int, filePath string, chunk *Chunk) (err error) {

    readCloser, _, err := storage.clients[threadIndex].DownloadFile(filePath)
    if err != nil {
        return err
    }

    defer readCloser.Close()

    // The download rate limit is shared equally among all client threads.
    _, err = RateLimitedCopy(chunk, readCloser, storage.DownloadRateLimit / len(storage.clients))
    return err
}

// UploadFile writes 'content' to the file at 'filePath'.
+func (storage *B2Storage) UploadFile(threadIndex int, filePath string, content []byte) (err error) { + return storage.clients[threadIndex].UploadFile(filePath, content, storage.UploadRateLimit / len(storage.clients)) +} + +// If a local snapshot cache is needed for the storage to avoid downloading/uploading chunks too often when +// managing snapshots. +func (storage *B2Storage) IsCacheNeeded() (bool) { return true } + +// If the 'MoveFile' method is implemented. +func (storage *B2Storage) IsMoveFileImplemented() (bool) { return true } + +// If the storage can guarantee strong consistency. +func (storage *B2Storage) IsStrongConsistent() (bool) { return true } + +// If the storage supports fast listing of files names. +func (storage *B2Storage) IsFastListing() (bool) { return true } + +// Enable the test mode. +func (storage *B2Storage) EnableTestMode() { + for _, client := range storage.clients { + client.TestMode = true + } +} diff --git a/duplicacy_backupmanager.go b/duplicacy_backupmanager.go new file mode 100644 index 0000000..1f27205 --- /dev/null +++ b/duplicacy_backupmanager.go @@ -0,0 +1,1365 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "encoding/json" + "bytes" + "os" + "io" + "fmt" + "path" + "time" + "sort" + "sync/atomic" + "strings" + "strconv" + "encoding/hex" + "path/filepath" +) + +// BackupManager performs the two major operations, backup and restore, and passes other operations, mostly related to +// snapshot management, to the snapshot manager. 
+ +type BackupManager struct { + snapshotID string // Unique id for each repository + storage Storage // the storage for storing backups + + SnapshotManager *SnapshotManager // the snapshot manager + snapshotCache *FileStorage // for copies of chunks needed by snapshots + + config *Config // contains a number of options +} + + + +// CreateBackupManager creates a backup manager using the specified 'storage'. 'snapshotID' is a unique id to +// identify snapshots created for this repository. 'top' is the top directory of the repository. 'password' is the +// master key which can be nil if encryption is not enabled. +func CreateBackupManager(snapshotID string, storage Storage, top string, password string) *BackupManager { + + config, _, err := DownloadConfig(storage, password) + if err != nil { + LOG_ERROR("STORAGE_CONFIG", "Failed to download the configuration file from the storage: %v", err) + return nil + } + if config == nil { + LOG_ERROR("STORAGE_NOT_CONFIGURED", "The storage has not been initialized") + return nil + } + + snapshotManager := CreateSnapshotManager(config, storage) + + backupManager := &BackupManager { + snapshotID: snapshotID, + storage: storage, + + SnapshotManager: snapshotManager, + + config: config, + } + + if IsDebugging() { + config.Print() + } + + return backupManager +} + +// SetupSnapshotCache creates the snapshot cache, which is merely a local storage under the default .duplicacy +// directory +func (manager *BackupManager) SetupSnapshotCache(top string, storageName string) bool { + + cacheDir := path.Join(top, DUPLICACY_DIRECTORY, "cache", storageName) + + storage, err := CreateFileStorage(cacheDir, 1) + if err != nil { + LOG_ERROR("BACKUP_CACHE", "Failed to create the snapshot cache dir: %v", err) + return false + } + + for _, subdir := range [] string { "chunks", "snapshots" } { + err := os.Mkdir(path.Join(cacheDir, subdir), 0744) + if err != nil && !os.IsExist(err) { + LOG_ERROR("BACKUP_CACHE", "Failed to create the snapshot cache 
subdir: %v", err) + return false + } + } + + manager.snapshotCache = storage + manager.SnapshotManager.snapshotCache = storage + return true +} + +func setEntryContent(entries[] *Entry, chunkLengths[]int, offset int) { + if len(entries) == 0 { + return + } + + i := 0 + totalChunkSize := int64(0) + totalFileSize := entries[i].Size + entries[i].StartChunk = 0 + offset + entries[i].StartOffset = 0 + for j, length := range chunkLengths { + + for totalChunkSize + int64(length) >= totalFileSize { + entries[i].EndChunk = j + offset + entries[i].EndOffset = int(totalFileSize - totalChunkSize) + + i++ + if i >= len(entries) { + break + } + + if totalChunkSize + int64(length) == totalFileSize { + entries[i].StartChunk = j + 1 + offset + entries[i].StartOffset = 0 + } else { + entries[i].StartChunk = j + offset + entries[i].StartOffset = int(totalFileSize - totalChunkSize) + } + + totalFileSize += entries[i].Size + } + + totalChunkSize += int64(length) + } +} + +// Backup creates a snapshot for the repository 'top'. If 'quickMode' is true, only files with different sizes +// or timestamps since last backup will be uploaded (however the snapshot is still a full snapshot that shares +// unmodified files with last backup). Otherwise (or if this is the first backup), the entire repository will +// be scanned to create the snapshot. 'tag' is the tag assigned to the new snapshot. 
+func (manager *BackupManager) Backup(top string, quickMode bool, threads int, tag string, + showStatistics bool, shadowCopy bool) bool { + + var err error + top, err = filepath.Abs(top) + if err != nil { + LOG_ERROR("REPOSITORY_ERR", "Failed to obtain the absolute path of the repository: %v", err) + return false + } + + startTime := time.Now().Unix() + + LOG_DEBUG("BACKUP_PARAMETERS", "top: %s, quick: %t, tag: %s", top, quickMode, tag) + + remoteSnapshot := manager.SnapshotManager.downloadLatestSnapshot(manager.snapshotID) + if remoteSnapshot == nil { + quickMode = false + remoteSnapshot = CreateEmptySnapshot(manager.snapshotID) + LOG_INFO("BACKUP_START", "No previous backup found") + } else { + LOG_INFO("BACKUP_START", "Last backup at revision %d found", remoteSnapshot.Revision) + } + + shadowTop := CreateShadowCopy(top, shadowCopy) + defer DeleteShadowCopy() + + LOG_INFO("BACKUP_INDEXING", "Indexing %s", top) + localSnapshot, skippedDirectories, skippedFiles, err := CreateSnapshotFromDirectory(manager.snapshotID, shadowTop) + if err != nil { + LOG_ERROR("SNAPSHOT_LIST", "Failed to list the directory %s: %v", top, err) + return false + } + + // This cache contains all chunks referenced by last snasphot. Any other chunks will lead to a call to + // UploadChunk. + chunkCache := make(map[string]bool) + + if remoteSnapshot.Revision > 0 { + // Add all chunks in the last snapshot to the + for _, chunkID := range manager.SnapshotManager.GetSnapshotChunks(remoteSnapshot) { + chunkCache[chunkID] = true + } + } else if manager.storage.IsFastListing() { + // If the listing operation is fast, list all chunks and put them in the cache. 
+ LOG_INFO("BACKUP_LIST", "Listing all chunks") + allChunks, _ := manager.SnapshotManager.ListAllFiles(manager.storage, "chunks/") + + for _, chunk := range allChunks { + if len(chunk) == 0 || chunk[len(chunk) - 1] == '/' { + continue + } + + if strings.HasSuffix(chunk, ".fsl") { + continue + } + + chunk = strings.Replace(chunk, "/", "", -1) + chunkCache[chunk] = true + } + } + + var numberOfNewFileChunks int // number of new file chunks + var totalUploadedFileChunkLength int64 // total length of uploaded file chunks + var totalUploadedFileChunkBytes int64 // how many actual bytes have been uploaded + + var numberOfNewSnapshotChunks int // number of new snapshot chunks + var totalUploadedSnapshotChunkLength int64 // size of uploaded snapshot chunks + var totalUploadedSnapshotChunkBytes int64 // how many actual bytes have been uploaded + + localSnapshot.Revision = remoteSnapshot.Revision + 1 + + var totalModifiedFileSize int64 // total size of modified files + var uploadedModifiedFileSize int64 // portions that have been uploaded (including cache hits) + + var modifiedEntries [] *Entry // Files that has been modified or newly created + var preservedEntries [] *Entry // Files unchanges + + // If the quick mode is enabled, we simply treat all files as if they were new, and break them into chunks. 
+ // Otherwise, we need to find those that are new or recently modified + + if !quickMode { + modifiedEntries = localSnapshot.Files + for _, entry := range modifiedEntries { + totalModifiedFileSize += entry.Size + } + } else { + + var i, j int + for i < len(localSnapshot.Files) { + + local := localSnapshot.Files[i] + + if !local.IsFile() || local.Size == 0 { + i++ + continue + } + + var remote *Entry + if j >= len(remoteSnapshot.Files) { + totalModifiedFileSize += local.Size + modifiedEntries = append(modifiedEntries, local) + i++ + } else if remote = remoteSnapshot.Files[j]; !remote.IsFile() { + j++ + } else if local.Path == remote.Path { + if local.IsSameAs(remote) { + local.Hash = remote.Hash + local.StartChunk = remote.StartChunk + local.StartOffset = remote.StartOffset + local.EndChunk = remote.EndChunk + local.EndOffset = remote.EndOffset + preservedEntries = append(preservedEntries, local) + } else { + totalModifiedFileSize += local.Size + modifiedEntries = append(modifiedEntries, local) + } + i++ + j++ + } else if local.Compare(remote) < 0 { + totalModifiedFileSize += local.Size + modifiedEntries = append(modifiedEntries, local) + i++ + } else { + j++ + } + } + + // Must sort files by their 'StartChunk', so the chunk indices form a monotonically increasing sequence + sort.Sort(ByChunk(preservedEntries)) + } + + var preservedChunkHashes []string + var preservedChunkLengths []int + + // For each preserved file, adjust the indices StartChunk and EndChunk. This is done by finding gaps + // between these indices and subtracting the number of deleted chunks. 
+ last := -1 + deletedChunks := 0 + for _, entry := range preservedEntries { + + if entry.StartChunk > last { + deletedChunks += entry.StartChunk - last - 1 + } + + for i := entry.StartChunk; i <= entry.EndChunk; i++ { + if i == last { + continue + } + preservedChunkHashes = append(preservedChunkHashes, remoteSnapshot.ChunkHashes[i]) + preservedChunkLengths = append(preservedChunkLengths, remoteSnapshot.ChunkLengths[i]) + } + + last = entry.EndChunk + + entry.StartChunk -= deletedChunks + entry.EndChunk -= deletedChunks + } + + var uploadedEntries [] *Entry + var uploadedChunkHashes []string + var uploadedChunkLengths []int + + // the file reader implements the Reader interface. When an EOF is encounter, it opens the next file unless it + // is the last file. + fileReader := CreateFileReader(shadowTop, modifiedEntries) + + startUploadingTime := time.Now().Unix() + + lastUploadingTime := time.Now().Unix() + + keepUploadAlive := int64(1800) + + if os.Getenv("DUPLICACY_UPLOAD_KEEPALIVE") != "" { + value, _ := strconv.Atoi(os.Getenv("DUPLICACY_UPLOAD_KEEPALIVE")) + if value < 10 { + value = 10 + } + LOG_INFO("UPLOAD_KEEPALIVE", "Setting KeepUploadAlive to %d", value) + keepUploadAlive = int64(value) + } + + chunkMaker := CreateChunkMaker(manager.config, false) + chunkUploader := CreateChunkUploader(manager.config, manager.storage, nil, threads, nil) + + if fileReader.CurrentFile != nil { + + LOG_TRACE("PACK_START", "Packing %s", fileReader.CurrentEntry.Path) + + chunkIndex := 0 + if threads < 1 { + threads = 1 + } + if threads > 1 { + LOG_INFO("BACKUP_THREADS", "Use %d uploading threads", threads) + } + + var numberOfCollectedChunks int64 + + completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) { + action := "Skipped" + if skipped { + LOG_DEBUG("CHUNK_CACHE", "Skipped chunk %s in cache", chunk.GetID()) + } else { + if uploadSize > 0 { + numberOfNewFileChunks++ + totalUploadedFileChunkLength += int64(chunkSize) + 
totalUploadedFileChunkBytes += int64(uploadSize) + action = "Uploaded" + } else { + LOG_DEBUG("CHUNK_EXIST", "Skipped chunk %s in the storage", chunk.GetID()) + } + } + + uploadedModifiedFileSize += int64(chunkSize) + + if IsTracing() || showStatistics { + now := time.Now().Unix() + if now <= startUploadingTime { + now = startUploadingTime + 1 + } + speed := uploadedModifiedFileSize / (now - startUploadingTime) + remainingTime := int64(0) + if speed > 0 { + remainingTime = (totalModifiedFileSize - uploadedModifiedFileSize) / speed + 1 + } + percentage := float32(uploadedModifiedFileSize * 1000 / totalModifiedFileSize) + LOG_INFO("UPLOAD_PROGRESS", "%s chunk %d size %d, %sB/s %s %.1f%%", action, chunkIndex, + chunkSize, PrettySize(speed), PrettyTime(remainingTime), percentage / 10) + } + + atomic.AddInt64(&numberOfCollectedChunks, 1) + manager.config.PutChunk(chunk) + } + chunkUploader.completionFunc = completionFunc + chunkUploader.Start() + + // Break files into chunks + chunkMaker.ForEachChunk( + fileReader.CurrentFile, + func (chunk *Chunk, final bool) { + + hash := chunk.GetHash() + chunkID := chunk.GetID() + chunkSize := chunk.GetLength() + + chunkIndex++ + + _, found := chunkCache[chunkID] + if found { + if time.Now().Unix() - lastUploadingTime > keepUploadAlive { + LOG_INFO("UPLOAD_KEEPALIVE", "Skip chunk cache to keep connection alive") + found = false + } + } + + if found { + completionFunc(chunk, chunkIndex, true, chunkSize, 0) + } else { + lastUploadingTime = time.Now().Unix() + chunkCache[chunkID] = true + + chunkUploader.StartChunk(chunk, chunkIndex) + } + + uploadedChunkHashes = append(uploadedChunkHashes, hash) + uploadedChunkLengths = append(uploadedChunkLengths, chunkSize) + + }, + func (fileSize int64, hash string) (io.Reader, bool) { + // This function is called when a new file is needed + entry := fileReader.CurrentEntry + entry.Hash = hash + if entry.Size != fileSize { + totalModifiedFileSize += fileSize - entry.Size + entry.Size = fileSize + } 
+ uploadedEntries = append(uploadedEntries, entry) + + if !showStatistics || IsTracing() || RunInBackground { + LOG_INFO("PACK_END", "Packed %s (%d)", entry.Path, entry.Size) + } + + fileReader.NextFile() + + if fileReader.CurrentFile != nil { + LOG_TRACE("PACK_START", "Packing %s", fileReader.CurrentEntry.Path) + return fileReader.CurrentFile, true + } + return nil, false + }) + + chunkUploader.Stop() + + // We can't set the offsets in the ForEachChunk loop because in that loop, when switching to a new file, the + // data in the buffer may not have been pushed into chunks; it may happen that new chunks can be created + // aftwards, before reaching the end of the current file. + // + // Therefore, we saved uploaded entries and then do a loop here to set offsets for them. + setEntryContent(uploadedEntries, uploadedChunkLengths, len(preservedChunkHashes)) + } + + if len(preservedChunkHashes) > 0 { + localSnapshot.ChunkHashes = preservedChunkHashes + localSnapshot.ChunkHashes = append(localSnapshot.ChunkHashes, uploadedChunkHashes...) + localSnapshot.ChunkLengths = preservedChunkLengths + localSnapshot.ChunkLengths = append(localSnapshot.ChunkLengths, uploadedChunkLengths...) 
+ } else { + localSnapshot.ChunkHashes = uploadedChunkHashes + localSnapshot.ChunkLengths = uploadedChunkLengths + } + + localSnapshot.EndTime = time.Now().Unix() + + err = manager.SnapshotManager.CheckSnapshot(localSnapshot) + if err != nil { + LOG_ERROR("SNAPSHOT_CHECK", "The snapshot contains an error: %v", err) + return false + } + + localSnapshot.Tag = tag + localSnapshot.Options = "" + if !quickMode { + localSnapshot.Options = "-hash" + } + + if shadowCopy { + if localSnapshot.Options == "" { + localSnapshot.Options = "-vss" + } else { + localSnapshot.Options += " -vss" + } + } + + var preservedFileSize int64 + var uploadedFileSize int64 + var totalFileChunkLength int64 + for _, file := range preservedEntries { + preservedFileSize += file.Size + } + for _, file := range uploadedEntries { + uploadedFileSize += file.Size + } + for _, length := range localSnapshot.ChunkLengths { + totalFileChunkLength += int64(length) + } + + localSnapshot.FileSize = preservedFileSize + uploadedFileSize + localSnapshot.NumberOfFiles = int64(len(preservedEntries) + len(uploadedEntries)) + + totalSnapshotChunkLength, numberOfNewSnapshotChunks, + totalUploadedSnapshotChunkLength, totalUploadedSnapshotChunkBytes := + manager.UploadSnapshot(chunkMaker, chunkUploader, top, localSnapshot, chunkCache) + + if showStatistics && !RunInBackground { + for _, entry := range uploadedEntries { + LOG_INFO("UPLOAD_FILE", "Uploaded %s (%d)", entry.Path, entry.Size) + } + } + + for _, dir := range skippedDirectories { + LOG_WARN("SKIP_DIRECTORY", "Subdirecotry %s cannot be listed", dir) + } + + for _, file := range fileReader.SkippedFiles { + LOG_WARN("SKIP_FILE", "File %s cannot be opened", file) + } + skippedFiles = append(skippedFiles, fileReader.SkippedFiles...) 
+ + manager.SnapshotManager.CleanSnapshotCache(localSnapshot, nil) + LOG_INFO("BACKUP_END", "Backup for %s at revision %d completed", top, localSnapshot.Revision) + + + totalSnapshotChunks := len(localSnapshot.FileSequence) + len(localSnapshot.ChunkSequence) + + len(localSnapshot.LengthSequence) + if showStatistics { + + LOG_INFO("BACKUP_STATS", "Files: %d total, %s bytes; %d new, %s bytes", + len(preservedEntries) + len(uploadedEntries), + PrettyNumber(preservedFileSize + uploadedFileSize), + len(uploadedEntries), PrettyNumber(uploadedFileSize)) + + LOG_INFO("BACKUP_STATS", "File chunks: %d total, %s bytes; %d new, %s bytes, %s bytes uploaded", + len(localSnapshot.ChunkHashes), PrettyNumber(totalFileChunkLength), + numberOfNewFileChunks, PrettyNumber(totalUploadedFileChunkLength), + PrettyNumber(totalUploadedFileChunkBytes)) + + LOG_INFO("BACKUP_STATS", "Metadata chunks: %d total, %s bytes; %d new, %s bytes, %s bytes uploaded", + totalSnapshotChunks, PrettyNumber(totalSnapshotChunkLength), + numberOfNewSnapshotChunks, PrettyNumber(totalUploadedSnapshotChunkLength), + PrettyNumber(totalUploadedSnapshotChunkBytes)) + + LOG_INFO("BACKUP_STATS", "All chunks: %d total, %s bytes; %d new, %s bytes, %s bytes uploaded", + len(localSnapshot.ChunkHashes) + totalSnapshotChunks, + PrettyNumber(totalFileChunkLength + totalSnapshotChunkLength), + numberOfNewFileChunks + numberOfNewSnapshotChunks, + PrettyNumber(totalUploadedFileChunkLength + totalUploadedSnapshotChunkLength), + PrettyNumber(totalUploadedFileChunkBytes + totalUploadedSnapshotChunkBytes)) + + now := time.Now().Unix() + if now == startTime { + now = startTime + 1 + } + LOG_INFO("BACKUP_STATS", "Total running time: %s", PrettyTime(now - startTime)) + } + + skipped := "" + if len(skippedDirectories) > 0 { + if len(skippedDirectories) == 1 { + skipped = "1 directory" + } else { + skipped = fmt.Sprintf("%d directories", len(skippedDirectories)) + } + } + + if len(skippedFiles) > 0 { + if len(skipped) > 0 { + skipped += 
" and " + } + if len(skippedFiles) == 1 { + skipped += "1 file" + } else { + skipped += fmt.Sprintf("%d files", len(skippedFiles)) + } + } + + if len(skipped) > 0 { + if len(skippedDirectories) + len(skippedFiles) == 1 { + skipped += " was" + } else { + skipped += " were" + } + + skipped += " not included due to access errors" + LOG_WARN("BACKUP_SKIPPED", skipped) + } + + return true +} + +// Restore downloads the specified snapshot, compares it with what's on the repository, and then downloads +// files that are different. 'base' is a directory that contains files at a different revision which can +// serve as a local cache to avoid download chunks available locally. It is perfectly ok for 'base' to be +// the same as 'top'. 'quickMode' will bypass files with unchanged sizes and timestamps. 'deleteMode' will +// remove local files that don't exist in the snapshot. 'patterns' is used to include/exclude certain files. +func (manager *BackupManager) Restore(top string, revision int, inPlace bool, quickMode bool, threads int, overwrite bool, + deleteMode bool, showStatistics bool, patterns [] string) bool { + + startTime := time.Now().Unix() + + LOG_DEBUG("RESTORE_PARAMETERS", "top: %s, revision: %d, in-place: %t, quick: %t, delete: %t", + top, revision, inPlace, quickMode, deleteMode) + + if len(patterns) > 0 { + for _, pattern := range patterns { + LOG_TRACE("RESTORE_PATTERN", "%s", pattern) + } + } + + _, err := os.Stat(top) + if os.IsNotExist(err) { + err = os.Mkdir(top, 0744) + if err != nil{ + LOG_ERROR("RESTORE_MKDIR", "Can't create the directory to be restored: %v", err) + return false + } + } + + err = os.Mkdir(path.Join(top, DUPLICACY_DIRECTORY), 0744) + if err != nil && !os.IsExist(err) { + LOG_ERROR("RESTORE_MKDIR", "Failed to create the preference directory: %v", err) + return false + } + + remoteSnapshot := manager.SnapshotManager.DownloadSnapshot(manager.snapshotID, revision) + manager.SnapshotManager.DownloadSnapshotContents(remoteSnapshot, patterns) + 
+ localSnapshot, _, _, err := CreateSnapshotFromDirectory(manager.snapshotID, top) + if err != nil { + LOG_ERROR("SNAPSHOT_LIST", "Failed to list the repository: %v", err) + return false + } + + LOG_INFO("RESTORE_START", "Restoring %s to revision %d", top, revision) + + var includedFiles [] *Entry + + // Include/exclude some files if needed + if len(patterns) > 0 { + for _, file := range remoteSnapshot.Files { + + if MatchPath(file.Path, patterns) { + includedFiles = append(includedFiles, file) + } else { + LOG_TRACE("RESTORE_EXCLUDE", "Exclude %s", file.Path) + } + } + + remoteSnapshot.Files = includedFiles + } + + // local files that don't exist in the remote snapshot + var extraFiles []string + + // These will store files to be downloaded. + fileEntries := make([]*Entry, 0, len(remoteSnapshot.Files) / 2) + + var totalFileSize int64 + var downloadedFileSize int64 + + i := 0 + for _, entry := range remoteSnapshot.Files { + + // Find local files that don't exist in the remote snapshot + for i < len(localSnapshot.Files) { + local := localSnapshot.Files[i] + compare := entry.Compare(local) + if compare > 0 { + extraFiles = append(extraFiles, local.Path) + i++ + continue + } else { + if compare == 0 { + i++ + } + break + } + } + + fullPath := joinPath(top, entry.Path) + if entry.IsLink() { + stat, err := os.Lstat(fullPath) + if stat != nil { + if stat.Mode() & os.ModeSymlink != 0 { + isRegular, link, err := Readlink(fullPath) + if err == nil && link == entry.Link && !isRegular { + continue + } + } + + os.Remove(fullPath) + } + + err = os.Symlink(entry.Link, fullPath) + if err != nil { + LOG_ERROR("RESTORE_SYMLINK", "Can't create symlink %s: %v", entry.Path, err) + return false + } + LOG_TRACE("DOWNLOAD_DONE", "Symlink %s updated", entry.Path) + } else if entry.IsDir() { + stat, err := os.Stat(fullPath) + + if err == nil && !stat.IsDir() { + LOG_ERROR("RESTORE_NOTDIR", "The path %s is not a directory: %v", fullPath, err) + return false + } + + if os.IsNotExist(err) { + 
// In the first pass of directories, set the directory to be user readable so we can create new files + // under it. + err = os.MkdirAll(fullPath, 0700) + if err != nil && !os.IsExist(err) { + LOG_ERROR("RESTORE_MKDIR", "%v", err) + return false + } + } + } else { + // We can't download files here since fileEntries needs to be sorted + fileEntries = append(fileEntries, entry) + totalFileSize += entry.Size + } + } + + for i < len(localSnapshot.Files) { + extraFiles = append(extraFiles, localSnapshot.Files[i].Path) + i++ + } + + // Sort entries by their starting chunks in order to linearize the access to the chunk chain. + sort.Sort(ByChunk(fileEntries)) + + chunkDownloader := CreateChunkDownloader(manager.config, manager.storage, nil, showStatistics, threads) + chunkDownloader.AddFiles(remoteSnapshot, fileEntries) + + chunkMaker := CreateChunkMaker(manager.config, true) + + startDownloadingTime := time.Now().Unix() + + var downloadedFiles []*Entry + // Now download files one by one + for _, file := range fileEntries { + + fullPath := joinPath(top, file.Path) + stat, _ := os.Stat(fullPath) + if stat != nil { + if quickMode { + if file.IsSameAsFileInfo(stat) { + LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", file.Path) + file.RestoreMetadata(fullPath, &stat) + continue + } + } + + if file.Size == 0 && file.IsSameAsFileInfo(stat) { + LOG_TRACE("RESTORE_SKIP", "File %s unchanged (size 0)", file.Path) + continue + } + } else { + err = os.MkdirAll(path.Dir(fullPath), 0744) + if err != nil { + LOG_ERROR("DOWNLOAD_MKDIR", "Failed to create directory: %v", err) + } + } + + // Handle zero size files. 
+ if file.Size == 0 { + newFile, err := os.OpenFile(fullPath, os.O_WRONLY | os.O_CREATE | os.O_TRUNC, file.GetPermissions()) + if err != nil { + LOG_ERROR("DOWNLOAD_OPEN", "Failed to create empty file: %v", err) + return false + } + newFile.Close() + + file.RestoreMetadata(fullPath, nil) + if !showStatistics { + LOG_INFO("DOWNLOAD_DONE", "Downloaded %s (0)", file.Path) + } + + continue + } + + if manager.RestoreFile(chunkDownloader, chunkMaker, file, top, inPlace, overwrite, showStatistics, + totalFileSize, downloadedFileSize, startDownloadingTime) { + downloadedFileSize += file.Size + downloadedFiles = append(downloadedFiles, file) + } + + file.RestoreMetadata(fullPath, nil) + } + + + if deleteMode && len(patterns) == 0 { + for _, file := range extraFiles { + fullPath := joinPath(top, file) + os.Remove(fullPath) + LOG_INFO("RESTORE_DELETE", "Deleted %s", file) + } + } + + for _, entry := range remoteSnapshot.Files { + if entry.IsDir() && !entry.IsLink() { + dir := joinPath(top, entry.Path) + entry.RestoreMetadata(dir, nil) + } + } + + RemoveEmptyDirectories(top) + + if showStatistics { + for _, file := range downloadedFiles { + LOG_INFO("DOWNLOAD_DONE", "Downloaded %s (%d)", file.Path, file.Size) + } + } + + LOG_INFO("RESTORE_END", "Restored %s to revision %d", top, revision) + if showStatistics { + LOG_INFO("RESTORE_STATS", "Files: %d total, %s bytes", len(fileEntries), PrettySize(totalFileSize)) + LOG_INFO("RESTORE_STATS", "Downloaded %d file, %s bytes, %d chunks", + len(downloadedFiles), PrettySize(downloadedFileSize), chunkDownloader.numberOfDownloadedChunks) + } + + runningTime := time.Now().Unix() - startTime + if runningTime == 0 { + runningTime = 1 + } + + LOG_INFO("RESTORE_STATS", "Total running time: %s", PrettyTime(runningTime)) + + chunkDownloader.Stop() + + return true +} + +// fileEncoder encodes one file at a time to avoid loading the full json description of the entire file tree +// in the memory +type fileEncoder struct { + top string + 
    readAttributes bool   // whether to load each entry's attributes from disk before encoding
    files []*Entry        // the file list being serialized as one json array
    currentIndex int      // index of the last encoded entry; -1 before the first call to NextFile
    buffer *bytes.Buffer  // holds the json fragment handed out via Read
}

// Read reads data from the embedded buffer
func (encoder fileEncoder) Read(data []byte) (n int, err error) {
    return encoder.buffer.Read(data)
}

// NextFile switches to the next file and generates its json description in the buffer. It also takes care of
// the ending ']' and the commas between files.
func (encoder *fileEncoder) NextFile() (io.Reader, bool) {
    // All entries (and the closing bracket) have been emitted.
    if encoder.currentIndex == len(encoder.files) {
        return nil, false
    }
    // The last entry was emitted on the previous call; emit only the closing ']' now.
    if encoder.currentIndex == len(encoder.files) - 1 {
        encoder.buffer.Write([]byte("]"))
        encoder.currentIndex++
        return encoder, true
    }

    encoder.currentIndex++
    entry := encoder.files[encoder.currentIndex]
    if encoder.readAttributes {
        entry.ReadAttributes(encoder.top)
    }
    description, err := json.Marshal(entry)
    if err != nil {
        LOG_FATAL("SNAPSHOT_ENCODE", "Failed to encode file %s: %v", encoder.files[encoder.currentIndex].Path, err)
        return nil, false
    }

    // Drop the attributes after encoding so they don't accumulate in memory.
    if encoder.readAttributes {
        entry.Attributes = nil
    }

    // Every entry but the first is preceded by a comma (the caller emits the opening '[').
    if encoder.currentIndex != 0 {
        encoder.buffer.Write([]byte(","))
    }
    encoder.buffer.Write(description)
    return encoder, true
}

// UploadSnapshot uploads the specified snapshot to the storage. It turns Files, ChunkHashes, and ChunkLengths into
// sequences of chunks, and uploads these chunks, and finally the snapshot file.
func (manager *BackupManager) UploadSnapshot(chunkMaker *ChunkMaker, uploader *ChunkUploader, top string, snapshot *Snapshot,
    chunkCache map[string]bool, ) ( totalSnapshotChunkSize int64,
    numberOfNewSnapshotChunks int, totalUploadedSnapshotChunkSize int64,
    totalUploadedSnapshotChunkBytes int64) {

    uploader.snapshotCache = manager.snapshotCache

    // completionFunc accumulates upload statistics into the named return values and returns the
    // chunk to the config's pool; it is called by the uploader for every chunk.
    completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
        if skipped {
            LOG_DEBUG("CHUNK_CACHE", "Skipped snapshot chunk %s in cache", chunk.GetID())
        } else {
            if uploadSize > 0 {
                numberOfNewSnapshotChunks++
                totalUploadedSnapshotChunkSize += int64(chunkSize)
                totalUploadedSnapshotChunkBytes += int64(uploadSize)
            } else {
                LOG_DEBUG("CHUNK_EXIST", "Skipped snpashot chunk %s in the storage", chunk.GetID())
            }
        }

        manager.config.PutChunk(chunk)
    }

    uploader.completionFunc = completionFunc
    uploader.Start()

    // uploadSequenceFunc uploads chunks read from 'reader'.
    uploadSequenceFunc := func(reader io.Reader,
        nextReader func(size int64, hash string)(io.Reader, bool)) (sequence[]string) {

        chunkMaker.ForEachChunk(reader,
            func (chunk *Chunk, final bool) {
                totalSnapshotChunkSize += int64(chunk.GetLength())
                chunkID := chunk.GetID()
                // A hit in chunkCache means the chunk was already uploaded during this backup.
                if _, found := chunkCache[chunkID]; found {
                    completionFunc(chunk, 0, true, chunk.GetLength(), 0)
                } else {
                    uploader.StartChunk(chunk, len(sequence))
                }
                sequence = append(sequence, chunk.GetHash())
            },
            nextReader)

        return sequence
    }

    sequences := []string { "chunks", "lengths" }
    // The file list is assumed not to be too large when fixed-size chunking is used
    if chunkMaker.minimumChunkSize == chunkMaker.maximumChunkSize {
        sequences = append(sequences, "files")
    }

    // Chunk and length sequences can be encoded and loaded into memory directly
    for _, sequenceType := range sequences {
        contents, err := snapshot.MarshalSequence(sequenceType)

        if err != nil {
            LOG_ERROR("SNAPSHOT_MARSHAL", "Failed to encode the %s in the snapshot %s: %v",
                      sequenceType, manager.snapshotID, err)
            return int64(0), 0, int64(0), int64(0)
        }

        sequence := uploadSequenceFunc(bytes.NewReader(contents),
            func (fileSize int64, hash string) (io.Reader, bool) {
                return nil, false
            })
        snapshot.SetSequence(sequenceType, sequence)
    }

    // File sequence may be too big to fit into the memory. So we encode files one by one and take advantages of
    // the multi-reader capability of the chunk maker.
    if chunkMaker.minimumChunkSize != chunkMaker.maximumChunkSize {
        encoder := fileEncoder {
            top: top,
            readAttributes: snapshot.discardAttributes,
            files: snapshot.Files,
            currentIndex: -1,
            buffer: new(bytes.Buffer),
        }

        // The encoder emits entries one at a time; the opening '[' is written here and the
        // closing ']' by the encoder's final NextFile call.
        encoder.buffer.Write([]byte("["))
        sequence := uploadSequenceFunc(encoder,
            func (fileSize int64, hash string) (io.Reader, bool) {
                return encoder.NextFile()
            })
        snapshot.SetSequence("files", sequence)
    }

    uploader.Stop()

    description, err := snapshot.MarshalJSON()
    if err != nil {
        LOG_ERROR("SNAPSHOT_MARSHAL", "Failed to encode the snapshot %s: %v", manager.snapshotID, err)
        return int64(0), 0, int64(0), int64(0)
    }

    path := fmt.Sprintf("snapshots/%s/%d", manager.snapshotID, snapshot.Revision)
    manager.SnapshotManager.UploadFile(path, path, description)

    return totalSnapshotChunkSize, numberOfNewSnapshotChunks, totalUploadedSnapshotChunkSize, totalUploadedSnapshotChunkBytes
}

// RestoreFile downloads a file from the storage. If 'inPlace' is false, the downloaded file is saved first
// to a temporary file under the .duplicacy directory and then replaces the existing one. Otherwise, the
// existing file will be overwritten directly.
+func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chunkMaker *ChunkMaker, entry *Entry, top string, inPlace bool, overwrite bool, + showStatistics bool, totalFileSize int64, downloadedFileSize int64, startTime int64) bool { + + LOG_TRACE("DOWNLOAD_START", "Downloading %s", entry.Path) + + var existingFile, newFile *os.File + var err error + + temporaryPath := path.Join(top, DUPLICACY_DIRECTORY, "temporary") + fullPath := joinPath(top, entry.Path) + + defer func() { + if existingFile != nil { + existingFile.Close() + } + if newFile != nil { + newFile.Close() + } + + if temporaryPath != fullPath { + os.Remove(temporaryPath) + } + } () + + // These are used to break the existing file into chunks. + var existingChunks [] string + var existingLengths [] int + + // These are to enable fast lookup of what chunks are available in the existing file. + offsetMap := make(map[string] int64) + lengthMap := make(map[string] int) + var offset int64 + + existingFile, err = os.Open(fullPath) + if err != nil && !os.IsNotExist(err) { + LOG_TRACE("DOWNLOAD_OPEN", "Can't open the existing file: %v", err) + } + + fileHash := "" + if existingFile != nil { + // Break existing file into chunks. + chunkMaker.ForEachChunk( + existingFile, + func (chunk *Chunk, final bool) { + hash := chunk.GetHash() + chunkSize := chunk.GetLength() + existingChunks = append(existingChunks, hash) + existingLengths = append(existingLengths, chunkSize) + offsetMap[hash] = offset + lengthMap[hash] = chunkSize + offset += int64(chunkSize) + }, + func (fileSize int64, hash string) (io.Reader, bool) { + fileHash = hash + return nil, false + }) + if fileHash == entry.Hash { + LOG_TRACE("DOWNLOAD_SKIP", "File %s unchanged (by hash)", entry.Path) + return false + } + + if !overwrite { + LOG_ERROR("DOWNLOAD_OVERWRITE", + "File %s already exists. 
Please specify the -overwrite option to continue", entry.Path) + return false + } + } + + if inPlace { + if existingFile == nil { + inPlace = false + } + } + + for i := entry.StartChunk; i <= entry.EndChunk; i++ { + if _, found := offsetMap[chunkDownloader.taskList[i].chunkHash]; !found { + chunkDownloader.taskList[i].needed = true + } + } + + chunkDownloader.Prefetch(entry) + + if inPlace { + + LOG_TRACE("DOWNLOAD_INPLACE", "Updating %s in place", fullPath) + + existingFile.Close() + existingFile, err = os.OpenFile(fullPath, os.O_RDWR, 0) + if err != nil { + LOG_ERROR("DOWNLOAD_OPEN", "Failed to open the file %s for in-place writing", fullPath) + return false + } + + existingFile.Seek(0, 0) + + j := 0 + offset := int64(0) + existingOffset := int64(0) + hasher := manager.config.NewFileHasher() + + for i := entry.StartChunk; i <= entry.EndChunk; i++ { + + for existingOffset < offset && j < len(existingChunks) { + existingOffset += int64(existingLengths[j]) + j++ + } + + hash := chunkDownloader.taskList[i].chunkHash + + start := 0 + if i == entry.StartChunk { + start = entry.StartOffset + } + end := chunkDownloader.taskList[i].chunkLength + if i == entry.EndChunk { + end = entry.EndOffset + } + + _, err = existingFile.Seek(offset, 0) + if err != nil { + LOG_ERROR("DOWNLOAD_SEEK", "Failed to set the offset to %d for file %s: %v", offset, fullPath, err) + return false + } + + // Check if the chunk is available in the existing file + if existingOffset == offset && start == 0 && j < len(existingChunks) && + end == existingLengths[j] && existingChunks[j] == hash { + // Identical chunk found. Run it through the hasher in order to compute the file hash. 
+ _, err := io.CopyN(hasher, existingFile, int64(existingLengths[j])) + if err != nil { + LOG_ERROR("DOWNLOAD_READ", "Failed to read the existing chunk %s: %v", hash, err) + return false + } + if IsDebugging() { + LOG_DEBUG("DOWNLOAD_UNCHANGED", "Chunk %s is unchanged", manager.config.GetChunkIDFromHash(hash)) + } + } else { + chunk := chunkDownloader.WaitForChunk(i) + _, err = existingFile.Write(chunk.GetBytes()[start: end]) + if err != nil { + LOG_ERROR("DOWNLOAD_WRITE", "Failed to write to the file: %v", err) + return false + } + hasher.Write(chunk.GetBytes()[start: end]) + } + + offset += int64(end - start) + } + + // Must truncate the file if the new size is smaller + if err = existingFile.Truncate(offset); err != nil { + LOG_ERROR("DOWNLOAD_TRUNCATE", "Failed to truncate the file at %d: %v", offset, err) + return false + } + + // Verify the download by hash + hash := hex.EncodeToString(hasher.Sum(nil)) + if hash != entry.Hash { + LOG_ERROR("DOWNLOAD_HASH", "File %s has a mismatched hash: %s instead of %s (in-place)", + fullPath, "", entry.Hash) + return false + } + + } else { + + // Create the temporary file. + newFile, err = os.OpenFile(temporaryPath, os.O_WRONLY | os.O_CREATE | os.O_TRUNC, 0600) + if err != nil { + LOG_ERROR("DOWNLOAD_OPEN", "Failed to open file for writing: %v", err) + return false + } + + hasher := manager.config.NewFileHasher() + + var localChunk *Chunk + defer chunkDownloader.config.PutChunk(localChunk) + + var offset int64 + for i := entry.StartChunk; i <= entry.EndChunk; i++ { + + hasLocalCopy := false + var data []byte + + hash := chunkDownloader.taskList[i].chunkHash + if existingFile != nil { + if offset, ok := offsetMap[hash]; ok { + // Retrieve the chunk from the existing file. 
+ length := lengthMap[hash] + existingFile.Seek(offset, 0) + if localChunk == nil { + localChunk = chunkDownloader.config.GetChunk() + } + localChunk.Reset(true) + _, err = io.CopyN(localChunk, existingFile, int64(length)) + if err == nil { + hasLocalCopy = true + data = localChunk.GetBytes() + if IsDebugging() { + LOG_DEBUG("DOWNLOAD_LOCAL_COPY", "Local copy for chunk %s is available", + manager.config.GetChunkIDFromHash(hash)) + } + } + } + } + + if !hasLocalCopy { + chunk := chunkDownloader.WaitForChunk(i) + // If the chunk was downloaded from the storage, we may still need a portion of it. + start := 0 + if i == entry.StartChunk { + start = entry.StartOffset + } + end := chunk.GetLength() + if i == entry.EndChunk { + end = entry.EndOffset + } + data = chunk.GetBytes()[start: end] + } + + _, err = newFile.Write(data) + if err != nil { + LOG_ERROR("DOWNLOAD_WRITE", "Failed to write file: %v", err) + return false + } + + hasher.Write(data) + offset += int64(len(data)) + } + + hash := hex.EncodeToString(hasher.Sum(nil)) + if hash != entry.Hash { + LOG_ERROR("DOWNLOAD_HASH", "File %s has a mismatched hash: %s instead of %s", + entry.Path, hash, entry.Hash) + return false + } + + + if existingFile != nil { + existingFile.Close() + existingFile = nil + } + + newFile.Close() + newFile = nil + + err = os.Remove(fullPath) + if err != nil && !os.IsNotExist(err) { + LOG_ERROR("DOWNLOAD_REMOVE", "Failed to remove the old file: %v", err) + return false + } + + err = os.Rename(temporaryPath, fullPath) + if err != nil { + LOG_ERROR("DOWNLOAD_RENAME", "Failed to rename the file %s to %s: %v", temporaryPath, fullPath, err) + return false + } + } + + if !showStatistics { + LOG_INFO("DOWNLOAD_DONE", "Downloaded %s (%d)", entry.Path, entry.Size) + } + return true +} + +// CopySnapshots copies the specified snapshots from one storage to the other. 
+func (manager *BackupManager) CopySnapshots(otherManager *BackupManager, snapshotID string, + revisionsToBeCopied []int, threads int) bool { + + if !manager.config.IsCompatiableWith(otherManager.config) { + LOG_ERROR("CONFIG_INCOMPATIABLE", "Two storages are not compatiable for the copy operation") + return false + } + + revisionMap := make(map[int]bool) + for _, revision := range revisionsToBeCopied { + revisionMap[revision] = true + } + + var snapshots [] *Snapshot + var snapshotIDs [] string + var err error + if snapshotID == "" { + snapshotIDs, err = manager.SnapshotManager.ListSnapshotIDs() + if err != nil { + LOG_ERROR("COPY_LIST", "Failed to list all snapshot ids: %v", err) + return false + } + } else { + snapshotIDs = [] string { snapshotID } + } + + for _, id := range snapshotIDs { + revisions, err := manager.SnapshotManager.ListSnapshotRevisions(id) + if err != nil { + LOG_ERROR("SNAPSHOT_LIST", "Failed to list all revisions for snapshot %s: %v", id, err) + return false + } + + for _, revision := range revisions { + if len(revisionsToBeCopied) > 0 { + if _, found := revisionMap[revision]; !found { + continue + } + } + + snapshotPath := fmt.Sprintf("snapshots/%s/%d", id, revision) + exist, _, _, err := otherManager.storage.GetFileInfo(0, snapshotPath) + if err != nil { + LOG_ERROR("SNAPSHOT_INFO", "Failed to check if there is a snapshot %s at revision %d: %v", + id, revision, err) + return false + } + + if exist { + LOG_INFO("SNAPSHOT_EXIST", "Snapshot %s at revision %d already exists in the destination storage", + id, revision) + continue + } + + snapshot := manager.SnapshotManager.DownloadSnapshot(id, revision) + snapshots = append(snapshots, snapshot) + } + } + + chunks := make(map[string]bool) + + for _, snapshot := range snapshots { + + LOG_TRACE("SNAPSHOT_COPY", "Copying snapshot %s at revision %d", snapshot.ID, snapshot.Revision) + for _, chunkHash := range snapshot.FileSequence { + chunks[chunkHash] = true + } + + for _, chunkHash := range 
snapshot.ChunkSequence { + chunks[chunkHash] = true + } + + for _, chunkHash := range snapshot.LengthSequence { + chunks[chunkHash] = true + } + + description := manager.SnapshotManager.DownloadSequence(snapshot.ChunkSequence) + err := snapshot.LoadChunks(description) + if err != nil { + LOG_ERROR("SNAPSHOT_CHUNK", "Failed to load chunks for snapshot %s at revision %d: %v", + snapshot.ID, snapshot.Revision, err) + return false + } + + for _, chunkHash := range snapshot.ChunkHashes { + chunks[chunkHash] = true + } + } + + chunkDownloader := CreateChunkDownloader(manager.config, manager.storage, nil, false, threads) + + chunkUploader := CreateChunkUploader(otherManager.config, otherManager.storage, nil, threads, + func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) { + if skipped { + LOG_INFO("SNAPSHOT_COPY", "Chunk %s (%d/%d) exists in the destination", chunk.GetID(), chunkIndex, len(chunks)) + } else { + LOG_INFO("SNAPSHOT_COPY", "Copied chunk %s (%d/%d)", chunk.GetID(), chunkIndex, len(chunks)) + } + }) + chunkUploader.Start() + + chunkIndex := 0 + for chunkHash, _ := range chunks { + chunkIndex++ + chunkID := manager.config.GetChunkIDFromHash(chunkHash) + newChunkID := otherManager.config.GetChunkIDFromHash(chunkHash) + + LOG_DEBUG("SNAPSHOT_COPY", "Copying chunk %s to %s", chunkID, newChunkID) + + i := chunkDownloader.AddChunk(chunkHash) + chunk := chunkDownloader.WaitForChunk(i) + chunkUploader.StartChunk(chunk, chunkIndex) + } + + chunkDownloader.Stop() + chunkUploader.Stop() + + for _, snapshot := range snapshots { + otherManager.storage.CreateDirectory(0, fmt.Sprintf("snapshots/%s", manager.snapshotID)) + description, _ := snapshot.MarshalJSON() + path := fmt.Sprintf("snapshots/%s/%d", manager.snapshotID, snapshot.Revision) + otherManager.SnapshotManager.UploadFile(path, path, description) + LOG_INFO("SNAPSHOT_COPY", "Copied snapshot %s at revision %d", snapshot.ID, snapshot.Revision) + } + + return true +} diff --git 
a/duplicacy_backupmanager_test.go b/duplicacy_backupmanager_test.go
new file mode 100644
index 0000000..056d3ec
--- /dev/null
+++ b/duplicacy_backupmanager_test.go
@@ -0,0 +1,273 @@
// Copyright (c) Acrosync LLC. All rights reserved.
// Licensed under the Fair Source License 0.9 (https://fair.io/)
// User Limitation: 5 users

package duplicacy

import (
    "os"
    "io"
    "path"
    "testing"
    "math/rand"
    "encoding/hex"
    "time"
    "crypto/sha256"
    crypto_rand "crypto/rand"

    "runtime/debug"
)

// createRandomFile writes a file of random content whose size is between maxSize/2 and maxSize.
func createRandomFile(path string, maxSize int) {
    file, err := os.OpenFile(path, os.O_WRONLY | os.O_CREATE | os.O_TRUNC, 0644)
    if err != nil {
        LOG_ERROR("RANDOM_FILE", "Can't open %s for writing: %v", path, err)
        return
    }

    defer file.Close()

    size := maxFileSizeHalf(maxSize)

    buffer := make([]byte, 32 * 1024)
    for size > 0 {
        bytes := size
        if bytes > cap(buffer) {
            bytes = cap(buffer)
        }
        crypto_rand.Read(buffer[:bytes])
        bytes, err = file.Write(buffer[:bytes])
        if err != nil {
            LOG_ERROR("RANDOM_FILE", "Failed to write to %s: %v", path, err)
            return
        }
        size -= bytes
    }
}

// modifyFile overwrites a random contiguous region covering 'portion' of the file with fresh
// random bytes, then bumps the modification time by 2 seconds so quick mode sees the change.
// NOTE(review): assumes the file is non-empty and portion < 1 (otherwise the modulus below
// divides by zero) — holds for the callers in this test.
func modifyFile(path string, portion float32) {

    stat, err := os.Stat(path)
    if err != nil {
        LOG_ERROR("MODIFY_FILE", "Can't stat the file %s: %v", path, err)
        return
    }

    modifiedTime := stat.ModTime()

    file, err := os.OpenFile(path, os.O_WRONLY, 0644)
    if err != nil {
        LOG_ERROR("MODIFY_FILE", "Can't open %s for writing: %v", path, err)
        return
    }

    defer func() {
        if file != nil {
            file.Close()
        }
    } ()

    size, err := file.Seek(0, 2)
    if err != nil {
        LOG_ERROR("MODIFY_FILE", "Can't seek to the end of the file %s: %v", path, err)
        return
    }

    length := int (float32(size) * portion)
    start := rand.Int() % (int(size) - length)

    _, err = file.Seek(int64(start), 0)
    if err != nil {
        LOG_ERROR("MODIFY_FILE", "Can't seek to the offset %d: %v", start, err)
        return
    }

    buffer := make([]byte, length)
    crypto_rand.Read(buffer)

    _, err = file.Write(buffer)
    if err != nil {
        LOG_ERROR("MODIFY_FILE", "Failed to write to %s: %v", path, err)
        return
    }

    // Close before Chtimes so the write itself doesn't refresh the mtime afterwards.
    file.Close()
    file = nil

    // Add 2 seconds to the modified time for the changes to be detectable in quick mode.
    modifiedTime = modifiedTime.Add(time.Second * 2)
    err = os.Chtimes(path, modifiedTime, modifiedTime)

    if err != nil {
        LOG_ERROR("MODIFY_FILE", "Failed to change the modification time of %s: %v", path, err)
        return
    }
}

// truncateFile shortens the file to a random size smaller than its current size.
// NOTE(review): assumes the file is non-empty (oldSize of 0 would divide by zero).
func truncateFile(path string) {
    file, err := os.OpenFile(path, os.O_WRONLY, 0644)
    if err != nil {
        LOG_ERROR("TRUNCATE_FILE", "Can't open %s for writing: %v", path, err)
        return
    }

    defer file.Close()

    oldSize, err := file.Seek(0, 2)
    if err != nil {
        LOG_ERROR("TRUNCATE_FILE", "Can't seek to the end of the file %s: %v", path, err)
        return
    }

    newSize := rand.Int63() % oldSize

    err = file.Truncate(newSize)
    if err != nil {
        LOG_ERROR("TRUNCATE_FILE", "Can't truncate the file %s to size %d: %v", path, newSize, err)
        return
    }
}

// getFileHash returns the hex-encoded SHA-256 of the file's content ("" on error).
func getFileHash(path string) (hash string) {

    file, err := os.Open(path)
    if err != nil {
        LOG_ERROR("FILE_HASH", "Can't open %s for reading: %v", path, err)
        return ""
    }

    defer file.Close()

    hasher := sha256.New()
    _, err = io.Copy(hasher, file)
    if err != nil {
        LOG_ERROR("FILE_HASH", "Can't read file %s: %v", path, err)
        return ""
    }

    return hex.EncodeToString(hasher.Sum(nil))
}

// TestBackupManager exercises a full backup/restore round trip three times: an initial backup,
// a backup after modifying files (restored in place in quick mode), and a backup after a
// truncation (restored in place with hashing). After each restore the two repositories must
// agree byte-for-byte (compared by SHA-256).
func TestBackupManager(t *testing.T) {

    rand.Seed(time.Now().UnixNano())
    setTestingT(t)
    SetLoggingLevel(INFO)

    // LOG_ERROR panics with an Exception in this codebase; convert it to a test failure.
    defer func() {
        if r := recover(); r != nil {
            switch e := r.(type) {
            case Exception:
                t.Errorf("%s %s", e.LogID, e.Message)
                debug.PrintStack()
            default:
                t.Errorf("%v", e)
                debug.PrintStack()
            }
        }
    } ()

    testDir := path.Join(os.TempDir(), "duplicacy_test")
    os.RemoveAll(testDir)
    os.MkdirAll(testDir, 0700)

    os.Mkdir(testDir + "/repository1", 0700)
    os.Mkdir(testDir + "/repository1/dir1", 0700)

    maxFileSize := 1000000
    //maxFileSize := 200000

    createRandomFile(testDir + "/repository1/file1", maxFileSize)
    createRandomFile(testDir + "/repository1/file2", maxFileSize)
    createRandomFile(testDir + "/repository1/dir1/file3", maxFileSize)

    threads := 1

    storage, err := loadStorage(testDir + "/storage", threads)
    if err != nil {
        t.Errorf("Failed to create storage: %v", err)
        return
    }

    // Cloud backends need a pause between operations for eventual consistency.
    delay := 0
    if _, ok := storage.(*ACDStorage); ok {
        delay = 1
    }
    if _, ok := storage.(*OneDriveStorage); ok {
        delay = 5
    }

    password := "duplicacy"

    cleanStorage(storage)

    time.Sleep(time.Duration(delay) * time.Second)
    if testFixedChunkSize {
        if !ConfigStorage(storage, 100, 64 * 1024, 64 * 1024, 64 * 1024, password, nil) {
            t.Errorf("Failed to initialize the storage")
        }
    } else {
        if !ConfigStorage(storage, 100, 64 * 1024, 256 * 1024, 16 * 1024, password, nil) {
            t.Errorf("Failed to initialize the storage")
        }
    }

    time.Sleep(time.Duration(delay) * time.Second)

    backupManager := CreateBackupManager("host1", storage, testDir, password)
    backupManager.SetupSnapshotCache(testDir + "/repository1", "default")

    backupManager.Backup(testDir + "/repository1", /*quickMode=*/true, threads, "first", false, false)
    time.Sleep(time.Duration(delay) * time.Second)
    // NOTE(review): the second argument here is 'revision', and 'threads' (==1) happens to equal
    // revision 1; passing a literal 1 would be clearer — confirm before changing.
    backupManager.Restore(testDir + "/repository2", threads, /*inPlace=*/false, /*quickMode=*/false, threads, /*overwrite=*/true,
        /*deleteMode=*/false, /*showStatistics=*/false, /*patterns=*/nil)

    for _, f := range []string{ "file1", "file2", "dir1/file3" } {
        if _, err := os.Stat(testDir + "/repository2/" + f); os.IsNotExist(err) {
            t.Errorf("File %s does not exist", f)
            continue
        }

        hash1 := getFileHash(testDir + "/repository1/" + f)
        hash2 := getFileHash(testDir + "/repository2/" + f)
        if hash1 != hash2 {
            t.Errorf("File %s has different hashes: %s vs %s", f, hash1, hash2)
        }
    }

    modifyFile(testDir + "/repository1/file1", 0.1)
    modifyFile(testDir + "/repository1/file2", 0.2)
    modifyFile(testDir + "/repository1/dir1/file3", 0.3)

    backupManager.Backup(testDir + "/repository1", /*quickMode=*/true, threads, "second", false, false)
    time.Sleep(time.Duration(delay) * time.Second)
    backupManager.Restore(testDir + "/repository2", 2, /*inPlace=*/true, /*quickMode=*/true, threads, /*overwrite=*/true,
        /*deleteMode=*/false, /*showStatistics=*/false, /*patterns=*/nil)

    for _, f := range []string{ "file1", "file2", "dir1/file3" } {
        hash1 := getFileHash(testDir + "/repository1/" + f)
        hash2 := getFileHash(testDir + "/repository2/" + f)
        if hash1 != hash2 {
            t.Errorf("File %s has different hashes: %s vs %s", f, hash1, hash2)
        }
    }

    truncateFile(testDir + "/repository1/file2")
    backupManager.Backup(testDir + "/repository1", /*quickMode=*/false, threads, "third", false, false)
    time.Sleep(time.Duration(delay) * time.Second)
    backupManager.Restore(testDir + "/repository2", 3, /*inPlace=*/true, /*quickMode=*/false, threads, /*overwrite=*/true,
        /*deleteMode=*/false, /*showStatistics=*/false, /*patterns=*/nil)

    for _, f := range []string{ "file1", "file2", "dir1/file3" } {
        hash1 := getFileHash(testDir + "/repository1/" + f)
        hash2 := getFileHash(testDir + "/repository2/" + f)
        if hash1 != hash2 {
            t.Errorf("File %s has different hashes: %s vs %s", f, hash1, hash2)
        }
    }

    /*buf := make([]byte, 1<<16)
    runtime.Stack(buf, true)
    fmt.Printf("%s", buf)*/
}
diff --git a/duplicacy_chunk.go b/duplicacy_chunk.go
new file mode 100644
index 0000000..5b1644f
--- /dev/null
+++ b/duplicacy_chunk.go
@@ -0,0 +1,370 @@
// Copyright (c) Acrosync LLC. All rights reserved.
+// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "io" + "fmt" + "hash" + "bytes" + "runtime" + "crypto/cipher" + "crypto/aes" + "crypto/rand" + "encoding/hex" + "compress/zlib" + + "github.com/bkaradzic/go-lz4" + +) + +// A chunk needs to acquire a new buffer and return the old one for every encrypt/decrypt operation, therefore +// we maintain a pool of previously used buffers. +var chunkBufferPool chan *bytes.Buffer = make(chan *bytes.Buffer, runtime.NumCPU() * 16) + +func AllocateChunkBuffer() (buffer *bytes.Buffer) { + select { + case buffer = <- chunkBufferPool: + default: + buffer = new(bytes.Buffer) + } + return buffer +} + +func ReleaseChunkBuffer(buffer *bytes.Buffer) { + select { + case chunkBufferPool <- buffer: + default: + LOG_INFO("CHUNK_BUFFER", "Discarding a free chunk buffer due to a full pool") + } +} + +// Chunk is the object being passed between the chunk maker, the chunk uploader, and chunk downloader. It can be +// read and written like a bytes.Buffer, and provides convenient functions to calculate the hash and id of the chunk. +type Chunk struct { + buffer *bytes.Buffer // Where the actual data is stored. It may be nil for hash-only chunks, where chunks + // are only used to compute the hashes + + size int // The size of data stored. This field is needed if buffer is nil + + hasher hash.Hash // Keeps track of the hash of data stored in the buffer. It may be nil, since sometimes + // it isn't necessary to compute the hash, for instance, when the encrypted data is being + // read into the primary buffer + + hash []byte // The hash of the chunk data. It is always in the binary format + id string // The id of the chunk data (used as the file name for saving the chunk); always in hex format + + config *Config // Every chunk is associated with a Config object. 
Which hashing algorithm to use is determined + // by the config +} + +// Magic word to identify a duplicacy format encrypted file, plus a version number. +var ENCRYPTION_HEADER = "duplicacy\000" + +// CreateChunk creates a new chunk. +func CreateChunk(config *Config, bufferNeeded bool) *Chunk { + + var buffer *bytes.Buffer + + if bufferNeeded { + buffer = AllocateChunkBuffer() + buffer.Reset() + if buffer.Cap() < config.MaximumChunkSize { + buffer.Grow(config.MaximumChunkSize - buffer.Cap()) + } + } + + return &Chunk { + buffer : buffer, + config : config, + } +} + +// GetLength returns the length of available data +func (chunk *Chunk) GetLength() int { + if chunk.buffer != nil { + return len(chunk.buffer.Bytes()) + } else { + return chunk.size + } +} + +// GetBytes returns data available in this chunk +func (chunk *Chunk) GetBytes() [] byte { + return chunk.buffer.Bytes() +} + +// Reset makes the chunk reusable by clearing the existing data in the buffers. 'hashNeeded' indicates whether the +// hash of the new data to be read is needed. If the data to be read in is encrypted, there is no need to +// calculate the hash so hashNeeded should be 'false'. +func (chunk *Chunk) Reset(hashNeeded bool) { + if chunk.buffer != nil { + chunk.buffer.Reset() + } + if hashNeeded { + chunk.hasher = chunk.config.NewKeyedHasher(chunk.config.HashKey) + } else { + chunk.hasher = nil + } + chunk.hash = nil + chunk.id = "" + chunk.size = 0 +} + +// Write implements the Writer interface. +func (chunk *Chunk) Write(p []byte) (int, error){ + + // buffer may be nil, when the chunk is used for computing the hash only. + if chunk.buffer == nil { + chunk.size += len(p) + } else { + chunk.buffer.Write(p) + } + + // hasher may be nil, when the chunk is used to stored encrypted content + if chunk.hasher != nil { + chunk.hasher.Write(p) + } + return len(p), nil +} + +// GetHash returns the chunk hash. 
// GetHash returns the chunk hash, computing it lazily from the hasher on first call.
func (chunk *Chunk) GetHash() string {
	if (len(chunk.hash) == 0) {
		chunk.hash = chunk.hasher.Sum(nil)
	}

	return string(chunk.hash)
}

// GetID returns the chunk id: the keyed hash (with IDKey) of the chunk hash, hex-encoded.
func (chunk *Chunk) GetID() string {
	if len(chunk.id) == 0 {
		if len(chunk.hash) == 0 {
			chunk.hash = chunk.hasher.Sum(nil)
		}

		hasher := chunk.config.NewKeyedHasher(chunk.config.IDKey)
		hasher.Write([]byte(chunk.hash))
		chunk.id = hex.EncodeToString(hasher.Sum(nil))
	}

	return chunk.id
}

// Encrypt encrypts the plain data stored in the chunk buffer. If derivationKey is not nil, the actual
// encryption key will be HMAC-SHA256(encryptionKey, derivationKey).
//
// Output layout when a key is given: ENCRYPTION_HEADER | nonce | GCM(compressed data + padding).
// When no key is given the output is just the compressed data.
func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string) (err error) {

	var aesBlock cipher.Block
	var gcm cipher.AEAD
	var nonce []byte
	var offset int

	// Work in a scratch buffer from the pool; it is swapped with chunk.buffer on success.
	encryptedBuffer := AllocateChunkBuffer()
	encryptedBuffer.Reset()
	defer func() {
		ReleaseChunkBuffer(encryptedBuffer)
	}()

	if len(encryptionKey) > 0 {

		key := encryptionKey

		// Derive a per-chunk key when a derivation key is supplied.
		if len(derivationKey) > 0 {
			hasher := chunk.config.NewKeyedHasher([]byte(derivationKey))
			hasher.Write(encryptionKey)
			key = hasher.Sum(nil)
		}

		aesBlock, err = aes.NewCipher(key)
		if err != nil {
			return err
		}

		gcm, err = cipher.NewGCM(aesBlock)
		if err != nil {
			return err
		}

		// Start with the magic number and the version number.
		encryptedBuffer.Write([]byte(ENCRYPTION_HEADER))

		// Followed by the nonce
		nonce = make([]byte, gcm.NonceSize())
		_, err := rand.Read(nonce)
		if err != nil {
			return err
		}
		encryptedBuffer.Write(nonce)
		offset = encryptedBuffer.Len()

	}

	// offset is either 0 or the length of header + nonce

	if chunk.config.CompressionLevel >= -1 && chunk.config.CompressionLevel <= 9 {
		deflater, _ := zlib.NewWriterLevel(encryptedBuffer, chunk.config.CompressionLevel)
		deflater.Write(chunk.buffer.Bytes())
		deflater.Close()
	} else if chunk.config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL {
		// LZ4 path, marked by a 4-byte "LZ4 " tag after the header/nonce.
		encryptedBuffer.Write([]byte("LZ4 "))
		// Make sure we have enough space in encryptedBuffer
		availableLength := encryptedBuffer.Cap() - len(encryptedBuffer.Bytes())
		maximumLength := lz4.CompressBound(len(chunk.buffer.Bytes()))
		if availableLength < maximumLength {
			encryptedBuffer.Grow(maximumLength - availableLength)
		}
		// NOTE(review): lz4.Encode writes into the buffer's spare capacity past the
		// current length; this relies on Grow not being followed by a reallocation.
		written, err := lz4.Encode(encryptedBuffer.Bytes()[offset+4:], chunk.buffer.Bytes())
		if err != nil {
			return fmt.Errorf("LZ4 compression error: %v", err)
		}
		// written is actually encryptedBuffer[offset + 4:], but we need to move the write pointer
		// and this seems to be the only way
		encryptedBuffer.Write(written)
	} else {
		return fmt.Errorf("Invalid compression level: %d", chunk.config.CompressionLevel)
	}

	// Without a key we are done: hand the compressed data back via a buffer swap.
	if len(encryptionKey) == 0 {
		chunk.buffer, encryptedBuffer = encryptedBuffer, chunk.buffer
		return nil
	}

	// PKCS7 is used. Compressed chunk sizes leaks information about the original chunks so we want the padding sizes
	// to be the maximum allowed by PKCS7
	dataLength := encryptedBuffer.Len() - offset
	paddingLength := dataLength % 256
	if paddingLength == 0 {
		paddingLength = 256
	}

	encryptedBuffer.Write(bytes.Repeat([]byte{byte(paddingLength)}, paddingLength))
	// Reserve room for the GCM tag that Seal will append.
	encryptedBuffer.Write(bytes.Repeat([]byte{0}, gcm.Overhead()))

	// The encrypted data will be appended to the duplicacy header and the nonce.
	encryptedBytes := gcm.Seal(encryptedBuffer.Bytes()[:offset], nonce,
		encryptedBuffer.Bytes()[offset:offset+dataLength+paddingLength], nil)

	encryptedBuffer.Truncate(len(encryptedBytes))

	chunk.buffer, encryptedBuffer = encryptedBuffer, chunk.buffer

	return nil

}

// Decrypt decrypts the encrypted data stored in the chunk buffer. If derivationKey is not nil, the actual
// encryption key will be HMAC-SHA256(encryptionKey, derivationKey).
func (chunk *Chunk) Decrypt(encryptionKey []byte, derivationKey string) (err error) {

	var offset int

	encryptedBuffer := AllocateChunkBuffer()
	encryptedBuffer.Reset()
	defer func() {
		ReleaseChunkBuffer(encryptedBuffer)
	}()

	// Swap so encryptedBuffer holds the ciphertext and chunk.buffer becomes scratch output.
	chunk.buffer, encryptedBuffer = encryptedBuffer, chunk.buffer

	if len(encryptionKey) > 0 {

		key := encryptionKey

		if len(derivationKey) > 0 {
			hasher := chunk.config.NewKeyedHasher([]byte(derivationKey))
			hasher.Write(encryptionKey)
			key = hasher.Sum(nil)
		}

		aesBlock, err := aes.NewCipher(key)
		if err != nil {
			return err
		}

		gcm, err := cipher.NewGCM(aesBlock)
		if err != nil {
			return err
		}

		headerLength := len(ENCRYPTION_HEADER)
		offset = headerLength + gcm.NonceSize()

		if len(encryptedBuffer.Bytes()) < offset {
			return fmt.Errorf("No enough encrypted data (%d bytes) provided", len(encryptedBuffer.Bytes()))
		}

		// Check the magic word (all header bytes except the trailing version byte).
		if string(encryptedBuffer.Bytes()[:headerLength-1]) != ENCRYPTION_HEADER[:headerLength-1] {
			return fmt.Errorf("The storage doesn't seem to be encrypted")
		}

		// The last header byte is the format version; only version 0 is supported.
		if encryptedBuffer.Bytes()[headerLength-1] != 0 {
			return fmt.Errorf("Unsupported encryption version %d", encryptedBuffer.Bytes()[headerLength-1])
		}

		nonce := encryptedBuffer.Bytes()[headerLength:offset]

		decryptedBytes, err := gcm.Open(encryptedBuffer.Bytes()[:offset], nonce,
			encryptedBuffer.Bytes()[offset:], nil)

		if err != nil {
			return err
		}

		// Strip the PKCS7-style padding; a stored byte of 0 means 256 padding bytes.
		paddingLength := int(decryptedBytes[len(decryptedBytes)-1])
		if paddingLength == 0 {
			paddingLength = 256
		}
		if len(decryptedBytes) <= paddingLength {
			return fmt.Errorf("Incorrect padding length %d out of %d bytes", paddingLength, len(decryptedBytes))
		}

		// Every padding byte must equal the padding length.
		for i := 0; i < paddingLength; i++ {
			padding := decryptedBytes[len(decryptedBytes)-1-i]
			if padding != byte(paddingLength) {
				return fmt.Errorf("Incorrect padding of length %d: %x", paddingLength,
					decryptedBytes[len(decryptedBytes)-paddingLength:])
			}
		}

		encryptedBuffer.Truncate(len(decryptedBytes) - paddingLength)
	}

	// Advance the buffer's read pointer past the header+nonce by reading into its own prefix.
	encryptedBuffer.Read(encryptedBuffer.Bytes()[:offset])

	compressed := encryptedBuffer.Bytes()
	if len(compressed) > 4 && string(compressed[:4]) == "LZ4 " {
		chunk.buffer.Reset()
		decompressed, err := lz4.Decode(chunk.buffer.Bytes(), encryptedBuffer.Bytes()[4:])
		if err != nil {
			return err
		}

		chunk.buffer.Write(decompressed)
		chunk.hasher = chunk.config.NewKeyedHasher(chunk.config.HashKey)
		chunk.hasher.Write(decompressed)
		chunk.hash = nil
		return nil
	}
	// Otherwise the payload is zlib-compressed.
	inflater, err := zlib.NewReader(encryptedBuffer)
	if err != nil {
		return err
	}

	defer inflater.Close()

	chunk.buffer.Reset()
	chunk.hasher = chunk.config.NewKeyedHasher(chunk.config.HashKey)
	chunk.hash = nil

	// Copy through the chunk itself so the hasher sees the decompressed bytes.
	if _, err = io.Copy(chunk, inflater); err != nil {
		return err
	}

	return nil

}
diff --git a/duplicacy_chunk_test.go b/duplicacy_chunk_test.go
new file mode 100644
index 0000000..ba5d4ce
--- /dev/null
+++ b/duplicacy_chunk_test.go
@@ -0,0 +1,73 @@
// Copyright (c) Acrosync LLC. All rights reserved.
// Licensed under the Fair Source License 0.9 (https://fair.io/)
// User Limitation: 5 users

package duplicacy

import (
	"testing"
	"bytes"
	crypto_rand "crypto/rand"
	"math/rand"
)

// TestChunk round-trips randomly sized random data through Encrypt/Decrypt and
// verifies the hash, the id, and the plaintext all survive unchanged.
func TestChunk(t *testing.T) {

	key := []byte("duplicacydefault")

	config := CreateConfig()
	config.HashKey = key
	config.IDKey = key
	config.MinimumChunkSize = 100
	config.CompressionLevel = DEFAULT_COMPRESSION_LEVEL
	maxSize := 1000000

	for i := 0; i < 500; i++ {

		// size may be 0, exercising the empty-chunk path.
		size := rand.Int() % maxSize

		plainData := make([]byte, size)
		crypto_rand.Read(plainData)
		chunk := CreateChunk(config, true)
		chunk.Reset(true)
		chunk.Write(plainData)

		hash := chunk.GetHash()
		id := chunk.GetID()

		err := chunk.Encrypt(key, "")
		if err != nil {
			t.Errorf("Failed to encrypt the data: %v", err)
			continue
		}

		encryptedData := make([]byte, chunk.GetLength())
		copy(encryptedData, chunk.GetBytes())

		// Reset without a hasher: the data fed back in is ciphertext.
		chunk.Reset(false)
		chunk.Write(encryptedData)
		err = chunk.Decrypt(key, "")
		if err != nil {
			t.Errorf("Failed to decrypt the data: %v", err)
			continue
		}

		decryptedData := chunk.GetBytes()

		if hash != chunk.GetHash() {
			t.Errorf("Original hash: %x, decrypted hash: %x", hash, chunk.GetHash())
		}

		if id != chunk.GetID() {
			t.Errorf("Original id: %s, decrypted hash: %s", id, chunk.GetID())
		}

		if bytes.Compare(plainData, decryptedData) != 0 {
			t.Logf("orginal length: %d, decrypted length: %d", len(plainData), len(decryptedData))
			t.Errorf("Original data:\n%x\nDecrypted data:\n%x\n", plainData, decryptedData)
		}

	}

}
diff --git a/duplicacy_chunkdownloader.go b/duplicacy_chunkdownloader.go
new file mode 100644
index 0000000..689206c
--- /dev/null
+++ b/duplicacy_chunkdownloader.go
@@ -0,0 +1,380 @@
// Copyright (c) Acrosync LLC. All rights reserved.
+// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "sync/atomic" + "time" +) + +// ChunkDownloadTask encapsulates information need to download a chunk. +type ChunkDownloadTask struct { + chunk *Chunk // The chunk that will be downloaded; initially nil + chunkIndex int // The index of this chunk in the chunk list + chunkHash string // The chunk hash + chunkLength int // The length of the chunk; may be zero + needed bool // Whether this chunk can be skipped if a local copy exists + isDownloading bool // 'true' means the chunk has been downloaded or is being downloaded +} + +// ChunkDownloadCompletion represents the nofication when a chunk has been downloaded. +type ChunkDownloadCompletion struct { + chunkIndex int // The index of this chunk in the chunk list + chunk *Chunk // The chunk that has been downloaded +} + +// ChunkDownloader is capable of performing multi-threaded downloading. Chunks to be downloaded are first organized +// as a list of ChunkDownloadTasks, with only the chunkHash field initialized. When a chunk is needed, the +// corresponding ChunkDownloadTask is sent to the dowloading goroutine. Once a chunk is downloaded, it will be +// inserted in the completed task list. 
+type ChunkDownloader struct { + config *Config // Associated config + storage Storage // Download from this storage + snapshotCache *FileStorage // Used as cache if not nil; usually for downloading snapshot chunks + showStatistics bool // Show a stats log for each chunk if true + threads int // Number of threads + + taskList [] ChunkDownloadTask // The list of chunks to be downloaded + completedTasks map[int]bool // Store downloaded chunks + lastChunkIndex int // a monotonically increasing number indicating the last chunk to be downloaded + + taskQueue chan ChunkDownloadTask // Downloading goroutines are waiting on this channel for input + stopChannel chan bool // Used to stop the dowloading goroutines + completionChannel chan ChunkDownloadCompletion // A downloading goroutine sends back the chunk via this channel after downloading + + startTime int64 // The time it starts downloading + totalFileSize int64 // Total file size + downloadedFileSize int64 // Downloaded file size + numberOfDownloadedChunks int // The number of chunks that have been downloaded + numberOfDownloadingChunks int // The number of chunks still being downloaded + numberOfActiveChunks int // The number of chunks that is being downloaded or has been downloaded but not reclaimed +} + +func CreateChunkDownloader(config *Config, storage Storage, snapshotCache *FileStorage, showStatistics bool, threads int) *ChunkDownloader { + downloader := &ChunkDownloader { + config: config, + storage: storage, + snapshotCache: snapshotCache, + showStatistics: showStatistics, + threads: threads, + + taskList: nil, + completedTasks: make(map[int]bool), + lastChunkIndex: 0, + + taskQueue: make(chan ChunkDownloadTask, threads), + stopChannel: make(chan bool), + completionChannel: make(chan ChunkDownloadCompletion), + + startTime: time.Now().Unix(), + } + + // Start the downloading goroutines + for i := 0; i < downloader.threads; i++ { + go func(threadIndex int) { + defer CatchLogException() + for { + select { + case 
task := <- downloader.taskQueue: + downloader.Download(threadIndex, task) + case <- downloader.stopChannel: + return + } + } + } (i) + } + + return downloader +} + +// AddFiles adds chunks needed by the specified files to the download list. +func (downloader *ChunkDownloader) AddFiles(snapshot *Snapshot, files [] *Entry) { + + downloader.taskList = nil + lastChunkIndex := -1 + maximumChunks := 0 + downloader.totalFileSize = 0 + for _, file := range files { + if file.Size == 0 { + continue + } + for i := file.StartChunk; i <= file.EndChunk; i++ { + if lastChunkIndex != i { + task := ChunkDownloadTask { + chunkIndex: len(downloader.taskList), + chunkHash: snapshot.ChunkHashes[i], + chunkLength: snapshot.ChunkLengths[i], + needed: false, + } + downloader.taskList = append(downloader.taskList, task) + } else { + downloader.taskList[lastChunkIndex].needed = true + } + lastChunkIndex = i + } + file.StartChunk = len(downloader.taskList) - (file.EndChunk - file.StartChunk) - 1 + file.EndChunk = len(downloader.taskList) - 1 + if file.EndChunk - file.StartChunk > maximumChunks { + maximumChunks = file.EndChunk - file.StartChunk + } + downloader.totalFileSize += file.Size + } +} + +// AddChunk adds a single chunk the download list. 
+func (downloader *ChunkDownloader) AddChunk(chunkHash string) int { + task := ChunkDownloadTask { + chunkIndex: len(downloader.taskList), + chunkHash: chunkHash, + chunkLength: 0, + needed: true, + isDownloading: false, + } + downloader.taskList = append(downloader.taskList, task) + if downloader.numberOfActiveChunks < downloader.threads { + downloader.taskQueue <- task + downloader.numberOfDownloadingChunks++ + downloader.numberOfActiveChunks++ + downloader.taskList[len(downloader.taskList) - 1].isDownloading = true + } + return len(downloader.taskList) - 1 +} + +// Prefetch adds up to 'threads' chunks needed by a file to the download list +func (downloader *ChunkDownloader) Prefetch(file *Entry) { + + // Any chunks before the first chunk of this filea are not needed any more, so they can be reclaimed. + downloader.Reclaim(file.StartChunk) + + for i := file.StartChunk; i <= file.EndChunk; i++ { + task := &downloader.taskList[i] + if task.needed { + if !task.isDownloading { + if downloader.numberOfActiveChunks >= downloader.threads { + return + } + + LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching %s chunk %s", file.Path, + downloader.config.GetChunkIDFromHash(task.chunkHash)) + downloader.taskQueue <- *task + task.isDownloading = true + downloader.numberOfDownloadingChunks++ + downloader.numberOfActiveChunks++ + } + } else{ + LOG_DEBUG("DOWNLOAD_PREFETCH", "%s chunk %s is not needed", file.Path, + downloader.config.GetChunkIDFromHash(task.chunkHash)) + } + } +} + +// Reclaim releases the downloaded chunk to the chunk pool +func (downloader *ChunkDownloader) Reclaim(chunkIndex int) { + + if downloader.lastChunkIndex == chunkIndex { + return + } + + for i := downloader.lastChunkIndex; i < chunkIndex; i++ { + if !downloader.taskList[i].isDownloading { + atomic.AddInt64(&downloader.downloadedFileSize, int64(downloader.taskList[i].chunkLength)) + } + } + + for i, _ := range downloader.completedTasks { + if i < chunkIndex && downloader.taskList[i].chunk != nil { + 
downloader.config.PutChunk(downloader.taskList[i].chunk) + downloader.taskList[i].chunk = nil + delete(downloader.completedTasks, i) + downloader.numberOfActiveChunks-- + } + } + + downloader.lastChunkIndex = chunkIndex +} + +// WaitForChunk waits until the specified chunk is ready +func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) { + + // Reclain any chunk not needed + downloader.Reclaim(chunkIndex) + + // If we haven't started download the specified chunk, download it now + if !downloader.taskList[chunkIndex].isDownloading { + LOG_DEBUG("DOWNLOAD_FETCH", "Fetching chunk %s", + downloader.config.GetChunkIDFromHash(downloader.taskList[chunkIndex].chunkHash)) + downloader.taskQueue <- downloader.taskList[chunkIndex] + downloader.taskList[chunkIndex].isDownloading = true + downloader.numberOfDownloadingChunks++ + downloader.numberOfActiveChunks++ + } + + // We also need to look ahead and prefetch other chunks as many as permitted by the number of threads + for i := chunkIndex + 1; i < len(downloader.taskList); i++ { + if downloader.numberOfActiveChunks >= downloader.threads { + break + } + task := &downloader.taskList[i] + if !task.needed { + break + } + + if !task.isDownloading { + LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching chunk %s", downloader.config.GetChunkIDFromHash(task.chunkHash)) + downloader.taskQueue <- *task + task.isDownloading = true + downloader.numberOfDownloadingChunks++ + downloader.numberOfActiveChunks++ + } + } + + // Now wait until the chunk to be downloaded appears in the completed tasks + for _, found := downloader.completedTasks[chunkIndex]; !found; _, found = downloader.completedTasks[chunkIndex] { + completion := <- downloader.completionChannel + downloader.completedTasks[completion.chunkIndex] = true + downloader.taskList[completion.chunkIndex].chunk = completion.chunk + downloader.numberOfDownloadedChunks++ + downloader.numberOfDownloadingChunks-- + } + return downloader.taskList[chunkIndex].chunk +} + +// 
Stop terminates all downloading goroutines +func (downloader *ChunkDownloader) Stop() { + for downloader.numberOfDownloadingChunks > 0 { + completion := <- downloader.completionChannel + downloader.completedTasks[completion.chunkIndex] = true + downloader.taskList[completion.chunkIndex].chunk = completion.chunk + downloader.numberOfDownloadedChunks++ + downloader.numberOfDownloadingChunks-- + } + + for i, _ := range downloader.completedTasks { + downloader.config.PutChunk(downloader.taskList[i].chunk) + downloader.taskList[i].chunk = nil + downloader.numberOfActiveChunks-- + } + + for i := 0; i < downloader.threads; i++ { + downloader.stopChannel <- true + } +} + +// Download downloads a chunk from the storage. +func (downloader *ChunkDownloader) Download(threadIndex int, task ChunkDownloadTask) bool { + + cachedPath := "" + chunk := downloader.config.GetChunk() + chunkID := downloader.config.GetChunkIDFromHash(task.chunkHash) + + if downloader.snapshotCache != nil && downloader.storage.IsCacheNeeded() { + + var exist bool + var err error + + // Reset the chunk with a hasher -- we're reading from the cache where chunk are not encrypted or compressed + chunk.Reset(true) + + cachedPath, exist, _, err = downloader.snapshotCache.FindChunk(threadIndex, chunkID, false) + if err != nil { + LOG_WARN("DOWNLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err) + } else if exist { + err = downloader.snapshotCache.DownloadFile(0, cachedPath, chunk) + if err != nil { + LOG_WARN("DOWNLOAD_CACHE", "Failed to load the chunk %s from the snapshot cache: %v", chunkID, err) + } else { + actualChunkID := chunk.GetID() + if actualChunkID != chunkID { + LOG_WARN("DOWNLOAD_CACHE_CORRUPTED", + "The chunk %s load from the snapshot cache has a hash id of %s", chunkID, actualChunkID) + } else { + LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been loaded from the snapshot cache", chunkID) + + downloader.completionChannel <- ChunkDownloadCompletion{ chunk: chunk, 
chunkIndex:task.chunkIndex } + return false + } + } + } + } + + // Reset the chunk without a hasher -- the downloaded content will be encrypted and/or compressed and the hasher + // will be set up before the encryption + chunk.Reset(false) + + // Find the chunk by ID first. + chunkPath, exist, _, err := downloader.storage.FindChunk(threadIndex, chunkID, false) + if err != nil { + LOG_ERROR("DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err) + return false + } + + if !exist { + // No chunk is found. Have to find it in the fossil pool again. + chunkPath, exist, _, err = downloader.storage.FindChunk(threadIndex, chunkID, true) + if err != nil { + LOG_ERROR("DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err) + return false + } + + if !exist { + // A chunk is not found. This is a serious error and hopefully it will never happen. + LOG_FATAL("DOWNLOAD_CHUNK", "Chunk %s can't be found", chunkID) + return false + } + LOG_DEBUG("CHUNK_FOSSIL", "Chunk %s has been marked as a fossil", chunkID) + } + + err = downloader.storage.DownloadFile(threadIndex, chunkPath, chunk) + if err != nil { + LOG_ERROR("UPLOAD_FATAL", "Failed to download the chunk %s: %v", chunkID, err) + return false + } + + + err = chunk.Decrypt(downloader.config.ChunkKey, task.chunkHash) + if err != nil { + LOG_ERROR("UPLOAD_CHUNK", "Failed to decrypt the chunk %s: %v", chunkID, err) + return false + } + + actualChunkID := chunk.GetID() + if actualChunkID != chunkID { + LOG_FATAL("UPLOAD_CORRUPTED", "The chunk %s has a hash id of %s", chunkID, actualChunkID) + return false + } + + if len(cachedPath) > 0 { + // Save a copy to the local snapshot cache + err = downloader.snapshotCache.UploadFile(threadIndex, cachedPath, chunk.GetBytes()) + if err != nil { + LOG_WARN("DOWNLOAD_CACHE", "Failed to add the chunk %s to the snapshot cache: %v", chunkID, err) + } + } + + if downloader.showStatistics || IsTracing() { + + atomic.AddInt64(&downloader.downloadedFileSize, 
int64(chunk.GetLength())) + downloadFileSize := atomic.LoadInt64(&downloader.downloadedFileSize) + + now := time.Now().Unix() + if now <= downloader.startTime { + now = downloader.startTime + 1 + } + speed := downloadFileSize / (now - downloader.startTime) + remainingTime := int64(0) + if speed > 0 { + remainingTime = (downloader.totalFileSize - downloadFileSize) / speed + 1 + } + percentage := float32(downloadFileSize * 1000 / downloader.totalFileSize) + LOG_INFO("DOWNLOAD_PROGRESS", "Downloaded chunk %d size %d, %sB/s %s %.1f%%", + task.chunkIndex + 1, chunk.GetLength(), + PrettySize(speed), PrettyTime(remainingTime), percentage / 10) + } else { + LOG_DEBUG("CHUNK_DOWNLOAD", "Chunk %s has been downloaded", chunkID) + } + + downloader.completionChannel <- ChunkDownloadCompletion{ chunk: chunk, chunkIndex:task.chunkIndex } + return true +} diff --git a/duplicacy_chunkmaker.go b/duplicacy_chunkmaker.go new file mode 100644 index 0000000..b141cab --- /dev/null +++ b/duplicacy_chunkmaker.go @@ -0,0 +1,295 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "io" + "crypto/sha256" + "encoding/hex" + "encoding/binary" +) + +// ChunkMaker breaks data into chunks using buzhash. To save memory, the chunk maker only use a circular buffer +// whose size is double the minimum chunk size. +type ChunkMaker struct { + maximumChunkSize int + minimumChunkSize int + bufferCapacity int + + hashMask uint64 + randomTable [256]uint64 + + buffer []byte + bufferSize int + bufferStart int + + config *Config + + hashOnly bool + hashOnlyChunk *Chunk +} + +// CreateChunkMaker creates a chunk maker. 'randomSeed' is used to generate the character-to-integer table needed by +// buzhash. 
+func CreateChunkMaker(config *Config, hashOnly bool) *ChunkMaker { + size := 1 + for size * 2 <= config.AverageChunkSize { + size *= 2 + } + + if size != config.AverageChunkSize { + LOG_FATAL("CHUNK_SIZE", "Invalid average chunk size: %d is not a power of 2", config.AverageChunkSize) + return nil + } + + maker := &ChunkMaker { + hashMask: uint64(config.AverageChunkSize - 1), + maximumChunkSize: config.MaximumChunkSize, + minimumChunkSize: config.MinimumChunkSize, + bufferCapacity: 2 * config.MinimumChunkSize, + config: config, + hashOnly: hashOnly, + } + + if hashOnly { + maker.hashOnlyChunk = CreateChunk(config, false) + } + + randomData := sha256.Sum256(config.ChunkSeed) + + for i := 0; i < 64; i++ { + for j := 0; j < 4; j++ { + maker.randomTable[4 * i + j] = binary.LittleEndian.Uint64(randomData[8 * j : 8 * j + 8]) + } + randomData = sha256.Sum256(randomData[:]) + } + + maker.buffer = make([]byte, 2 * config.MinimumChunkSize) + + return maker +} + +func rotateLeft(value uint64, bits uint) uint64 { + return (value << (bits & 0x3f)) | (value >> (64 - (bits & 0x3f))) +} + +func rotateLeftByOne(value uint64) uint64 { + return (value << 1) | (value >> 63) +} + +func (maker *ChunkMaker) buzhashSum(sum uint64, data [] byte) uint64 { + for i := 0; i < len(data); i++ { + sum = rotateLeftByOne(sum) ^ maker.randomTable[data[i]] + } + return sum +} + +func (maker *ChunkMaker) buzhashUpdate(sum uint64, out byte, in byte, length int) uint64 { + return rotateLeftByOne(sum) ^ rotateLeft(maker.randomTable[out], uint(length)) ^ maker.randomTable[in] +} + +// ForEachChunk reads data from 'reader'. If EOF is encountered, it will call 'nextReader' to ask for next file. If +// 'nextReader' returns false, it will process remaining data in the buffer and then quit. When a chunk is identified, +// it will call 'endOfChunk' to return the chunk size and a boolean flag indicating if it is the last chunk. 
+func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *Chunk, final bool),
+                                      nextReader func(size int64, hash string)(io.Reader, bool)) {
+
+    maker.bufferStart = 0
+    maker.bufferSize = 0
+
+    // 'minimumReached' indicates whether the current chunk has grown past the
+    // minimum chunk size; 'hashSum' is the current buzhash of the trailing
+    // minimum-size window.
+    var minimumReached bool
+    var hashSum uint64
+    var chunk *Chunk
+
+    // Running size and hash of the file currently being read.
+    fileSize := int64(0)
+    fileHasher := maker.config.NewFileHasher()
+
+    // Start a new chunk.
+    startNewChunk := func() {
+        hashSum = 0
+        minimumReached = false
+        if maker.hashOnly {
+            // Reuse the single hash-only chunk instead of taking one from the pool.
+            chunk = maker.hashOnlyChunk
+            chunk.Reset(true)
+        } else {
+            chunk = maker.config.GetChunk()
+            chunk.Reset(true)
+        }
+    }
+
+    // Move data from the buffer to the chunk.  The buffer is circular, so the
+    // copy may need to wrap around the end of the backing array.
+    fill := func(count int) {
+        if maker.bufferStart + count < maker.bufferCapacity {
+            chunk.Write(maker.buffer[maker.bufferStart : maker.bufferStart + count])
+            maker.bufferStart += count
+            maker.bufferSize -= count
+        } else {
+            chunk.Write(maker.buffer[maker.bufferStart :])
+            chunk.Write(maker.buffer[: count - (maker.bufferCapacity - maker.bufferStart)])
+            maker.bufferStart = count - (maker.bufferCapacity - maker.bufferStart)
+            maker.bufferSize -= count
+        }
+    }
+
+    startNewChunk()
+
+    var err error
+
+    isEOF := false
+
+    // Fast path: fixed-size chunking (minimum == maximum) needs no rolling
+    // hash at all; just read exactly minimumChunkSize bytes per chunk.
+    if maker.minimumChunkSize == maker.maximumChunkSize {
+
+        if maker.bufferCapacity < maker.minimumChunkSize {
+            maker.buffer = make([]byte, maker.minimumChunkSize)
+        }
+
+        for {
+            startNewChunk()
+            maker.bufferStart = 0
+            // Here 'bufferStart' is reused as the number of bytes read so far.
+            for maker.bufferStart < maker.minimumChunkSize && !isEOF {
+                count, err := reader.Read(maker.buffer[maker.bufferStart : maker.minimumChunkSize])
+
+                if err != nil {
+                    if err != io.EOF {
+                        LOG_ERROR("CHUNK_MAKER", "Failed to read %d bytes: %s", count, err.Error())
+                        return
+                    } else {
+                        isEOF = true
+                    }
+                }
+                maker.bufferStart += count
+            }
+
+            fileHasher.Write(maker.buffer[:maker.bufferStart])
+            fileSize += int64(maker.bufferStart)
+            chunk.Write(maker.buffer[:maker.bufferStart])
+
+            // At end of the current file, report its size/hash and move on to
+            // the next file (or finish if there is none).
+            if isEOF {
+                var ok bool
+                reader, ok = nextReader(fileSize, hex.EncodeToString(fileHasher.Sum(nil)))
+                if !ok {
+                    endOfChunk(chunk, true)
+                    return
+                } else {
+                    endOfChunk(chunk, false)
+                    fileSize = 0
+                    fileHasher = maker.config.NewFileHasher()
+                    isEOF = false
+                }
+            }
+        }
+
+    }
+
+    // Variable-size chunking using the buzhash rolling hash.
+    for {
+
+        // If the buffer still has some space left and EOF is not seen, read more data.
+        for maker.bufferSize < maker.bufferCapacity && !isEOF {
+            // Compute the writable region; it may wrap around the circular buffer.
+            start := maker.bufferStart + maker.bufferSize
+            count := maker.bufferCapacity - start
+            if start >= maker.bufferCapacity {
+                start -= maker.bufferCapacity
+                count = maker.bufferStart - start
+            }
+
+            count, err = reader.Read(maker.buffer[start : start + count])
+
+            if err != nil && err != io.EOF {
+                LOG_ERROR("CHUNK_MAKER", "Failed to read %d bytes: %s", count, err.Error())
+                return
+            }
+
+            maker.bufferSize += count
+            fileHasher.Write(maker.buffer[start : start + count])
+            fileSize += int64(count)
+
+            // if EOF is seen, try to switch to next file and continue
+            if err == io.EOF {
+                var ok bool
+                reader, ok = nextReader(fileSize, hex.EncodeToString(fileHasher.Sum(nil)))
+                if !ok {
+                    isEOF = true
+                } else {
+                    fileSize = 0
+                    fileHasher = maker.config.NewFileHasher()
+                    isEOF = false
+                }
+            }
+        }
+
+        // Not enough data to meet the minimum chunk size requirement, so just return as a chunk.
+        if maker.bufferSize < maker.minimumChunkSize {
+            fill(maker.bufferSize)
+            endOfChunk(chunk, true)
+            return
+        }
+
+
+        // Minimum chunk size has been reached. Calculate the buzhash for the minimum size chunk.
+        if (!minimumReached) {
+
+            bytes := maker.minimumChunkSize
+
+            // The minimum-size window may wrap around the circular buffer.
+            if maker.bufferStart + bytes < maker.bufferCapacity {
+                hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart : maker.bufferStart + bytes])
+            } else {
+                hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart :])
+                hashSum = maker.buzhashSum(hashSum,
+                                           maker.buffer[: bytes - (maker.bufferCapacity - maker.bufferStart)])
+            }
+
+            if (hashSum & maker.hashMask) == 0 {
+                // This is a minimum size chunk
+                fill(bytes)
+                endOfChunk(chunk, false)
+                startNewChunk()
+                continue
+            }
+
+            minimumReached = true
+        }
+
+        // Now check the buzhash of the data in the buffer, shifting one byte at a time.
+        bytes := maker.bufferSize - maker.minimumChunkSize
+        isEOC := false
+        // A chunk boundary is also forced once the maximum chunk size is hit.
+        maxSize := maker.maximumChunkSize - chunk.GetLength()
+        for i := 0; i < maker.bufferSize - maker.minimumChunkSize; i++ {
+            // 'out' is the byte leaving the window, 'in' the byte entering it;
+            // both indices wrap around the circular buffer.
+            out := maker.bufferStart + i
+            if out >= maker.bufferCapacity {
+                out -= maker.bufferCapacity
+            }
+            in := maker.bufferStart + i + maker.minimumChunkSize
+            if in >= maker.bufferCapacity {
+                in -= maker.bufferCapacity
+            }
+
+            hashSum = maker.buzhashUpdate(hashSum, maker.buffer[out], maker.buffer[in], maker.minimumChunkSize)
+            if (hashSum & maker.hashMask) == 0 || i == maxSize - maker.minimumChunkSize - 1 {
+                // A chunk is completed.
+                bytes = i + 1 + maker.minimumChunkSize
+                isEOC = true
+                break
+            }
+        }
+
+        fill(bytes)
+
+        if isEOC {
+            if isEOF && maker.bufferSize == 0 {
+                endOfChunk(chunk, true)
+                return
+            }
+            endOfChunk(chunk, false)
+            startNewChunk()
+            continue
+        }
+
+        // No boundary found in the buffered data; if all input has been
+        // consumed, flush what remains as the final chunk.
+        if isEOF {
+            fill(maker.bufferSize)
+            endOfChunk(chunk, true)
+            return
+        }
+    }
+}
diff --git a/duplicacy_chunkmaker_test.go b/duplicacy_chunkmaker_test.go
new file mode 100644
index 0000000..6d0aa88
--- /dev/null
+++ b/duplicacy_chunkmaker_test.go
@@ -0,0 +1,132 @@
+// Copyright (c) Acrosync LLC. All rights reserved.
+// Licensed under the Fair Source License 0.9 (https://fair.io/)
+// User Limitation: 5 users
+
+package duplicacy
+
+import (
+    "testing"
+    "bytes"
+    crypto_rand "crypto/rand"
+    "math/rand"
+    "io"
+    "sort"
+)
+
+// splitIntoChunks splits 'content' at n-1 random offsets into n readers, runs
+// the chunk maker over them, and returns the chunk hashes plus the total chunk
+// size.  NOTE(review): the 'bufferCapacity' parameter is currently unused by
+// the body, and the split offsets come from rand.Int() % n (i.e. only within
+// the first n bytes of content) — presumably this was meant to be
+// rand.Int() % len(content); confirm against upstream.
+func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunkSize,
+                     bufferCapacity int) ([]string, int) {
+
+    config := CreateConfig()
+
+    config.CompressionLevel = DEFAULT_COMPRESSION_LEVEL
+    config.AverageChunkSize = averageChunkSize
+    config.MaximumChunkSize = maxChunkSize
+    config.MinimumChunkSize = minChunkSize
+    config.ChunkSeed = []byte("duplicacy")
+
+    config.HashKey = DEFAULT_KEY
+    config.IDKey = DEFAULT_KEY
+
+    maker := CreateChunkMaker(config, false)
+
+    var chunks [] string
+    totalChunkSize := 0
+    totalFileSize := int64(0)
+
+    //LOG_INFO("CHUNK_SPLIT", "bufferCapacity: %d", bufferCapacity)
+
+    // Pick n distinct split offsets (sizes[0] is always 0).
+    buffers := make([] *bytes.Buffer, n)
+    sizes := make([] int, n)
+    sizes[0] = 0
+    for i := 1; i < n; i++ {
+        same := true
+        for same {
+            same = false
+            sizes[i] = rand.Int() % n
+            for j := 0; j < i; j++ {
+                if sizes[i] == sizes[j] {
+                    same = true
+                    break
+                }
+            }
+        }
+    }
+
+    sort.Sort(sort.IntSlice(sizes))
+
+    // Turn the offsets into n consecutive slices of 'content'.
+    for i := 0; i < n - 1; i++ {
+        buffers[i] = bytes.NewBuffer(content[sizes[i] : sizes[i + 1]])
+    }
+    buffers[n - 1] = bytes.NewBuffer(content[sizes[n - 1]:])
+
+    i := 0
+
+    // Feed the buffers to the chunk maker one after another, collecting the
+    // resulting chunk hashes and sizes.
+    maker.ForEachChunk(buffers[0],
+        func (chunk *Chunk, final bool) {
+            //LOG_INFO("CHUNK_SPLIT", "i: %d, chunk: %s, size: %d", i, chunk.GetHash(), size)
+            chunks = append(chunks, chunk.GetHash())
+            totalChunkSize += chunk.GetLength()
+        },
+        func (size int64, hash string) (io.Reader, bool) {
+            totalFileSize += size
+            i++
+            if i >= len(buffers) {
+                return nil, false
+            }
+            return buffers[i], true
+        })
+
+    // Sanity check: every input byte must end up in exactly one chunk.
+    if (totalFileSize != int64(totalChunkSize)) {
+        LOG_ERROR("CHUNK_SPLIT", "total chunk size: %d, total file size: %d", totalChunkSize, totalFileSize)
+    }
+    return chunks, totalChunkSize
+}
+
+// TestChunkMaker verifies that chunk boundaries are independent of how the
+// input is split into files: the same content split at different points must
+// produce identical chunk sequences.
+func TestChunkMaker(t *testing.T) {
+
+
+    //sizes := [...] int { 64 }
+    sizes := [...] int { 64, 256, 1024, 1024 * 10 }
+
+    for _, size := range sizes {
+
+        content := make([]byte, size)
+        _, err := crypto_rand.Read(content)
+        if err != nil {
+            t.Errorf("Error generating random content: %v", err)
+            continue
+        }
+
+        // Reference split: 10 files, capacity 32.
+        chunkArray1, totalSize1 := splitIntoChunks(content, 10, 32, 64, 16, 32)
+
+        capacities := [...]int { 32, 33, 34, 61, 62, 63, 64, 65, 66, 126, 127, 128, 129, 130,
+                                 255, 256, 257, 511, 512, 513, 1023, 1024, 1025,
+                                 32, 48, 64, 128, 256, 512, 1024, 2048, }
+
+        //capacities := [...]int { 32 }
+
+        for _, capacity := range capacities {
+
+            for _, n := range [...]int { 6, 7, 8, 9, 10 } {
+                chunkArray2, totalSize2 := splitIntoChunks(content, n, 32, 64, 16, capacity)
+
+                if totalSize1 != totalSize2 {
+                    t.Errorf("[size %d, capacity %d] total size is %d instead of %d",
+                             size, capacity, totalSize2, totalSize1)
+                }
+
+                if len(chunkArray1) != len(chunkArray2) {
+                    t.Errorf("[size %d, capacity %d] number of chunks is %d instead of %d",
+                             size, capacity, len(chunkArray2), len(chunkArray1))
+                } else {
+                    for i := 0; i < len(chunkArray1); i++ {
+                        if chunkArray1[i] != chunkArray2[i] {
+                            t.Errorf("[size %d, capacity %d, chunk %d] chunk is different", size, capacity, i)
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+}
diff --git a/duplicacy_chunkuploader.go b/duplicacy_chunkuploader.go
new file mode 100644
index 0000000..8aafc85
--- /dev/null
+++ b/duplicacy_chunkuploader.go
@@ -0,0 +1,142 @@
+// Copyright (c) Acrosync LLC. All rights reserved.
+// Licensed under the Fair Source License 0.9 (https://fair.io/)
+// User Limitation: 5 users
+
+package duplicacy
+
+import (
+    "sync/atomic"
+    "time"
+)
+
+// ChunkUploadTask represents a chunk to be uploaded.
+type ChunkUploadTask struct {
+    chunk * Chunk
+    chunkIndex int
+}
+
+// ChunkUploader uploads chunks to the storage using one or more uploading goroutines. Chunks are added
+// by the call to StartChunk(), and then passed to the uploading goroutines.
The completion function is +// called when the downloading is completed. Note that ChunkUploader does not release chunks to the +// chunk pool; instead +type ChunkUploader struct { + config *Config // Associated config + storage Storage // Download from this storage + snapshotCache *FileStorage // Used as cache if not nil; usually for uploading snapshot chunks + threads int // Number of uploading goroutines + taskQueue chan ChunkUploadTask // Uploading goroutines are listening on this channel for upload jobs + stopChannel chan bool // Used to terminate uploading goroutines + + numberOfUploadingTasks int32 // The number of uploading tasks + + // Uploading goroutines call this function after having downloaded chunks + completionFunc func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) +} + +// CreateChunkUploader creates a chunk uploader. +func CreateChunkUploader(config *Config, storage Storage, snapshotCache *FileStorage, threads int, + completionFunc func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int)) *ChunkUploader { + uploader := &ChunkUploader { + config: config, + storage: storage, + snapshotCache: snapshotCache, + threads: threads, + taskQueue: make(chan ChunkUploadTask, 1), + stopChannel: make(chan bool), + completionFunc: completionFunc, + } + + return uploader +} + +// Starts starts uploading goroutines. +func (uploader *ChunkUploader) Start() { + for i := 0; i < uploader.threads; i++ { + go func(threadIndex int) { + defer CatchLogException() + for { + select { + case task := <- uploader.taskQueue: + uploader.Upload(threadIndex, task) + case <- uploader.stopChannel: + return + } + } + } (i) + } +} + +// StartChunk sends a chunk to be uploaded to a waiting uploading goroutine. It may block if all uploading goroutines are busy. 
+func (uploader *ChunkUploader) StartChunk(chunk *Chunk, chunkIndex int) { + atomic.AddInt32(&uploader.numberOfUploadingTasks, 1) + uploader.taskQueue <- ChunkUploadTask { + chunk: chunk, + chunkIndex: chunkIndex, + } +} + +// Stop stops all uploading goroutines. +func (uploader *ChunkUploader) Stop() { + for atomic.LoadInt32(&uploader.numberOfUploadingTasks) > 0 { + time.Sleep(100 * time.Millisecond) + } + for i := 0; i < uploader.threads; i++ { + uploader.stopChannel <- false + } +} + +// Upload is called by the uploading goroutines to perform the actual uploading +func (uploader *ChunkUploader) Upload(threadIndex int, task ChunkUploadTask) bool { + + chunk := task.chunk + chunkSize := chunk.GetLength() + chunkID := chunk.GetID() + + if uploader.snapshotCache != nil && uploader.storage.IsCacheNeeded() { + // Save a copy to the local snapshot. + chunkPath, exist, _, err := uploader.snapshotCache.FindChunk(threadIndex, chunkID, false) + if err != nil { + LOG_WARN("UPLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err) + } else if exist { + LOG_DEBUG("CHUNK_CACHE", "Chunk %s already exists in the snapshot cache", chunkID) + } else if err = uploader.snapshotCache.UploadFile(threadIndex, chunkPath, chunk.GetBytes()); err != nil { + LOG_WARN("UPLOAD_CACHE", "Failed to save the chunk %s to the snapshot cache: %v", chunkID, err) + } else { + LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been saved to the snapshot cache", chunkID) + } + } + + // This returns the path the chunk file should be at. + chunkPath, exist, _, err := uploader.storage.FindChunk(threadIndex, chunkID, false) + if err != nil { + LOG_ERROR("UPLOAD_CHUNK", "Failed to find the path for the chunk %s: %v", chunkID, err) + return false + } + + if exist { + // Chunk deduplication by name in effect here. 
+ LOG_DEBUG("CHUNK_DUPLICATE", "Chunk %s already exists", chunkID) + + uploader.completionFunc(chunk, task.chunkIndex, false, chunkSize, 0) + atomic.AddInt32(&uploader.numberOfUploadingTasks, -1) + return false + } + + // Encrypt the chunk only after we know that it must be uploaded. + err = chunk.Encrypt(uploader.config.ChunkKey, chunk.GetHash()) + if err != nil { + LOG_ERROR("UPLOAD_CHUNK", "Failed to encrypt the chunk %s: %v", chunkID, err) + return false + } + + err = uploader.storage.UploadFile(threadIndex, chunkPath, chunk.GetBytes()) + if err != nil { + LOG_ERROR("UPLOAD_CHUNK", "Failed to upload the chunk %s: %v", chunkID, err) + return false + } + + LOG_DEBUG("CHUNK_UPLOAD", "Chunk %s has been uploaded", chunkID) + uploader.completionFunc(chunk, task.chunkIndex, false, chunkSize, chunk.GetLength()) + atomic.AddInt32(&uploader.numberOfUploadingTasks, -1) + return true +} diff --git a/duplicacy_chunkuploader_test.go b/duplicacy_chunkuploader_test.go new file mode 100644 index 0000000..df95035 --- /dev/null +++ b/duplicacy_chunkuploader_test.go @@ -0,0 +1,130 @@ +// Copyright (c) Acrosync LLC. All rights reserved. 
+// Licensed under the Fair Source License 0.9 (https://fair.io/)
+// User Limitation: 5 users
+
+package duplicacy
+
+import (
+    "os"
+    "time"
+    "path"
+    "testing"
+    "runtime/debug"
+
+    crypto_rand "crypto/rand"
+    "math/rand"
+)
+
+// TestUploaderAndDownloader round-trips a set of random chunks through the test
+// storage: upload them with a ChunkUploader, download them back with a
+// ChunkDownloader, verify the IDs match, and finally delete the chunk files.
+func TestUploaderAndDownloader(t *testing.T) {
+
+    rand.Seed(time.Now().UnixNano())
+    setTestingT(t)
+    SetLoggingLevel(INFO)
+
+    // Convert logged exceptions into test failures instead of bare panics.
+    defer func() {
+        if r := recover(); r != nil {
+            switch e := r.(type) {
+            case Exception:
+                t.Errorf("%s %s", e.LogID, e.Message)
+                debug.PrintStack()
+            default:
+                t.Errorf("%v", e)
+                debug.PrintStack()
+            }
+        }
+    } ()
+
+    testDir := path.Join(os.TempDir(), "duplicacy_test", "storage_test")
+    os.RemoveAll(testDir)
+    os.MkdirAll(testDir, 0700)
+
+    t.Logf("storage: %s", testStorageName)
+
+    storage, err := loadStorage(testDir, 1)
+    if err != nil {
+        t.Errorf("Failed to create storage: %v", err)
+        return
+    }
+    storage.EnableTestMode()
+    storage.SetRateLimits(testRateLimit, testRateLimit)
+
+    for _, dir := range []string { "chunks", "snapshots" } {
+        err = storage.CreateDirectory(0, dir)
+        if err != nil {
+            t.Errorf("Failed to create directory %s: %v", dir, err)
+            return
+        }
+    }
+
+    numberOfChunks := 100
+    maxChunkSize := 64 * 1024
+
+    if testQuickMode {
+        numberOfChunks = 10
+    }
+
+    var chunks []*Chunk
+
+    config := CreateConfig()
+    config.MinimumChunkSize = 100
+    config.chunkPool = make(chan *Chunk, numberOfChunks * 2)
+    totalFileSize := 0
+
+    // Generate random chunks of random sizes (1 .. maxChunkSize bytes).
+    for i := 0; i < numberOfChunks; i++ {
+        content := make([]byte, rand.Int() % maxChunkSize + 1)
+        _, err = crypto_rand.Read(content)
+        if err != nil {
+            t.Errorf("Error generating random content: %v", err)
+            return
+        }
+
+        chunk := CreateChunk(config, true)
+        chunk.Reset(true)
+        chunk.Write(content)
+        chunks = append(chunks, chunk)
+
+        t.Logf("Chunk: %s, size: %d", chunk.GetID(), chunk.GetLength())
+        totalFileSize += chunk.GetLength()
+    }
+
+    completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
+        t.Logf("Chunk %s size %d (%d/%d) uploaded", chunk.GetID(), chunkSize, chunkIndex, len(chunks))
+    }
+
+    chunkUploader := CreateChunkUploader(config, storage, nil, testThreads, nil)
+    chunkUploader.completionFunc = completionFunc
+    chunkUploader.Start()
+
+    for i, chunk := range chunks {
+        chunkUploader.StartChunk(chunk, i)
+    }
+
+    chunkUploader.Stop()
+
+    chunkDownloader := CreateChunkDownloader(config, storage, nil, true, testThreads)
+    chunkDownloader.totalFileSize = int64(totalFileSize)
+
+    for _, chunk := range chunks {
+        chunkDownloader.AddChunk(chunk.GetHash())
+    }
+
+    for i, chunk := range chunks {
+        downloaded := chunkDownloader.WaitForChunk(i)
+        if downloaded.GetID() != chunk.GetID() {
+            // BUG FIX: t.Error does not interpret format directives; t.Errorf is required.
+            t.Errorf("Uploaded: %s, downloaded: %s", chunk.GetID(), downloaded.GetID())
+        }
+    }
+
+    chunkDownloader.Stop()
+
+    // Clean up the uploaded chunk files.
+    for _, file := range listChunks(storage) {
+        err = storage.DeleteFile(0, "chunks/" + file)
+        if err != nil {
+            t.Errorf("Failed to delete the file %s: %v", file, err)
+            return
+        }
+    }
+
+}
diff --git a/duplicacy_config.go b/duplicacy_config.go
new file mode 100644
index 0000000..e4401b8
--- /dev/null
+++ b/duplicacy_config.go
@@ -0,0 +1,394 @@
+// Copyright (c) Acrosync LLC. All rights reserved.
+// Licensed under the Fair Source License 0.9 (https://fair.io/)
+// User Limitation: 5 users
+
+package duplicacy
+
+import (
+    "encoding/json"
+    "bytes"
+    "os"
+    "fmt"
+    "hash"
+    "runtime"
+    "runtime/debug"
+    "sync/atomic"
+    "crypto/rand"
+    "crypto/hmac"
+    "crypto/sha256"
+    "encoding/hex"
+
+    blake2 "github.com/minio/blake2b-simd"
+)
+
+// If encryption is turned off, use this key for HMAC-SHA256 or chunk ID generation etc.
+var DEFAULT_KEY = []byte("duplicacy")
+
+// The new default compression level is 100. However, in the early versions we use the
+// standard zlib levels of -1 to 9.
+var DEFAULT_COMPRESSION_LEVEL = 100
+
+// Config holds the per-storage parameters (chunk sizes, compression level,
+// seed and keys) shared by every snapshot; it is serialized into the storage's
+// 'config' file.
+type Config struct {
+    CompressionLevel int `json:"compression-level"`
+    AverageChunkSize int `json:"average-chunk-size"`
+    MaximumChunkSize int `json:"max-chunk-size"`
+    MinimumChunkSize int `json:"min-chunk-size"`
+
+    ChunkSeed []byte `json:"chunk-seed"`
+
+    // Use HMAC-SHA256(hashKey, plaintext) as the chunk hash.
+    // Use HMAC-SHA256(idKey, chunk hash) as the file name of the chunk
+    // For chunks, use HMAC-SHA256(chunkKey, chunk hash) as the encryption key
+    // For files, use HMAC-SHA256(fileKey, file path) as the encryption key
+
+    // the HMAC-SHA256 key of the chunk data
+    HashKey []byte `json:"-"`
+
+    // used to generate an id from the chunk hash
+    IDKey []byte `json:"-"`
+
+    // for encrypting a chunk
+    ChunkKey []byte `json:"-"`
+
+    // for encrypting a non-chunk file
+    FileKey []byte `json:"-"`
+
+    // pool of reusable chunk objects
+    chunkPool chan *Chunk `json:"-"`
+    // count of chunks allocated so far (updated atomically)
+    numberOfChunks int32
+}
+
+ // Create an alias to avoid recursive calls on Config.MarshalJSON
+type aliasedConfig Config
+
+// jsonableConfig is the wire representation: the secret byte slices are
+// hex-encoded strings.
+type jsonableConfig struct {
+    *aliasedConfig
+    ChunkSeed string `json:"chunk-seed"`
+    HashKey string `json:"hash-key"`
+    IDKey string `json:"id-key"`
+    ChunkKey string `json:"chunk-key"`
+    FileKey string `json:"file-key"`
+}
+
+// MarshalJSON serializes the config with all keys hex-encoded.
+func (config *Config) MarshalJSON() ([] byte, error) {
+
+    return json.Marshal(&jsonableConfig {
+        aliasedConfig: (*aliasedConfig)(config),
+        ChunkSeed: hex.EncodeToString(config.ChunkSeed),
+        HashKey: hex.EncodeToString(config.HashKey),
+        IDKey: hex.EncodeToString(config.IDKey),
+        ChunkKey: hex.EncodeToString(config.ChunkKey),
+        FileKey: hex.EncodeToString(config.FileKey),
+    })
+}
+
+// UnmarshalJSON parses the config, decoding the hex-encoded keys back into
+// byte slices.
+func (config *Config) UnmarshalJSON(description []byte) (err error) {
+
+    aliased := &jsonableConfig {
+        aliasedConfig: (*aliasedConfig)(config),
+    }
+
+    if err = json.Unmarshal(description, &aliased); err != nil {
+        return err
+    }
+
+    if config.ChunkSeed, err = hex.DecodeString(aliased.ChunkSeed); err != nil {
+        return fmt.Errorf("Invalid representation of the chunk seed in the config")
+    }
+    if config.HashKey, err = hex.DecodeString(aliased.HashKey); err != nil {
+        return fmt.Errorf("Invalid representation of the hash key in the config")
+    }
+    if config.IDKey, err = hex.DecodeString(aliased.IDKey); err != nil {
+        return fmt.Errorf("Invalid representation of the id key in the config")
+    }
+    if config.ChunkKey, err = hex.DecodeString(aliased.ChunkKey); err != nil {
+        return fmt.Errorf("Invalid representation of the chunk key in the config")
+    }
+    if config.FileKey, err = hex.DecodeString(aliased.FileKey); err != nil {
+        return fmt.Errorf("Invalid representation of the file key in the config")
+    }
+
+    return nil
+}
+
+// IsCompatiableWith reports whether two configs produce interchangeable chunks
+// (same chunking parameters, seed, and hash key).  NOTE(review): name keeps
+// the original 'Compatiable' spelling to preserve the public interface.
+func (config *Config) IsCompatiableWith(otherConfig *Config) bool {
+
+    return config.CompressionLevel == otherConfig.CompressionLevel &&
+           config.AverageChunkSize == otherConfig.AverageChunkSize &&
+           config.MaximumChunkSize == otherConfig.MaximumChunkSize &&
+           config.MinimumChunkSize == otherConfig.MinimumChunkSize &&
+           bytes.Equal(config.ChunkSeed, otherConfig.ChunkSeed) &&
+           bytes.Equal(config.HashKey, otherConfig.HashKey)
+}
+
+// Print logs the non-secret config parameters at INFO level.
+func (config *Config) Print() {
+
+    LOG_INFO("CONFIG_INFO", "Compression level: %d", config.CompressionLevel)
+    LOG_INFO("CONFIG_INFO", "Average chunk size: %d", config.AverageChunkSize)
+    LOG_INFO("CONFIG_INFO", "Maximum chunk size: %d", config.MaximumChunkSize)
+    LOG_INFO("CONFIG_INFO", "Minimum chunk size: %d", config.MinimumChunkSize)
+    LOG_INFO("CONFIG_INFO", "Chunk seed: %x", config.ChunkSeed)
+}
+
+// CreateConfigFromParameters builds a Config.  With encryption enabled all
+// seeds/keys are freshly random; otherwise the well-known default key is used.
+// When 'copyFrom' is given, the chunking parameters, seed and hash key are
+// copied from it so that the two storages produce identical chunks.
+func CreateConfigFromParameters(compressionLevel int, averageChunkSize int, maximumChunkSize int, mininumChunkSize int,
+                                isEncrypted bool, copyFrom *Config) (config *Config) {
+
+    config = &Config {
+        CompressionLevel: compressionLevel,
+        AverageChunkSize: averageChunkSize,
+        MaximumChunkSize: maximumChunkSize,
+        MinimumChunkSize: mininumChunkSize,
+    }
+
+    if isEncrypted {
+        // Randomly generate keys
+        keys := make([]byte, 32 * 5)
+        _, err := rand.Read(keys)
+        if err != nil {
+            LOG_ERROR("CONFIG_KEY", "Failed to generate random keys: %v", err)
+            return nil
+        }
+
+        config.ChunkSeed = keys[:32]
+        config.HashKey = keys[32:64]
+        config.IDKey = keys[64:96]
+        config.ChunkKey = keys[96:128]
+        config.FileKey = keys[128:]
+    } else {
+        config.ChunkSeed = DEFAULT_KEY
+        config.HashKey = DEFAULT_KEY
+        config.IDKey = DEFAULT_KEY
+    }
+
+    if copyFrom != nil {
+        config.CompressionLevel = copyFrom.CompressionLevel
+
+        config.AverageChunkSize = copyFrom.AverageChunkSize
+        config.MaximumChunkSize = copyFrom.MaximumChunkSize
+        config.MinimumChunkSize = copyFrom.MinimumChunkSize
+
+        config.ChunkSeed = copyFrom.ChunkSeed
+        config.HashKey = copyFrom.HashKey
+    }
+
+    config.chunkPool = make(chan *Chunk, runtime.NumCPU() * 16)
+
+    return config
+}
+
+// CreateConfig returns a default, unencrypted config.
+func CreateConfig() (config *Config) {
+    return &Config {
+        HashKey: DEFAULT_KEY,
+        IDKey: DEFAULT_KEY,
+        CompressionLevel: DEFAULT_COMPRESSION_LEVEL,
+        chunkPool: make(chan *Chunk, runtime.NumCPU() * 16),
+    }
+}
+
+// GetChunk returns a chunk from the pool, allocating a new one if the pool is
+// empty.  A warning is logged once allocations exceed the pool capacity.
+func (config *Config) GetChunk() (chunk *Chunk) {
+    select {
+    case chunk = <- config.chunkPool :
+    default:
+        numberOfChunks := atomic.AddInt32(&config.numberOfChunks, 1)
+        if numberOfChunks >= int32(runtime.NumCPU() * 16) {
+            LOG_WARN("CONFIG_CHUNK", "%d chunks have been allocated", numberOfChunks)
+            if _, found := os.LookupEnv("DUPLICACY_CHUNK_DEBUG"); found {
+                debug.PrintStack()
+            }
+        }
+        chunk = CreateChunk(config, true)
+    }
+    return chunk
+}
+
+// PutChunk returns a chunk to the pool; if the pool is full the chunk is
+// simply dropped.
+func (config *Config) PutChunk(chunk *Chunk){
+
+    if chunk == nil {
+        return
+    }
+
+    select {
+    case config.chunkPool <- chunk:
+    default:
+        LOG_INFO("CHUNK_BUFFER", "Discarding a free chunk due to a full pool")
+    }
+}
+
+// NewKeyedHasher returns the keyed hash used for chunk hashing/naming:
+// keyed BLAKE2b-256 at the new default compression level, HMAC-SHA256 otherwise.
+func (config *Config) NewKeyedHasher(key []byte) hash.Hash {
+    if config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL {
+        hasher, err := blake2.New(&blake2.Config{ Size: 32, Key:key })
+        if err != nil {
+            LOG_ERROR("HASH_KEY", "Invalid hash key: %x", key)
+        }
+        return hasher
+    } else {
+        return hmac.New(sha256.New, key)
+    }
+}
+
+// NewFileHasher returns the unkeyed hash used for whole-file hashing:
+// BLAKE2b-256 at the new default compression level, SHA-256 otherwise.
+func (config *Config) NewFileHasher() hash.Hash {
+    if config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL {
+        hasher, _ := blake2.New(&blake2.Config{ Size: 32 })
+        return hasher
+    } else {
+        return sha256.New()
+    }
+}
+
+// Calculate the file hash using the corresponding hasher.
+// NOTE(review): read errors other than EOF are silently ignored here, so a
+// short read would yield a hash of partial content; an open failure returns "".
+func (config *Config) ComputeFileHash(path string, buffer []byte) string {
+
+    file, err := os.Open(path)
+    if err != nil {
+        return ""
+    }
+
+    hasher := config.NewFileHasher()
+    defer file.Close()
+
+    count := 1
+    for count > 0 {
+        count, err = file.Read(buffer)
+        hasher.Write(buffer[:count])
+    }
+
+    return hex.EncodeToString(hasher.Sum(nil))
+}
+
+// GetChunkIDFromHash creates a chunk id from the chunk hash. The chunk id will be used as the name of the chunk
+// file, so it is publicly exposed. The chunk hash is the HMAC-SHA256 of what is contained in the chunk and should
+// never be exposed.
+func (config *Config) GetChunkIDFromHash(hash string) string {
+    hasher := config.NewKeyedHasher(config.IDKey)
+    hasher.Write([]byte(hash))
+    return hex.EncodeToString(hasher.Sum(nil))
+}
+
+// DownloadConfig fetches and (if a password is given) decrypts the 'config'
+// file from the storage.  It returns (nil, false, nil) when no config exists.
+func DownloadConfig(storage Storage, password string) (config *Config, isEncrypted bool, err error) {
+    // Although the default key is passed to the function call the key is not actually used since there is no need to
+    // calculate the hash or id of the config file.
+    configFile := CreateChunk(CreateConfig(), true)
+
+    exist, _, _, err := storage.GetFileInfo(0, "config")
+    if err != nil {
+        return nil, false, err
+    }
+
+    if !exist {
+        return nil, false, nil
+    }
+
+    err = storage.DownloadFile(0, "config", configFile)
+    if err != nil {
+        return nil, false, err
+    }
+
+    var masterKey []byte
+
+    if len(password) > 0 {
+        masterKey = GenerateKeyFromPassword(password)
+
+        // Decrypt the config file. masterKey == nil means no encryption.
+        err = configFile.Decrypt(masterKey, "")
+        if err != nil {
+            return nil, false, fmt.Errorf("Failed to retrieve the config file: %v", err)
+        }
+    }
+
+    config = CreateConfig()
+
+    err = json.Unmarshal(configFile.GetBytes(), config)
+
+    if err != nil {
+        // NOTE(review): this slice assumes the config file is at least 9 bytes;
+        // a shorter corrupt file would panic here — confirm whether that can occur.
+        if bytes.Equal(configFile.GetBytes()[:9], []byte("duplicacy")) {
+            return nil, true, fmt.Errorf("The storage is likely to have been initialized with a password before")
+        } else {
+            return nil, false, fmt.Errorf("Failed to parse the config file: %v", err)
+        }
+    }
+
+    return config, false, nil
+
+}
+
+// UploadConfig serializes the config, optionally encrypts it with a key derived
+// from 'password', uploads it as 'config', and creates the 'chunks' and
+// 'snapshots' subdirectories.  Returns false on any failure.
+func UploadConfig(storage Storage, config *Config, password string) (bool) {
+
+    // This is the key to encrypt the config file.
+    var masterKey []byte
+
+    if len(password) > 0 {
+
+        if len(password) < 8 {
+            LOG_ERROR("CONFIG_PASSWORD", "The password must be at least 8 characters")
+            return false
+        }
+
+        masterKey = GenerateKeyFromPassword(password)
+    }
+
+    description, err := json.MarshalIndent(config, "", "    ")
+    if err != nil {
+        LOG_ERROR("CONFIG_MARSHAL", "Failed to marshal the config: %v", err)
+        return false
+    }
+
+    // Although the default key is passed to the function call the key is not actually used since there is no need to
+    // calculate the hash or id of the config file.
+    chunk := CreateChunk(CreateConfig(), true)
+    chunk.Write(description)
+
+    if len(password) > 0 {
+        // Encrypt the config file with masterKey. If masterKey is nil then no encryption is performed.
+        err = chunk.Encrypt(masterKey, "")
+
+        if err != nil {
+            LOG_ERROR("CONFIG_CREATE", "Failed to create the config file: %v", err)
+            return false
+        }
+    }
+
+    err = storage.UploadFile(0, "config", chunk.GetBytes())
+    if err != nil {
+        LOG_ERROR("CONFIG_INIT", "Failed to configure the storage: %v", err)
+        return false
+    }
+
+    if IsTracing() {
+        config.Print()
+    }
+
+    for _, subDir := range []string {"chunks", "snapshots"} {
+        err = storage.CreateDirectory(0, subDir)
+        if err != nil {
+            LOG_ERROR("CONFIG_MKDIR", "Failed to create storage subdirectory: %v", err)
+        }
+    }
+
+    return true
+}
+
+// ConfigStorage makes the general storage space available for storing duplicacy format snapshots. In essence,
+// it simply creates a file named 'config' that stores various parameters as well as a set of keys if encryption
+// is enabled.
+func ConfigStorage(storage Storage, compressionLevel int, averageChunkSize int, maximumChunkSize int,
+                   minimumChunkSize int, password string, copyFrom *Config) bool {
+
+    exist, _, _, err := storage.GetFileInfo(0, "config")
+    if err != nil {
+        LOG_ERROR("CONFIG_INIT", "Failed to check if there is an existing config file: %v", err)
+        return false
+    }
+
+    if exist {
+        LOG_INFO("CONFIG_EXIST", "The storage has already been configured")
+        return false
+    }
+
+
+    config := CreateConfigFromParameters(compressionLevel, averageChunkSize, maximumChunkSize, minimumChunkSize, len(password) > 0,
+                                         copyFrom)
+    if config == nil {
+        return false
+    }
+
+    return UploadConfig(storage, config, password)
+}
diff --git a/duplicacy_dropboxstorage.go b/duplicacy_dropboxstorage.go
new file mode 100644
index 0000000..57f7006
--- /dev/null
+++ b/duplicacy_dropboxstorage.go
@@ -0,0 +1,296 @@
+// Copyright (c) Acrosync LLC. All rights reserved.
+// Licensed under the Fair Source License 0.9 (https://fair.io/)
+// User Limitation: 5 users
+
+package duplicacy
+
+import (
+    "fmt"
+    "path"
+    "strings"
+    "github.com/gilbertchen/go-dropbox"
+)
+
+// DropboxStorage is a storage backend on Dropbox; one API client is kept per
+// thread so calls with distinct thread indices never share a client.
+type DropboxStorage struct {
+    RateLimitedStorage
+
+    clients []*dropbox.Files
+    storageDir string
+}
+
+// CreateDropboxStorage creates a dropbox storage object.
+func CreateDropboxStorage(accessToken string, storageDir string, threads int) (storage *DropboxStorage, err error) {
+
+    var clients []*dropbox.Files
+    for i := 0; i < threads; i++ {
+        client := dropbox.NewFiles(dropbox.NewConfig(accessToken))
+        clients = append(clients, client)
+    }
+
+    // Normalize to an absolute Dropbox path with no trailing slash.
+    if storageDir == "" || storageDir[0] != '/' {
+        storageDir = "/" + storageDir
+    }
+
+    if len(storageDir) > 1 && storageDir[len(storageDir) - 1] == '/' {
+        storageDir = storageDir[:len(storageDir) - 1]
+    }
+
+    storage = &DropboxStorage {
+        clients: clients,
+        storageDir: storageDir,
+    }
+
+    // Creating the root directory also validates the access token; an existing
+    // directory is treated as success by CreateDirectory.
+    err = storage.CreateDirectory(0, "")
+    if err != nil {
+        return nil, fmt.Errorf("Can't create storage directory: %v", err)
+    }
+
+    return storage, nil
+}
+
+// ListFiles return the list of files and subdirectories under 'dir' (non-recursively)
+func (storage *DropboxStorage) ListFiles(threadIndex int, dir string) (files []string, sizes []int64, err error) {
+
+    // Normalize 'dir' the same way as the storage root.
+    if dir != "" && dir[0] != '/' {
+        dir = "/" + dir
+    }
+
+    if len(dir) > 1 && dir[len(dir) - 1] == '/' {
+        dir = dir[:len(dir) - 1]
+    }
+
+    input := &dropbox.ListFolderInput {
+        Path : storage.storageDir + dir,
+        Recursive : false,
+        IncludeMediaInfo: false,
+        IncludeDeleted: false,
+    }
+
+    output, err := storage.clients[threadIndex].ListFolder(input)
+
+    // Page through the listing; directories are marked with a trailing '/'.
+    for {
+
+        if err != nil {
+            return nil, nil, err
+        }
+
+        for _, entry := range output.Entries {
+            name := entry.Name
+            if entry.Tag == "folder" {
+                name += "/"
+            }
+            files = append(files, name)
+            sizes = append(sizes, int64(entry.Size))
+        }
+
+        if output.HasMore {
+            output, err = storage.clients[threadIndex].ListFolderContinue(
+                &dropbox.ListFolderContinueInput { Cursor: output.Cursor, })
+
+        } else {
+            break
+        }
+
+    }
+
+    return files, sizes, nil
+}
+
+// DeleteFile deletes the file or directory at 'filePath'.
+// Deleting a path that does not exist is not an error.
+func (storage *DropboxStorage) DeleteFile(threadIndex int, filePath string) (err error) {
+    if filePath != "" && filePath[0] != '/' {
+        filePath = "/" + filePath
+    }
+
+    input := &dropbox.DeleteInput {
+        Path: storage.storageDir + filePath,
+    }
+    _, err = storage.clients[threadIndex].Delete(input)
+    if err != nil {
+        // A 'path_lookup/not_found/' error means the file is already gone.
+        if e, ok := err.(*dropbox.Error); ok && strings.HasPrefix(e.Summary, "path_lookup/not_found/") {
+            return nil
+        }
+    }
+
+    return err
+}
+
+// MoveFile renames the file.
+func (storage *DropboxStorage) MoveFile(threadIndex int, from string, to string) (err error) {
+    if from != "" && from[0] != '/' {
+        from = "/" + from
+    }
+    if to != "" && to[0] != '/' {
+        to = "/" + to
+    }
+    input := &dropbox.MoveInput {
+        FromPath: storage.storageDir + from,
+        ToPath: storage.storageDir + to,
+    }
+    _, err = storage.clients[threadIndex].Move(input)
+    return err
+}
+
+// CreateDirectory creates a new directory.
+// Creating a directory that already exists is not an error.
+func (storage *DropboxStorage) CreateDirectory(threadIndex int, dir string) (err error) {
+    if dir != "" && dir[0] != '/' {
+        dir = "/" + dir
+    }
+
+    if len(dir) > 1 && dir[len(dir) - 1] == '/' {
+        dir = dir[:len(dir) - 1]
+    }
+
+    input := &dropbox.CreateFolderInput {
+        Path : storage.storageDir + dir,
+    }
+
+    _, err = storage.clients[threadIndex].CreateFolder(input)
+    if err != nil {
+        // A 'path/conflict/' error means the folder already exists.
+        if e, ok := err.(*dropbox.Error); ok && strings.HasPrefix(e.Summary, "path/conflict/") {
+            return nil
+        }
+    }
+    return err
+}
+
+// GetFileInfo returns the information about the file or directory at 'filePath'. 
+func (storage *DropboxStorage) GetFileInfo(threadIndex int, filePath string) (exist bool, isDir bool, size int64, err error) { + + if filePath != "" && filePath[0] != '/' { + filePath = "/" + filePath + } + + input := &dropbox.GetMetadataInput { + Path: storage.storageDir + filePath, + IncludeMediaInfo: false, + } + + output, err := storage.clients[threadIndex].GetMetadata(input) + if err != nil { + if e, ok := err.(*dropbox.Error); ok && strings.HasPrefix(e.Summary, "path/not_found/") { + return false, false, 0, nil + } else { + return false, false, 0, err + } + } + + return true, output.Tag == "folder", int64(output.Size), nil +} + +// FindChunk finds the chunk with the specified id. If 'isFossil' is true, it will search for chunk files with +// the suffix '.fsl'. +func (storage *DropboxStorage) FindChunk(threadIndex int, chunkID string, isFossil bool) (filePath string, exist bool, size int64, err error) { + dir := "/chunks" + + suffix := "" + if isFossil { + suffix = ".fsl" + } + + // The minimum level of directories to dive into before searching for the chunk file. + minimumLevel := 1 + + for level := 0; level * 2 < len(chunkID); level ++ { + if level >= minimumLevel { + filePath = path.Join(dir, chunkID[2 * level:]) + suffix + var size int64 + exist, _, size, err = storage.GetFileInfo(threadIndex, filePath) + if err != nil { + return "", false, 0, err + } + if exist { + return filePath, exist, size, nil + } + } + + // Find the subdirectory the chunk file may reside. + subDir := path.Join(dir, chunkID[2 * level: 2 * level + 2]) + exist, _, _, err = storage.GetFileInfo(threadIndex, subDir) + if err != nil { + return "", false, 0, err + } + + if exist { + dir = subDir + continue + } + + if level < minimumLevel { + // Create the subdirectory if it doesn't exist. + err = storage.CreateDirectory(threadIndex, subDir) + if err != nil { + return "", false, 0, err + } + + dir = subDir + continue + } + + // Teh chunk must be under this subdirectory but it doesn't exist. 
+ return path.Join(dir, chunkID[2 * level:])[1:] + suffix, false, 0, nil + + } + + LOG_FATAL("CHUNK_FIND", "Chunk %s is still not found after having searched a maximum level of directories", + chunkID) + return "", false, 0, nil + +} + +// DownloadFile reads the file at 'filePath' into the chunk. +func (storage *DropboxStorage) DownloadFile(threadIndex int, filePath string, chunk *Chunk) (err error) { + + if filePath != "" && filePath[0] != '/' { + filePath = "/" + filePath + } + + input := &dropbox.DownloadInput { + Path: storage.storageDir + filePath, + } + + output, err := storage.clients[threadIndex].Download(input) + if err != nil { + return err + } + + defer output.Body.Close() + + _, err = RateLimitedCopy(chunk, output.Body, storage.DownloadRateLimit / len(storage.clients)) + return err + +} + +// UploadFile writes 'content' to the file at 'filePath'. +func (storage *DropboxStorage) UploadFile(threadIndex int, filePath string, content []byte) (err error) { + if filePath != "" && filePath[0] != '/' { + filePath = "/" + filePath + } + + input := &dropbox.UploadInput { + Path: storage.storageDir + filePath, + Mode: dropbox.WriteModeOverwrite, + AutoRename: false, + Mute: true, + Reader: CreateRateLimitedReader(content, storage.UploadRateLimit / len(storage.clients)), + } + + _, err = storage.clients[threadIndex].Upload(input) + return err +} + +// If a local snapshot cache is needed for the storage to avoid downloading/uploading chunks too often when +// managing snapshots. +func (storage *DropboxStorage) IsCacheNeeded() (bool) { return true } + +// If the 'MoveFile' method is implemented. +func (storage *DropboxStorage) IsMoveFileImplemented() (bool) { return true } + +// If the storage can guarantee strong consistency. +func (storage *DropboxStorage) IsStrongConsistent() (bool) { return false } + +// If the storage supports fast listing of files names. +func (storage *DropboxStorage) IsFastListing() (bool) { return false } + +// Enable the test mode. 
+func (storage *DropboxStorage) EnableTestMode() {} diff --git a/duplicacy_entry.go b/duplicacy_entry.go new file mode 100644 index 0000000..fe0ceee --- /dev/null +++ b/duplicacy_entry.go @@ -0,0 +1,566 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "os" + "fmt" + "path/filepath" + "io/ioutil" + "sort" + "regexp" + "strconv" + "time" + "encoding/json" + "encoding/base64" + "strings" + +) + + +// This is the hidden directory in the repository for storing various files. +var DUPLICACY_DIRECTORY = ".duplicacy" + +// Regex for matching 'StartChunk:StartOffset:EndChunk:EndOffset' +var contentRegex = regexp.MustCompile(`^([0-9]+):([0-9]+):([0-9]+):([0-9]+)`) + +// Entry encapsulates information about a file or directory. +type Entry struct { + Path string + Size int64 + Time int64 + Mode uint32 + Link string + Hash string + + UID int + GID int + + StartChunk int + StartOffset int + EndChunk int + EndOffset int + + Attributes map[string][]byte +} + +// CreateEntry creates an entry from file properties. +func CreateEntry(path string, size int64, time int64, mode uint32) *Entry { + + if len(path) > 0 && path[len(path) - 1] != '/' && (mode & uint32(os.ModeDir)) != 0 { + path += "/" + } + + return &Entry { + Path : path, + Size : size, + Time : time, + Mode : mode, + + UID : -1, + GID : -1, + } + +} + +// CreateEntryFromFileInfo creates an entry from a 'FileInfo' object. 
+func CreateEntryFromFileInfo(fileInfo os.FileInfo, directory string) *Entry { + path := directory + fileInfo.Name() + + mode := fileInfo.Mode() + + if mode & os.ModeDir != 0 && mode & os.ModeSymlink != 0 { + mode ^= os.ModeDir + } + + if path[len(path) - 1] != '/' && mode & os.ModeDir != 0 { + path += "/" + } + + entry := &Entry { + Path: path, + Size: fileInfo.Size(), + Time: fileInfo.ModTime().Unix(), + Mode: uint32(mode), + } + + GetOwner(entry, &fileInfo) + + return entry +} + +// CreateEntryFromJSON creates an entry from a json description. +func (entry *Entry) UnmarshalJSON(description []byte) (err error) { + + var object map[string]interface {} + + err = json.Unmarshal(description, &object) + if err != nil { + return err + } + + var value interface {} + var ok bool + + if value, ok = object["name"]; ok { + pathInBase64, ok := value.(string) + if !ok { + return fmt.Errorf("Name is not a string for a file in the snapshot") + } + path, err := base64.StdEncoding.DecodeString(pathInBase64) + if err != nil { + return fmt.Errorf("Invalid name '%s' in the snapshot", pathInBase64) + } + entry.Path = string(path) + } else if value, ok = object["path"]; !ok { + return fmt.Errorf("Path is not specified for a file in the snapshot") + } else if entry.Path, ok = value.(string); !ok { + return fmt.Errorf("Path is not a string for a file in the snapshot") + } + + if value, ok = object["size"]; !ok { + return fmt.Errorf("Size is not specified for file '%s' in the snapshot", entry.Path) + } else if _, ok = value.(float64); !ok { + return fmt.Errorf("Size is not a valid integer for file '%s' in the snapshot", entry.Path) + } + entry.Size = int64(value.(float64)) + + if value, ok = object["time"]; !ok { + return fmt.Errorf("Time is not specified for file '%s' in the snapshot", entry.Path) + } else if _, ok = value.(float64); !ok { + return fmt.Errorf("Time is not a valid integer for file '%s' in the snapshot", entry.Path) + } + entry.Time = int64(value.(float64)) + + if value, 
ok = object["mode"]; !ok { + return fmt.Errorf("float64 is not specified for file '%s' in the snapshot", entry.Path) + } else if _, ok = value.(float64); !ok { + return fmt.Errorf("Mode is not a valid integer for file '%s' in the snapshot", entry.Path) + } + entry.Mode = uint32(value.(float64)) + + if value, ok = object["hash"]; !ok { + return fmt.Errorf("Hash is not specified for file '%s' in the snapshot", entry.Path) + } else if entry.Hash, ok = value.(string); !ok { + return fmt.Errorf("Hash is not a string for file '%s' in the snapshot", entry.Path) + } + + if value, ok = object["link"]; ok { + var link string + if link, ok = value.(string); !ok { + return fmt.Errorf("Symlink is not a valid string for file '%s' in the snapshot", entry.Path) + } + entry.Link = link + } + + entry.UID = -1 + if value, ok = object["uid"]; ok { + if _, ok = value.(float64); ok { + entry.UID = int(value.(float64)) + } + } + + entry.GID = -1 + if value, ok = object["gid"]; ok { + if _, ok = value.(float64); ok { + entry.GID = int(value.(float64)) + } + } + + if value, ok = object["attributes"]; ok { + if attributes, ok := value.(map[string]interface {}); !ok { + return fmt.Errorf("Attributes are invalid for file '%s' in the snapshot", entry.Path) + } else { + entry.Attributes = make(map[string][]byte) + for name, object := range attributes { + if object == nil { + entry.Attributes[name] = []byte("") + } else if attributeInBase64, ok := object.(string); !ok { + return fmt.Errorf("Attribute '%s' is invalid for file '%s' in the snapshot", name, entry.Path) + } else if attribute, err := base64.StdEncoding.DecodeString(attributeInBase64); err != nil { + return fmt.Errorf("Failed to decode attribute '%s' for file '%s' in the snapshot: %v", + name, entry.Path, err) + } else { + entry.Attributes[name] = attribute + } + } + } + } + + if entry.IsFile() && entry.Size > 0 { + if value, ok = object["content"]; !ok { + return fmt.Errorf("Content is not specified for file '%s' in the snapshot", 
entry.Path) + } + + if content, ok := value.(string); !ok { + return fmt.Errorf("Content is invalid for file '%s' in the snapshot", entry.Path) + } else { + + matched := contentRegex.FindStringSubmatch(content) + if matched == nil { + return fmt.Errorf("Content is specified in a wrong format for file '%s' in the snapshot", entry.Path) + } + + entry.StartChunk, _ = strconv.Atoi(matched[1]) + entry.StartOffset, _ = strconv.Atoi(matched[2]) + entry.EndChunk, _ = strconv.Atoi(matched[3]) + entry.EndOffset, _ = strconv.Atoi(matched[4]) + } + } + + return nil + +} + +func (entry *Entry) convertToObject(encodeName bool) map[string]interface{} { + + object := make(map[string]interface{}) + + if encodeName { + object["name"] = base64.StdEncoding.EncodeToString([]byte(entry.Path)) + } else { + object["path"] = entry.Path + } + object["size"] = entry.Size + object["time"] = entry.Time + object["mode"] = entry.Mode + object["hash"] = entry.Hash + + if entry.IsLink() { + object["link"] = entry.Link + } + + if entry.IsFile() && entry.Size > 0 { + object["content"] = fmt.Sprintf("%d:%d:%d:%d", + entry.StartChunk, entry.StartOffset, entry.EndChunk, entry.EndOffset) + } + + if entry.UID != -1 && entry.GID != -1 { + object["uid"] = entry.UID + object["gid"] = entry.GID + } + + if len(entry.Attributes) > 0 { + object["attributes"] = entry.Attributes + } + + return object +} + +// MarshalJSON returns the json description of an entry. 
func (entry *Entry) MarshalJSON() ([] byte, error) {

	// Paths are base64-encoded ("name") so arbitrary bytes survive JSON.
	object := entry.convertToObject(true)
	description, err := json.Marshal(object)
	return description, err
}

// IsFile reports whether the entry is a regular file (no type bits set).
func (entry *Entry) IsFile() bool {
	return entry.Mode & uint32(os.ModeType) == 0
}

// IsDir reports whether the entry is a directory.
func (entry *Entry) IsDir() bool {
	return entry.Mode & uint32(os.ModeDir) != 0
}

// IsLink reports whether the entry is a symbolic link.
func (entry *Entry) IsLink() bool {
	return entry.Mode & uint32(os.ModeSymlink) != 0
}

// GetPermissions returns only the permission bits of the entry's mode.
func (entry *Entry) GetPermissions() os.FileMode {
	return os.FileMode(entry.Mode) & os.ModePerm
}

// IsSameAs reports whether 'other' has the same size and a timestamp within
// one second — presumably to tolerate filesystems with coarse timestamp
// granularity; TODO confirm.
func (entry *Entry) IsSameAs(other *Entry) bool {
	return entry.Size == other.Size && entry.Time <= other.Time + 1 && entry.Time >= other.Time - 1
}

// IsSameAsFileInfo is IsSameAs against an os.FileInfo, with the same
// one-second timestamp tolerance.
func (entry *Entry) IsSameAsFileInfo(other os.FileInfo) bool {
	// NOTE: the local 'time' shadows the time package within this function.
	time := other.ModTime().Unix()
	return entry.Size == other.Size() && entry.Time <= time + 1 && entry.Time >= time - 1
}

// String formats the entry for listings: size (padded to maxSizeDigits),
// modification time, hash, and path.
func (entry *Entry) String(maxSizeDigits int) string {
	modifiedTime := time.Unix(entry.Time, 0).Format("2006-01-02 15:04:05")
	return fmt.Sprintf("%*d %s %64s %s", maxSizeDigits, entry.Size, modifiedTime, entry.Hash, entry.Path)
}

// RestoreMetadata applies the entry's permissions, modification time,
// extended attributes, and ownership to the file at 'fullPath'.  'fileInfo'
// may be nil, in which case the file is stat'ed first.  Returns false on the
// first failure (after logging it).
func (entry *Entry) RestoreMetadata(fullPath string, fileInfo *os.FileInfo) bool {

	if fileInfo == nil {
		stat, err := os.Stat(fullPath)
		// 'fileInfo' is assigned before the error check, but on error the
		// function returns immediately so the nil stat is never dereferenced.
		fileInfo = &stat
		if err != nil {
			LOG_ERROR("RESTORE_STAT", "Failed to retrieve the file info: %v", err)
			return false
		}
	}

	// Only touch permissions/times when they actually differ.
	if (*fileInfo).Mode() & os.ModePerm != entry.GetPermissions() {
		err := os.Chmod(fullPath, entry.GetPermissions())
		if err != nil {
			LOG_ERROR("RESTORE_CHMOD", "Failed to set the file permissions: %v", err)
			return false
		}
	}

	if (*fileInfo).ModTime().Unix() != entry.Time {
		modifiedTime := time.Unix(entry.Time, 0)
		err := os.Chtimes(fullPath, modifiedTime, modifiedTime)
		if err != nil {
			LOG_ERROR("RESTORE_CHTIME", "Failed to set the modification time: %v", err)
			return false
		}
	}

	if len(entry.Attributes) > 0 {
		// Best-effort: SetAttributesToFile's result is intentionally ignored.
		entry.SetAttributesToFile(fullPath)
	}

	return SetOwner(fullPath, entry, fileInfo)
}


// Return -1 if 'left' should appear before 'right', 1 if opposite, and 0 if they are the same.
// Files are always arranged before subdirectories under the same parent directory.
func (left *Entry) Compare(right *Entry) int {

	path1 := left.Path
	path2 := right.Path

	// Skip the common prefix; p is the index of the first differing byte.
	p := 0
	for ; p < len(path1) && p < len(path2); p++ {
		if path1[p] != path2[p] {
			break
		}
	}

	// c1, c2 is the first byte that differs (0 if a path ended here)
	var c1, c2 byte
	if p < len(path1) {
		c1 = path1[p]
	}
	if p < len(path2) {
		c2 = path2[p]
	}

	// c3, c4 indicates how the current component ends
	// c3 == '/': the current component is a directory
	// c3 != '/': the current component is the last one
	c3 := c1
	for i := p; c3 != '/' && i < len(path1); i++ {
		c3 = path1[i]
	}

	c4 := c2
	for i := p; c4 != '/' && i < len(path2); i++ {
		c4 = path2[i]
	}

	if c3 == '/' {
		if c4 == '/' {
			// We are comparing two directory components
			if c1 == '/' {
				// left is shorter
				// Note that c2 may be smaller than c1, but c1 is '/' which is counted
				// as 0
				return -1
			} else if c2 == '/' {
				// right is shorter
				return 1
			} else {
				return int(c1) - int(c2)
			}
		} else {
			// left descends into a directory while right is a file: file first
			return 1
		}
	} else {
		// We're at the last component of left and left is a file
		if c4 == '/' {
			// the current component of right is a directory
			return -1
		} else {
			return int(c1) - int(c2)
		}
	}
}

// This is used to sort entries by their names.
type ByName []*Entry

func (entries ByName) Len() int { return len(entries) }
func (entries ByName) Swap(i, j int) { entries[i], entries[j] = entries[j], entries[i] }
func (entries ByName) Less(i, j int) bool {
	return entries[i].Compare(entries[j]) < 0
}

// This is used to sort entries by their starting chunks (and starting offsets if the starting chunks are the same).
+type ByChunk []*Entry + +func (entries ByChunk) Len() int { return len(entries) } +func (entries ByChunk) Swap(i, j int) { entries[i], entries[j] = entries[j], entries[i] } +func (entries ByChunk) Less(i, j int) bool { + return entries[i].StartChunk < entries[j].StartChunk || + (entries[i].StartChunk == entries[j].StartChunk && entries[i].StartOffset < entries[j].StartOffset) +} + +// This is used to sort FileInfo objects. +type FileInfoCompare []os.FileInfo + +func (files FileInfoCompare) Len() int { return len(files) } +func (files FileInfoCompare) Swap(i, j int) { files[i], files[j] = files[j], files[i] } +func (files FileInfoCompare) Less(i, j int) bool { + + left := files[i] + right := files[j] + + if left.IsDir() && left.Mode() & os.ModeSymlink == 0 { + if right.IsDir() && right.Mode() & os.ModeSymlink == 0 { + return left.Name() < right.Name() + } else { + return false + } + } else { + if right.IsDir() && right.Mode() & os.ModeSymlink == 0 { + return true + } else { + return left.Name() < right.Name() + } + } +} + +// ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths +// are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files. 
+func ListEntries(top string, path string, fileList *[]*Entry, patterns [] string, discardAttributes bool) (directoryList []*Entry, + skippedFiles [] string, err error) { + + LOG_DEBUG("LIST_ENTRIES", "Listing %s", path) + + fullPath := joinPath(top, path) + + files := make([]os.FileInfo, 0, 1024) + + files, err = ioutil.ReadDir(fullPath) + if err != nil { + return directoryList, nil, err + } + + normalizedPath := path + if len(normalizedPath) > 0 && normalizedPath[len(normalizedPath) - 1] != '/' { + normalizedPath += "/" + } + + normalizedTop := top + if normalizedTop != "" && normalizedTop[len(normalizedTop) - 1] != '/' { + normalizedTop += "/" + } + + sort.Sort(FileInfoCompare(files)) + + entries := make([]*Entry, 0, 4) + + for _, f := range files { + if f.Name() == DUPLICACY_DIRECTORY { + continue + } + entry := CreateEntryFromFileInfo(f, normalizedPath) + if len(patterns) > 0 && !MatchPath(entry.Path, patterns) { + LOG_DEBUG("LIST_EXCLUDE", "%s is excluded", entry.Path) + continue + } + if entry.IsLink() { + isRegular := false + isRegular, entry.Link, err = Readlink(filepath.Join(top, entry.Path)) + if err != nil { + LOG_WARN("LIST_LINK", "Failed to read the symlink %s: %v", entry.Path, err ) + skippedFiles = append(skippedFiles, entry.Path) + continue + } + + if isRegular { + entry.Mode ^= uint32(os.ModeSymlink) + } else if path == "" && filepath.IsAbs(entry.Link) && !strings.HasPrefix(entry.Link, normalizedTop) { + stat, err := os.Stat(filepath.Join(top, entry.Path)) + if err != nil { + LOG_WARN("LIST_LINK", "Failed to read the symlink: %v", err ) + skippedFiles = append(skippedFiles, entry.Path) + continue + } + entry = CreateEntryFromFileInfo(stat, "") + } + } + + if !discardAttributes { + entry.ReadAttributes(top) + } + + if f.Mode() & (os.ModeNamedPipe | os.ModeSocket | os.ModeDevice) != 0 { + LOG_WARN("LIST_SKIP", "Skipped non-regular file %s", entry.Path) + skippedFiles = append(skippedFiles, entry.Path) + continue + } + + entries = append(entries, 
entry) + } + + // For top level directory we need to sort again because symlinks may have been changed + if path == "" { + sort.Sort(ByName(entries)) + } + + for _, entry := range entries { + if entry.IsDir() { + directoryList = append(directoryList, entry) + } else { + *fileList = append(*fileList, entry) + } + } + + for i, j := 0, len(directoryList) - 1; i < j; i, j = i + 1, j - 1 { + directoryList[i], directoryList[j] = directoryList[j], directoryList[i] + } + + return directoryList, skippedFiles, nil +} + +// Diff returns how many bytes remain unmodifiled between two files. +func (entry *Entry) Diff(chunkHashes[]string, chunkLengths[]int, + otherHashes[]string, otherLengths [] int) (modifiedLength int64) { + + var offset1, offset2 int64 + i1 := entry.StartChunk + i2 := 0 + for i1 <= entry.EndChunk && i2 < len(otherHashes) { + + start := 0 + if i1 == entry.StartChunk { + start = entry.StartOffset + } + end := chunkLengths[i1] + if i1 == entry.EndChunk { + end = entry.EndOffset + } + + if offset1 < offset2 { + modifiedLength += int64(end - start) + offset1 += int64(end - start) + i1++ + } else if offset1 > offset2 { + offset2 += int64(otherLengths[i2]) + i2++ + } else { + if chunkHashes[i1] == otherHashes[i2] && end - start == otherLengths[i2] { + } else { + modifiedLength += int64(chunkLengths[i1]) + } + offset1 += int64(end - start) + offset2 += int64(otherLengths[i2]) + i1++ + i2++ + } + } + + return modifiedLength +} diff --git a/duplicacy_entry_test.go b/duplicacy_entry_test.go new file mode 100644 index 0000000..d508e1d --- /dev/null +++ b/duplicacy_entry_test.go @@ -0,0 +1,220 @@ +// Copyright (c) Acrosync LLC. All rights reserved. 
+// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "testing" + "io/ioutil" + "os" + "path/filepath" + "math/rand" + "sort" +) + +func TestEntrySort(t *testing.T) { + + DATA := [...]string { + "ab", + "ab-", + "ab0", + "ab1", + "\xBB\xDDfile", + "\xFF\xDDfile", + "ab/", + "ab/c", + "ab+/c-", + "ab+/c0", + "ab+/c/", + "ab+/c/d", + "ab+/c+/", + "ab+/c+/d", + "ab+/c0/", + "ab+/c0/d", + "ab-/", + "ab-/c", + "ab0/", + "ab1/", + "ab1/c", + "ab1/\xBB\xDDfile", + "ab1/\xFF\xDDfile", + } + + var entry1, entry2 *Entry + + for i, p1 := range DATA { + if p1[len(p1) - 1] == '/' { + entry1 = CreateEntry(p1, 0, 0, 0700 | uint32(os.ModeDir)) + } else { + entry1 = CreateEntry(p1, 0, 0, 0700) + } + for j, p2 := range DATA { + + if p2[len(p2) - 1] == '/' { + entry2 = CreateEntry(p2, 0, 0, 0700 | uint32(os.ModeDir)) + } else { + entry2 = CreateEntry(p2, 0, 0, 0700) + } + + compared := entry1.Compare(entry2) + + if compared < 0 { + compared = -1 + } else if compared > 0 { + compared = 1 + } + + var expected int + if i < j { + expected = -1 + } else if i > j { + expected = 1 + } else { + expected = 0 + } + + if compared != expected { + t.Errorf("%s vs %s: %d, expected: %d", p1, p2, compared, expected) + } + + } + } +} + +func TestEntryList(t *testing.T) { + + testDir := filepath.Join(os.TempDir(), "duplicacy_test") + os.RemoveAll(testDir) + os.MkdirAll(testDir, 0700) + + DATA := [...]string { + "ab", + "ab-", + "ab0", + "ab1", + "ab+/", + "ab+/c", + "ab+/c+", + "ab+/c1", + "ab+/c-/", + "ab+/c-/d", + "ab+/c0/", + "ab+/c0/d", + "ab2/", + "ab2/c", + "ab3/", + "ab3/c", + } + + + var entry1, entry2 *Entry + + for i, p1 := range DATA { + if p1[len(p1) - 1] == '/' { + entry1 = CreateEntry(p1, 0, 0, 0700 | uint32(os.ModeDir)) + } else { + entry1 = CreateEntry(p1, 0, 0, 0700) + } + for j, p2 := range DATA { + + if p2[len(p2) - 1] == '/' { + entry2 = CreateEntry(p2, 0, 0, 0700 | uint32(os.ModeDir)) + } else { + entry2 = 
CreateEntry(p2, 0, 0, 0700) + } + + compared := entry1.Compare(entry2) + + if compared < 0 { + compared = -1 + } else if compared > 0 { + compared = 1 + } + + var expected int + if i < j { + expected = -1 + } else if i > j { + expected = 1 + } else { + expected = 0 + } + + if compared != expected { + t.Errorf("%s vs %s: %d, expected: %d", p1, p2, compared, expected) + } + + } + } + + for _, file := range DATA { + + fullPath := filepath.Join(testDir, file) + if file[len(file) - 1] == '/' { + err := os.Mkdir(fullPath, 0700) + if err != nil { + t.Errorf("Mkdir(%s) returned an error: %s", fullPath, err) + } + continue + } + + err := ioutil.WriteFile(fullPath, []byte(file), 0700) + if err != nil { + t.Errorf("WriteFile(%s) returned an error: %s", fullPath, err) + } + } + + directories := make([]*Entry, 0, 4) + directories = append(directories, CreateEntry("", 0, 0, 0)) + + entries := make([]*Entry, 0, 4) + + for len(directories) > 0 { + directory := directories[len(directories) - 1] + directories = directories[:len(directories) - 1] + entries = append(entries, directory) + subdirectories, _, err := ListEntries(testDir, directory.Path, &entries, nil, false) + if err != nil { + t.Errorf("ListEntries(%s, %s) returned an error: %s", testDir, directory.Path, err) + } + directories = append(directories, subdirectories...) 
+ } + + entries = entries[1:] + + for _, entry := range entries { + t.Logf("entry: %s", entry.Path) + } + + if len(entries) != len(DATA) { + t.Errorf("Got %d entries instead of %d", len(entries), len(DATA)) + return + } + + for i := 0; i < len(entries); i++ { + if entries[i].Path != DATA[i] { + t.Errorf("entry: %s, expected: %s", entries[i].Path, DATA[i]) + } + } + + t.Logf("shuffling %d entries", len(entries)) + for i := range entries { + j := rand.Intn(i + 1) + entries[i], entries[j] = entries[j], entries[i] + } + + sort.Sort(ByName(entries)) + + for i := 0; i < len(entries); i++ { + if entries[i].Path != DATA[i] { + t.Errorf("entry: %s, expected: %s", entries[i].Path, DATA[i]) + } + } + + if !t.Failed() { + os.RemoveAll(testDir) + } + +} + diff --git a/duplicacy_filereader.go b/duplicacy_filereader.go new file mode 100644 index 0000000..dff12b2 --- /dev/null +++ b/duplicacy_filereader.go @@ -0,0 +1,74 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "os" +) + +// FileReader wraps a number of files and turns them into a series of readers. +type FileReader struct { + top string + files [] *Entry + + CurrentFile *os.File + CurrentIndex int + CurrentEntry *Entry + + SkippedFiles [] string +} + +// CreateFileReader creates a file reader. +func CreateFileReader(top string, files[] *Entry) (*FileReader) { + + reader := &FileReader { + top: top, + files: files, + CurrentIndex: -1, + } + + reader.NextFile() + + return reader +} + +// NextFile switchs to the next file in the file reader. 
+func (reader *FileReader) NextFile() bool{ + + if reader.CurrentFile != nil { + reader.CurrentFile.Close() + } + + reader.CurrentIndex++ + for reader.CurrentIndex < len(reader.files) { + + reader.CurrentEntry = reader.files[reader.CurrentIndex] + if !reader.CurrentEntry.IsFile() || reader.CurrentEntry.Size == 0 { + reader.CurrentIndex++ + continue + } + + var err error + + fullPath := joinPath(reader.top, reader.CurrentEntry.Path) + reader.CurrentFile, err = os.OpenFile(fullPath, os.O_RDONLY, 0) + if err != nil { + LOG_WARN("OPEN_FAILURE", "Failed to open file for reading: %v", err) + reader.CurrentEntry.Size = 0 + reader.SkippedFiles = append(reader.SkippedFiles, reader.CurrentEntry.Path) + reader.CurrentIndex++ + continue + } + + return true + } + + reader.CurrentFile = nil + return false +} + + + + diff --git a/duplicacy_filestorage.go b/duplicacy_filestorage.go new file mode 100644 index 0000000..74c289a --- /dev/null +++ b/duplicacy_filestorage.go @@ -0,0 +1,252 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "os" + "fmt" + "path" + "io" + "io/ioutil" + "time" + "math/rand" +) + +// FileStorage is a local on-disk file storage implementing the Storage interface. +type FileStorage struct { + RateLimitedStorage + + storageDir string + numberOfThreads int +} + +// CreateFileStorage creates a file storage. 
+func CreateFileStorage(storageDir string, threads int) (storage *FileStorage, err error) { + + var stat os.FileInfo + + stat, err = os.Stat(storageDir) + if os.IsNotExist(err) { + err = os.MkdirAll(storageDir, 0744) + if err != nil { + return nil, err + } + } else { + if !stat.IsDir() { + return nil, fmt.Errorf("The storage path %s is a file", storageDir) + } + } + + for storageDir[len(storageDir) - 1] == '/' { + storageDir = storageDir[:len(storageDir) - 1] + } + + storage = &FileStorage { + storageDir : storageDir, + numberOfThreads: threads, + } + + // Random number fo generating the temporary chunk file suffix. + rand.Seed(time.Now().UnixNano()) + + return storage, nil +} + +// ListFiles return the list of files and subdirectories under 'dir' (non-recursively). +func (storage *FileStorage) ListFiles(threadIndex int, dir string) (files []string, sizes []int64, err error) { + + fullPath := path.Join(storage.storageDir, dir) + + list, err := ioutil.ReadDir(fullPath) + if err != nil { + if os.IsNotExist(err) { + return nil, nil, nil + } + return nil, nil, err + } + + for _, f := range list { + name := f.Name() + if f.IsDir() && name[len(name) - 1] != '/' { + name += "/" + } + files = append(files, name) + sizes = append(sizes, f.Size()) + } + + return files, sizes, nil +} + +// DeleteFile deletes the file or directory at 'filePath'. +func (storage *FileStorage) DeleteFile(threadIndex int, filePath string) (err error) { + err = os.Remove(path.Join(storage.storageDir, filePath)) + if err == nil || os.IsNotExist(err) { + return nil + } else { + return err + } +} + +// MoveFile renames the file. +func (storage *FileStorage) MoveFile(threadIndex int, from string, to string) (err error) { + return os.Rename(path.Join(storage.storageDir, from), path.Join(storage.storageDir, to)) +} + +// CreateDirectory creates a new directory. 
+func (storage *FileStorage) CreateDirectory(threadIndex int, dir string) (err error) { + err = os.Mkdir(path.Join(storage.storageDir, dir), 0744) + if err != nil && os.IsExist(err) { + return nil + } else { + return err + } +} + +// GetFileInfo returns the information about the file or directory at 'filePath'. +func (storage *FileStorage) GetFileInfo(threadIndex int, filePath string) (exist bool, isDir bool, size int64, err error) { + stat, err := os.Stat(path.Join(storage.storageDir, filePath)) + if err != nil { + if os.IsNotExist(err) { + return false, false, 0, nil + } else { + return false, false, 0, err + } + } + + return true, stat.IsDir(), stat.Size(), nil +} + +// FindChunk finds the chunk with the specified id. If 'isFossil' is true, it will search for chunk files with the +// suffix '.fsl'. +func (storage *FileStorage) FindChunk(threadIndex int, chunkID string, isFossil bool) (filePath string, exist bool, size int64, err error) { + dir := path.Join(storage.storageDir, "chunks") + + suffix := "" + if isFossil { + suffix = ".fsl" + } + + // The minimum level of directories to dive into before searching for the chunk file. + minimumLevel := 2 + + for level := 0; level * 2 < len(chunkID); level ++ { + if level >= minimumLevel { + filePath = path.Join(dir, chunkID[2 * level:]) + suffix + if stat, err := os.Stat(filePath); err == nil && !stat.IsDir() { + return filePath[len(storage.storageDir) + 1:], true, stat.Size(), nil + } else if err == nil && stat.IsDir() { + return filePath[len(storage.storageDir) + 1:], true, 0, fmt.Errorf("The path %s is a directory", filePath) + } + } + + // Find the subdirectory the chunk file may reside. + subDir := path.Join(dir, chunkID[2 * level: 2 * level + 2]) + stat, err := os.Stat(subDir) + if err == nil && stat.IsDir() { + dir = subDir + continue + } + + if level < minimumLevel { + // Create the subdirectory if it doesn't exist. 
+ + if err == nil && !stat.IsDir() { + return "", false, 0, fmt.Errorf("The path %s is not a directory", subDir) + } + + err = os.Mkdir(subDir, 0744) + if err != nil { + return "", false, 0, err + } + + dir = subDir + continue + } + + // The chunk must be under this subdirectory but it doesn't exist. + return path.Join(dir, chunkID[2 * level:])[len(storage.storageDir) + 1:] + suffix, false, 0, nil + + } + + LOG_FATAL("CHUNK_FIND", "Chunk %s is still not found after having searched a maximum level of directories", + chunkID) + return "", false, 0, nil + +} + +// DownloadFile reads the file at 'filePath' into the chunk. +func (storage *FileStorage) DownloadFile(threadIndex int, filePath string, chunk *Chunk) (err error) { + + file, err := os.Open(path.Join(storage.storageDir, filePath)) + + if err != nil { + return err + } + + defer file.Close() + if _, err = RateLimitedCopy(chunk, file, storage.DownloadRateLimit / storage.numberOfThreads); err != nil { + return err + } + + return nil + +} + +// UploadFile writes 'content' to the file at 'filePath' +func (storage *FileStorage) UploadFile(threadIndex int, filePath string, content []byte) (err error) { + + fullPath := path.Join(storage.storageDir, filePath) + + letters := "abcdefghijklmnopqrstuvwxyz" + suffix := make([]byte, 8) + for i := range suffix { + suffix[i] = letters[rand.Intn(len(letters))] + } + + temporaryFile := fullPath + "." 
+ string(suffix) + ".tmp" + + file, err := os.OpenFile(temporaryFile, os.O_WRONLY | os.O_CREATE | os.O_TRUNC, 0644) + if err != nil { + return err + } + + reader := CreateRateLimitedReader(content, storage.UploadRateLimit / storage.numberOfThreads) + _, err = io.Copy(file, reader) + if err != nil { + file.Close() + return err + } + + file.Close() + + err = os.Rename(temporaryFile, fullPath) + if err != nil { + + if _, e := os.Stat(fullPath); e == nil { + os.Remove(temporaryFile) + return nil + } else { + return err + } + } + + return nil +} + +// If a local snapshot cache is needed for the storage to avoid downloading/uploading chunks too often when +// managing snapshots. +func (storage *FileStorage) IsCacheNeeded () (bool) { return false } + +// If the 'MoveFile' method is implemented. +func (storage *FileStorage) IsMoveFileImplemented() (bool) { return true } + +// If the storage can guarantee strong consistency. +func (storage *FileStorage) IsStrongConsistent() (bool) { return true } + +// If the storage supports fast listing of files names. +func (storage *FileStorage) IsFastListing() (bool) { return false } + +// Enable the test mode. +func (storage *FileStorage) EnableTestMode() {} diff --git a/duplicacy_gcdstorage.go b/duplicacy_gcdstorage.go new file mode 100644 index 0000000..d51a3bc --- /dev/null +++ b/duplicacy_gcdstorage.go @@ -0,0 +1,619 @@ +// Copyright (c) Acrosync LLC. All rights reserved. 
+// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "io" + "fmt" + "net" + "path" + "time" + "sync" + "strings" + "net/http" + "net/url" + "io/ioutil" + "math/rand" + "encoding/json" + + "golang.org/x/net/context" + "golang.org/x/oauth2" + "google.golang.org/api/drive/v3" + "google.golang.org/api/googleapi" +) + +type GCDStorage struct { + RateLimitedStorage + + service *drive.Service + idCache map[string]string + idCacheLock *sync.Mutex + backoff int + + numberOfThreads int + TestMode bool + +} + +type GCDConfig struct { + ClientID string `json:"client_id"` + ClientSecret string `json:"client_secret"` + Endpoint oauth2.Endpoint `json:"end_point"` + Token oauth2.Token `json:"token"` +} + +func (storage *GCDStorage) shouldRetry(err error) (bool, error) { + + retry := false + message := "" + if err == nil { + storage.backoff = 1 + return false, nil + } else if e, ok := err.(*googleapi.Error); ok { + if 500 <= e.Code && e.Code < 600 { + // Retry for 5xx response codes. + message = fmt.Sprintf("HTTP status code %d", e.Code) + retry = true + } else if e.Code == 429 { + // Too many requests{ + message = "HTTP status code 429" + retry = true + } else if e.Code == 403 { + // User Rate Limit Exceeded + message = "User Rate Limit Exceeded" + retry = true + } + } else if e, ok := err.(*url.Error); ok { + message = e.Error() + retry = true + } else if err == io.ErrUnexpectedEOF { + // Retry on unexpected EOFs and temporary network errors. 
+ message = "Unexpected EOF" + retry = true + } else if err, ok := err.(net.Error); ok { + message = "Temporary network error" + retry = err.Temporary() + } + + if !retry || storage.backoff >= 256{ + storage.backoff = 1 + return false, err + } + + delay := float32(storage.backoff) * rand.Float32() + LOG_DEBUG("GCD_RETRY", "%s; retrying after %.2f seconds", message, delay) + time.Sleep(time.Duration(float32(storage.backoff) * float32(time.Second))) + storage.backoff *= 2 + return true, nil +} + +func (storage *GCDStorage) convertFilePath(filePath string) (string) { + if strings.HasPrefix(filePath, "chunks/") && strings.HasSuffix(filePath, ".fsl") { + return "fossils/" + filePath[len("chunks/"):len(filePath) - len(".fsl")] + } + return filePath +} + +func (storage *GCDStorage) getPathID(path string) string { + storage.idCacheLock.Lock() + pathID := storage.idCache[path] + storage.idCacheLock.Unlock() + return pathID +} + +func (storage *GCDStorage) findPathID(path string) (string, bool) { + storage.idCacheLock.Lock() + pathID, ok := storage.idCache[path] + storage.idCacheLock.Unlock() + return pathID, ok +} + +func (storage *GCDStorage) savePathID(path string, pathID string) { + storage.idCacheLock.Lock() + storage.idCache[path] = pathID + storage.idCacheLock.Unlock() +} + +func (storage *GCDStorage) deletePathID(path string) { + storage.idCacheLock.Lock() + delete(storage.idCache, path) + storage.idCacheLock.Unlock() +} + +func (storage *GCDStorage) listFiles(parentID string, listFiles bool) ([]*drive.File, error) { + + if parentID == "" { + return nil, fmt.Errorf("No parent ID provided") + } + + files := []*drive.File {} + + startToken := "" + + query := "'" + parentID + "' in parents and " + if listFiles { + query += "mimeType != 'application/vnd.google-apps.folder'" + } else { + query += "mimeType = 'application/vnd.google-apps.folder'" + } + + maxCount := int64(1000) + if storage.TestMode { + maxCount = 8 + } + + for { + var fileList *drive.FileList + var err 
error + + for { + fileList, err = storage.service.Files.List().Q(query).Fields("nextPageToken", "files(name, mimeType, id, size)").PageToken(startToken).PageSize(maxCount).Do() + if retry, e := storage.shouldRetry(err); e == nil && !retry { + break + } else if retry { + continue + } else { + return nil, err + } + } + + files = append(files, fileList.Files...) + + startToken = fileList.NextPageToken + if startToken == "" { + break + } + } + + + return files, nil +} + +func (storage *GCDStorage) listByName(parentID string, name string) (string, bool, int64, error) { + + var fileList *drive.FileList + var err error + + for { + query := "name = '" + name + "' and '" + parentID + "' in parents" + fileList, err = storage.service.Files.List().Q(query).Fields("files(name, mimeType, id, size)").Do() + + if retry, e := storage.shouldRetry(err); e == nil && !retry { + break + } else if retry { + continue + } else { + return "", false, 0, err + } + } + + if len(fileList.Files) == 0 { + return "", false, 0, nil + } + + file := fileList.Files[0] + + return file.Id, file.MimeType == "application/vnd.google-apps.folder", file.Size, nil +} + +func (storage *GCDStorage) getIDFromPath(path string) (string, error) { + + fileID := "root" + + if rootID, ok := storage.findPathID(""); ok { + fileID = rootID + } + + names := strings.Split(path, "/") + current := "" + for i, name := range names { + + if len(current) == 0 { + current = name + } else { + current = current + "/" + name + } + + currentID, ok := storage.findPathID(current) + if ok { + fileID = currentID + continue + } + + var err error + var isDir bool + fileID, isDir, _, err = storage.listByName(fileID, name) + if err != nil { + return "", err + } + if fileID == "" { + return "", fmt.Errorf("Path %s doesn't exist", path) + } + if i != len(names) - 1 && !isDir { + return "", fmt.Errorf("Invalid path %s", path) + } + } + return fileID, nil +} + +// CreateGCDStorage creates a GCD storage object. 
+func CreateGCDStorage(tokenFile string, storagePath string, threads int) (storage *GCDStorage, err error) { + + description, err := ioutil.ReadFile(tokenFile) + if err != nil { + return nil, err + } + + gcdConfig := &GCDConfig {} + if err := json.Unmarshal(description, gcdConfig); err != nil { + return nil, err + } + + config := oauth2.Config{ + ClientID: gcdConfig.ClientID, + ClientSecret: gcdConfig.ClientSecret, + Endpoint: gcdConfig.Endpoint, + } + + authClient := config.Client(context.Background(), &gcdConfig.Token) + + service, err := drive.New(authClient) + if err != nil { + return nil, err + } + + storage = &GCDStorage { + service: service, + numberOfThreads: threads, + idCache: make(map[string]string), + idCacheLock: &sync.Mutex{}, + } + + storagePathID, err := storage.getIDFromPath(storagePath) + if err != nil { + return nil, err + } + + storage.idCache[""] = storagePathID + + for _, dir := range []string { "chunks", "snapshots", "fossils" } { + dirID, isDir, _, err := storage.listByName(storagePathID, dir) + if err != nil { + return nil, err + } + if dirID == "" { + err = storage.CreateDirectory(0, dir) + if err != nil { + return nil, err + } + } else if !isDir { + return nil, fmt.Errorf("%s/%s is not a directory", storagePath + "/" + dir) + } else { + storage.idCache[dir] = dirID + } + } + + return storage, nil + +} + +// ListFiles return the list of files and subdirectories under 'dir' (non-recursively) +func (storage *GCDStorage) ListFiles(threadIndex int, dir string) ([]string, []int64, error) { + for len(dir) > 0 && dir[len(dir) - 1] == '/' { + dir = dir[:len(dir) - 1] + } + + if dir == "snapshots" { + + files, err := storage.listFiles(storage.getPathID(dir), false) + if err != nil { + return nil, nil, err + } + + subDirs := []string{} + + for _, file := range files { + storage.savePathID("snapshots/" + file.Name, file.Id) + subDirs = append(subDirs, file.Name + "/") + } + return subDirs, nil, nil + } else if strings.HasPrefix(dir, "snapshots/") { + 
pathID, err := storage.getIDFromPath(dir) + if err != nil { + return nil, nil, err + } + + entries, err := storage.listFiles(pathID, true) + if err != nil { + return nil, nil, err + } + + files := []string{} + + for _, entry := range entries { + storage.savePathID(dir + "/" + entry.Name, entry.Id) + files = append(files, entry.Name) + } + return files, nil, nil + } else { + files := []string{} + sizes := []int64{} + + for _, parent := range []string { "chunks", "fossils" } { + entries, err := storage.listFiles(storage.getPathID(parent), true) + if err != nil { + return nil, nil, err + } + + for _, entry := range entries { + name := entry.Name + if parent == "fossils" { + name += ".fsl" + } + storage.savePathID(parent + "/" + entry.Name, entry.Id) + files = append(files, name) + sizes = append(sizes, entry.Size) + } + } + return files, sizes, nil + } + +} + +// DeleteFile deletes the file or directory at 'filePath'. +func (storage *GCDStorage) DeleteFile(threadIndex int, filePath string) (err error) { + filePath = storage.convertFilePath(filePath) + fileID, ok := storage.findPathID(filePath) + if !ok { + fileID, err = storage.getIDFromPath(filePath) + if err != nil { + LOG_TRACE("GCD_STORAGE", "Ignored file deletion error: %v", err) + return nil + } + } + + for { + err = storage.service.Files.Delete(fileID).Fields("id").Do() + if retry, err := storage.shouldRetry(err); err == nil && !retry { + storage.deletePathID(filePath) + return nil + } else if retry { + continue + } else { + if e, ok := err.(*googleapi.Error); ok && e.Code == 404 { + LOG_TRACE("GCD_STORAGE", "File %s has disappeared before deletion", filePath) + return nil + } + return err + } + } +} + +// MoveFile renames the file. 
+func (storage *GCDStorage) MoveFile(threadIndex int, from string, to string) (err error) { + + from = storage.convertFilePath(from) + to = storage.convertFilePath(to) + + fileID, ok := storage.findPathID(from) + if !ok { + return fmt.Errorf("Attempting to rename file %s with unknown id", to) + } + + fromParentID := storage.getPathID("chunks") + toParentID := storage.getPathID("fossils") + + if strings.HasPrefix(from, "fossils") { + fromParentID, toParentID = toParentID, fromParentID + } + + for { + _, err = storage.service.Files.Update(fileID, nil).AddParents(toParentID).RemoveParents(fromParentID).Do() + if retry, err := storage.shouldRetry(err); err == nil && !retry { + break + } else if retry { + continue + } else { + return err + } + } + + storage.savePathID(to, storage.getPathID(from)) + storage.deletePathID(from) + return nil +} + +// CreateDirectory creates a new directory. +func (storage *GCDStorage) CreateDirectory(threadIndex int, dir string) (err error) { + + for len(dir) > 0 && dir[len(dir) - 1] == '/' { + dir = dir[:len(dir) - 1] + } + + exist, isDir, _, err := storage.GetFileInfo(threadIndex, dir) + if err != nil { + return err + } + + if exist { + if !isDir { + return fmt.Errorf("%s is a file", dir) + } + return nil + } + + parentID := storage.getPathID("") + name := dir + + if strings.HasPrefix(dir, "snapshots/") { + parentID = storage.getPathID("snapshots") + name = dir[len("snapshots/"):] + } + + file := &drive.File { + Name: name, + MimeType: "application/vnd.google-apps.folder", + Parents: []string { parentID }, + } + + for { + file, err = storage.service.Files.Create(file).Fields("id").Do() + if retry, err := storage.shouldRetry(err); err == nil && !retry { + break + } else if retry { + continue + } else { + return err + } + } + + storage.savePathID(dir, file.Id) + return nil +} + +// GetFileInfo returns the information about the file or directory at 'filePath'. 
+func (storage *GCDStorage) GetFileInfo(threadIndex int, filePath string) (exist bool, isDir bool, size int64, err error) { + for len(filePath) > 0 && filePath[len(filePath) - 1] == '/' { + filePath = filePath[:len(filePath) - 1] + } + + // GetFileInfo is never called on a fossil + fileID, ok := storage.findPathID(filePath) + if !ok { + dir := path.Dir(filePath) + if dir == "." { + dir = "" + } + dirID, err := storage.getIDFromPath(dir) + if err != nil { + return false, false, 0, err + } + + fileID, isDir, size, err = storage.listByName(dirID, path.Base(filePath)) + if fileID != "" { + storage.savePathID(filePath, fileID) + } + return fileID != "", isDir, size, err + } + + for { + file, err := storage.service.Files.Get(fileID).Fields("id, mimeType").Do() + if retry, err := storage.shouldRetry(err); err == nil && !retry { + return true, file.MimeType == "application/vnd.google-apps.folder", file.Size, nil + } else if retry { + continue + } else { + return false, false, 0, err + } + } +} + +// FindChunk finds the chunk with the specified id. If 'isFossil' is true, it will search for chunk files with +// the suffix '.fsl'. +func (storage *GCDStorage) FindChunk(threadIndex int, chunkID string, isFossil bool) (filePath string, exist bool, size int64, err error) { + parentID := "" + filePath = "chunks/" + chunkID + realPath := storage.convertFilePath(filePath) + if isFossil { + parentID = storage.getPathID("fossils") + filePath += ".fsl" + } else { + parentID = storage.getPathID("chunks") + } + + fileID := "" + fileID, _, size, err = storage.listByName(parentID, chunkID) + if fileID != "" { + storage.savePathID(realPath, fileID) + } + return filePath, fileID != "", size, err +} + +// DownloadFile reads the file at 'filePath' into the chunk. 
+func (storage *GCDStorage) DownloadFile(threadIndex int, filePath string, chunk *Chunk) (err error) { + // We never download the fossil so there is no need to convert the path + fileID, ok := storage.findPathID(filePath) + if !ok { + fileID, err = storage.getIDFromPath(filePath) + if err != nil { + return err + } + storage.savePathID(filePath, fileID) + } + + var response *http.Response + + for { + response, err = storage.service.Files.Get(fileID).Download() + if retry, err := storage.shouldRetry(err); err == nil && !retry { + break + } else if retry { + continue + } else { + return err + } + } + + defer response.Body.Close() + + _, err = RateLimitedCopy(chunk, response.Body, storage.DownloadRateLimit / storage.numberOfThreads) + return err +} + +// UploadFile writes 'content' to the file at 'filePath'. +func (storage *GCDStorage) UploadFile(threadIndex int, filePath string, content []byte) (err error) { + + // We never upload a fossil so there is no need to convert the path + parent := path.Dir(filePath) + + if parent == "." { + parent = "" + } + + parentID, ok := storage.findPathID(parent) + if !ok { + parentID, err = storage.getIDFromPath(parent) + if err != nil { + return err + } + storage.savePathID(parent, parentID) + } + + file := &drive.File { + Name: path.Base(filePath), + MimeType: "application/octet-stream", + Parents: []string { parentID }, + } + + for { + reader := CreateRateLimitedReader(content, storage.UploadRateLimit / storage.numberOfThreads) + _, err = storage.service.Files.Create(file).Media(reader).Fields("id").Do() + if retry, err := storage.shouldRetry(err); err == nil && !retry { + break + } else if retry { + continue + } else { + return err + } + } + + return err +} + +// If a local snapshot cache is needed for the storage to avoid downloading/uploading chunks too often when +// managing snapshots. +func (storage *GCDStorage) IsCacheNeeded() (bool) { return true } + +// If the 'MoveFile' method is implemented. 
+func (storage *GCDStorage) IsMoveFileImplemented() (bool) { return true } + +// If the storage can guarantee strong consistency. +func (storage *GCDStorage) IsStrongConsistent() (bool) { return false } + +// If the storage supports fast listing of files names. +func (storage *GCDStorage) IsFastListing() (bool) { return true } + +// Enable the test mode. +func (storage *GCDStorage) EnableTestMode() { storage.TestMode = true } diff --git a/duplicacy_gcsstorage.go b/duplicacy_gcsstorage.go new file mode 100644 index 0000000..5767844 --- /dev/null +++ b/duplicacy_gcsstorage.go @@ -0,0 +1,303 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "io" + "fmt" + "net" + "time" + "net/url" + "math/rand" + "io/ioutil" + "encoding/json" + + "golang.org/x/net/context" + "golang.org/x/oauth2" + "golang.org/x/oauth2/google" + gcs "cloud.google.com/go/storage" + "google.golang.org/api/iterator" + "google.golang.org/api/option" + "google.golang.org/api/googleapi" +) + +type GCSStorage struct { + RateLimitedStorage + + bucket *gcs.BucketHandle + storageDir string + + numberOfThreads int + TestMode bool + +} + +type GCSConfig struct { + ClientID string `json:"client_id"` + ClientSecret string `json:"client_secret"` + Endpoint oauth2.Endpoint `json:"end_point"` + Token oauth2.Token `json:"token"` +} + +// CreateGCSStorage creates a GCD storage object. 
+func CreateGCSStorage(tokenFile string, bucketName string, storageDir string, threads int) (storage *GCSStorage, err error) { + + ctx := context.Background() + + description, err := ioutil.ReadFile(tokenFile) + if err != nil { + return nil, err + } + + var object map[string]interface {} + + err = json.Unmarshal(description, &object) + if err != nil { + return nil, err + } + + isServiceAccount := false + if value, ok := object["type"]; ok { + if authType, ok := value.(string); ok && authType == "service_account" { + isServiceAccount = true + } + } + + var tokenSource oauth2.TokenSource + + if isServiceAccount { + config, err := google.JWTConfigFromJSON(description, gcs.ScopeReadWrite) + if err != nil { + return nil, err + } + tokenSource = config.TokenSource(ctx) + } else { + gcsConfig := &GCSConfig {} + if err := json.Unmarshal(description, gcsConfig); err != nil { + return nil, err + } + + config := oauth2.Config{ + ClientID: gcsConfig.ClientID, + ClientSecret: gcsConfig.ClientSecret, + Endpoint: gcsConfig.Endpoint, + } + tokenSource = config.TokenSource(ctx, &gcsConfig.Token) + } + + options := option.WithTokenSource(tokenSource) + client, err := gcs.NewClient(ctx, options) + + bucket := client.Bucket(bucketName) + + if len(storageDir) > 0 && storageDir[len(storageDir) - 1] != '/' { + storageDir += "/" + } + + storage = &GCSStorage { + bucket: bucket, + storageDir: storageDir, + numberOfThreads: threads, + } + + return storage, nil + +} + +func (storage *GCSStorage) shouldRetry(backoff *int, err error) (bool, error) { + + retry := false + message := "" + if err == nil { + return false, nil + } else if e, ok := err.(*googleapi.Error); ok { + if 500 <= e.Code && e.Code < 600 { + // Retry for 5xx response codes. 
+ message = fmt.Sprintf("HTTP status code %d", e.Code) + retry = true + } else if e.Code == 429 { + // Too many requests{ + message = "HTTP status code 429" + retry = true + } else if e.Code == 403 { + // User Rate Limit Exceeded + message = "User Rate Limit Exceeded" + retry = true + } + } else if e, ok := err.(*url.Error); ok { + message = e.Error() + retry = true + } else if err == io.ErrUnexpectedEOF { + // Retry on unexpected EOFs and temporary network errors. + message = "Unexpected EOF" + retry = true + } else if err, ok := err.(net.Error); ok { + message = "Temporary network error" + retry = err.Temporary() + } + + if !retry || *backoff >= 256 { + return false, err + } + + delay := float32(*backoff) * rand.Float32() + LOG_INFO("GCS_RETRY", "%s; retrying after %.2f seconds", message, delay) + time.Sleep(time.Duration(float32(*backoff) * float32(time.Second))) + *backoff *= 2 + return true, nil +} + + +// ListFiles return the list of files and subdirectories under 'dir' (non-recursively) +func (storage *GCSStorage) ListFiles(threadIndex int, dir string) ([]string, []int64, error) { + for len(dir) > 0 && dir[len(dir) - 1] == '/' { + dir = dir[:len(dir) - 1] + } + + query := gcs.Query { + Prefix: storage.storageDir + dir + "/", + } + dirOnly := false + prefixLength := len(query.Prefix) + + if dir == "snapshots" { + query.Delimiter = "/" + dirOnly = true + } + + files := []string{} + sizes := []int64{} + iter := storage.bucket.Objects(context.Background(), &query) + for { + attributes, err := iter.Next() + if err == iterator.Done { + break + } + if err != nil { + return nil, nil, err + } + + if dirOnly { + if len(attributes.Prefix) != 0 { + prefix := attributes.Prefix + files = append(files, prefix[prefixLength:]) + } + } else { + if len(attributes.Prefix) == 0 { + files = append(files, attributes.Name[prefixLength:]) + sizes = append(sizes, attributes.Size) + } + } + } + + return files, sizes, nil +} + +// DeleteFile deletes the file or directory at 'filePath'. 
+func (storage *GCSStorage) DeleteFile(threadIndex int, filePath string) (err error) { + err = storage.bucket.Object(storage.storageDir + filePath).Delete(context.Background()) + if err == gcs.ErrObjectNotExist { + return nil + } + return err +} + +// MoveFile renames the file. +func (storage *GCSStorage) MoveFile(threadIndex int, from string, to string) (err error) { + + source := storage.bucket.Object(storage.storageDir + from) + destination := storage.bucket.Object(storage.storageDir + to) + + _, err = destination.CopierFrom(source).Run(context.Background()) + if err != nil { + return err + } + + return storage.DeleteFile(threadIndex, from) +} + +// CreateDirectory creates a new directory. +func (storage *GCSStorage) CreateDirectory(threadIndex int, dir string) (err error) { + return nil +} + +// GetFileInfo returns the information about the file or directory at 'filePath'. +func (storage *GCSStorage) GetFileInfo(threadIndex int, filePath string) (exist bool, isDir bool, size int64, err error) { + object := storage.bucket.Object(storage.storageDir + filePath) + + attributes, err := object.Attrs(context.Background()) + + if err != nil { + if err == gcs.ErrObjectNotExist { + return false, false, 0, nil + } else { + return false, false, 0, err + } + } + + return true, false, attributes.Size, nil +} + +// FindChunk finds the chunk with the specified id. If 'isFossil' is true, it will search for chunk files with +// the suffix '.fsl'. +func (storage *GCSStorage) FindChunk(threadIndex int, chunkID string, isFossil bool) (filePath string, exist bool, size int64, err error) { + filePath = "chunks/" + chunkID + if isFossil { + filePath += ".fsl" + } + + exist, _, size, err = storage.GetFileInfo(threadIndex, filePath) + + return filePath, exist, size, err +} + +// DownloadFile reads the file at 'filePath' into the chunk. 
+func (storage *GCSStorage) DownloadFile(threadIndex int, filePath string, chunk *Chunk) (err error) { + readCloser, err := storage.bucket.Object(storage.storageDir + filePath).NewReader(context.Background()) + if err != nil { + return err + } + defer readCloser.Close() + _, err = RateLimitedCopy(chunk, readCloser, storage.DownloadRateLimit / storage.numberOfThreads) + return err +} + +// UploadFile writes 'content' to the file at 'filePath'. +func (storage *GCSStorage) UploadFile(threadIndex int, filePath string, content []byte) (err error) { + + backoff := 1 + for { + writeCloser := storage.bucket.Object(storage.storageDir + filePath).NewWriter(context.Background()) + defer writeCloser.Close() + reader := CreateRateLimitedReader(content, storage.UploadRateLimit / storage.numberOfThreads) + _, err = io.Copy(writeCloser, reader) + + if retry, e := storage.shouldRetry(&backoff, err); e == nil && !retry { + break + } else if retry { + continue + } else { + return err + } + } + + return err +} + +// If a local snapshot cache is needed for the storage to avoid downloading/uploading chunks too often when +// managing snapshots. +func (storage *GCSStorage) IsCacheNeeded() (bool) { return true } + +// If the 'MoveFile' method is implemented. +func (storage *GCSStorage) IsMoveFileImplemented() (bool) { return true } + +// If the storage can guarantee strong consistency. +func (storage *GCSStorage) IsStrongConsistent() (bool) { return true } + +// If the storage supports fast listing of files names. +func (storage *GCSStorage) IsFastListing() (bool) { return true } + +// Enable the test mode. +func (storage *GCSStorage) EnableTestMode() { storage.TestMode = true } diff --git a/duplicacy_hubicclient.go b/duplicacy_hubicclient.go new file mode 100644 index 0000000..6ac7ecc --- /dev/null +++ b/duplicacy_hubicclient.go @@ -0,0 +1,449 @@ +// Copyright (c) Acrosync LLC. All rights reserved. 
+// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "fmt" + "time" + "sync" + "bytes" + "strings" + "io/ioutil" + "encoding/json" + "io" + "net/http" + net_url "net/url" + "math/rand" + + "golang.org/x/oauth2" +) + +type HubicError struct { + Status int + Message string +} + +func (err HubicError) Error() string { + return fmt.Sprintf("%d %s", err.Status, err.Message) +} + +var HubicRefreshTokenURL = "https://duplicacy.com/hubic_refresh" +var HubicCredentialURL = "https://api.hubic.com/1.0/account/credentials" + +type HubicCredential struct { + Token string + Endpoint string + Expires time.Time +} + +type HubicClient struct { + HTTPClient *http.Client + + TokenFile string + Token *oauth2.Token + TokenLock *sync.Mutex + + Credential HubicCredential + CredentialLock *sync.Mutex + + TestMode bool +} + +func NewHubicClient(tokenFile string) (*HubicClient, error) { + + description, err := ioutil.ReadFile(tokenFile) + if err != nil { + return nil, err + } + + token := new(oauth2.Token) + if err := json.Unmarshal(description, token); err != nil { + return nil, fmt.Errorf("%v: %s", err, description) + } + + client := &HubicClient{ + HTTPClient: http.DefaultClient, + TokenFile: tokenFile, + Token: token, + TokenLock: &sync.Mutex{}, + CredentialLock: &sync.Mutex{}, + } + + err = client.RefreshToken() + if err != nil { + return nil, err + } + + err = client.GetCredential() + if err != nil { + return nil, err + } + + return client, nil +} + +func (client *HubicClient) call(url string, method string, input interface{}, extraHeader map[string]string) (io.ReadCloser, int64, string, error) { + + var response *http.Response + + backoff := 1 + for i := 0; i < 8; i++ { + + LOG_DEBUG("HUBIC_CALL", "%s %s", method, url) + + //fmt.Printf("%s %s\n", method, url) + + var inputReader io.Reader + + switch input.(type) { + default: + jsonInput, err := json.Marshal(input) + if err != nil { + return nil, 0, "", err + 
} + inputReader = bytes.NewReader(jsonInput) + case []byte: + inputReader = bytes.NewReader(input.([]byte)) + case int: + inputReader = bytes.NewReader([]byte("")) + case *bytes.Buffer: + inputReader = bytes.NewReader(input.(*bytes.Buffer).Bytes()) + case *RateLimitedReader: + input.(*RateLimitedReader).Reset() + inputReader = input.(*RateLimitedReader) + } + + request, err := http.NewRequest(method, url, inputReader) + if err != nil { + return nil, 0, "", err + } + + if reader, ok := inputReader.(*RateLimitedReader); ok { + request.ContentLength = reader.Length() + } + + if url == HubicCredentialURL { + client.TokenLock.Lock() + request.Header.Set("Authorization", "Bearer " + client.Token.AccessToken) + client.TokenLock.Unlock() + } else if url != HubicRefreshTokenURL { + client.CredentialLock.Lock() + request.Header.Set("X-Auth-Token", client.Credential.Token) + client.CredentialLock.Unlock() + } + + for key, value := range extraHeader { + request.Header.Set(key, value) + } + + response, err = client.HTTPClient.Do(request) + if err != nil { + return nil, 0, "", err + } + + contentType := "" + if len(response.Header["Content-Type"]) > 0 { + contentType = response.Header["Content-Type"][0] + } + + if response.StatusCode < 400 { + return response.Body, response.ContentLength, contentType, nil + } + + /*buffer := bytes.NewBufferString("") + io.Copy(buffer, response.Body) + fmt.Printf("%s\n", buffer.String())*/ + + response.Body.Close() + + if response.StatusCode == 401 { + + if url == HubicRefreshTokenURL { + return nil, 0, "", HubicError { Status: response.StatusCode, Message: "Authorization error when refreshing token"} + } + + if url == HubicCredentialURL { + return nil, 0, "", HubicError { Status: response.StatusCode, Message: "Authorization error when retrieving credentials"} + } + + err = client.RefreshToken() + if err != nil { + return nil, 0, "", err + } + + err = client.GetCredential() + if err != nil { + return nil, 0, "", err + } + continue + } else if 
response.StatusCode >= 500 && response.StatusCode < 600 { + retryAfter := time.Duration(rand.Float32() * 1000.0 * float32(backoff)) + LOG_INFO("HUBIC_RETRY", "Response status: %d; retry after %d milliseconds", response.StatusCode, retryAfter) + time.Sleep(retryAfter * time.Millisecond) + backoff *= 2 + continue + } else { + return nil, 0, "", HubicError { Status: response.StatusCode, Message: "Hubic API error"} + } + } + + return nil, 0, "", fmt.Errorf("Maximum number of retries reached") +} + +func (client *HubicClient) RefreshToken() (err error) { + client.TokenLock.Lock() + defer client.TokenLock.Unlock() + + if client.Token.Valid() { + return nil + } + + readCloser, _, _, err := client.call(HubicRefreshTokenURL, "POST", client.Token, nil) + if err != nil { + return err + } + + defer readCloser.Close() + + if err = json.NewDecoder(readCloser).Decode(&client.Token); err != nil { + return err + } + + description, err := json.Marshal(client.Token) + if err != nil { + return err + } + + err = ioutil.WriteFile(client.TokenFile, description, 0644) + if err != nil { + return err + } + + return nil +} + +func (client *HubicClient) GetCredential() (err error) { + client.CredentialLock.Lock() + defer client.CredentialLock.Unlock() + + readCloser, _, _, err := client.call(HubicCredentialURL, "GET", 0, nil) + if err != nil { + return err + } + + buffer := bytes.NewBufferString("") + io.Copy(buffer, readCloser) + readCloser.Close() + + if err = json.NewDecoder(buffer).Decode(&client.Credential); err != nil { + return fmt.Errorf("%v (response: %s)", err, buffer) + } + + return nil +} + +type HubicEntry struct { + Name string `json:"name"` + Size int64 `json:"bytes"` + Type string `json:"content_type"` + Subdir string `json:"subdir"` +} + +func (client *HubicClient) ListEntries(path string) ([]HubicEntry, error) { + + if len(path) > 0 && path[len(path) - 1] != '/' { + path += "/" + } + + count := 1000 + if client.TestMode { + count = 8 + } + + marker := "" + + var entries 
[]HubicEntry + + for { + + client.CredentialLock.Lock() + url := client.Credential.Endpoint + "/default" + client.CredentialLock.Unlock() + url += fmt.Sprintf("?format=json&limit=%d&delimiter=%%2f", count) + if path != "" { + url += "&prefix=" + net_url.QueryEscape(path) + } + if marker != "" { + url += "&marker=" + net_url.QueryEscape(marker) + } + + readCloser, _, _, err := client.call(url, "GET", 0, nil) + if err != nil { + return nil, err + } + + defer readCloser.Close() + + var output []HubicEntry + + if err = json.NewDecoder(readCloser).Decode(&output); err != nil { + return nil, err + } + + for _, entry := range output { + if entry.Subdir == "" { + marker = entry.Name + } else { + marker = entry.Subdir + for len(entry.Subdir) > 0 && entry.Subdir[len(entry.Subdir) - 1] == '/' { + entry.Subdir = entry.Subdir[:len(entry.Subdir) - 1] + } + entry.Name = entry.Subdir + entry.Type = "application/directory" + } + if path != "" && strings.HasPrefix(entry.Name, path) { + entry.Name = entry.Name[len(path):] + } + entries = append(entries, entry) + } + if len(output) < count { + break + } + } + + return entries, nil +} + +func (client *HubicClient) GetFileInfo(path string) (bool, bool, int64, error) { + + for len(path) > 0 && path[len(path) - 1] == '/' { + path = path[:len(path) - 1] + } + + client.CredentialLock.Lock() + url := client.Credential.Endpoint + "/default/" + path + client.CredentialLock.Unlock() + + readCloser, size, contentType, err := client.call(url, "HEAD", 0, nil) + if err != nil { + if e, ok := err.(HubicError); ok && e.Status == 404 { + return false, false, 0, nil + } else { + return false, false, 0, err + } + } + + readCloser.Close() + + return true, contentType == "application/directory", size, nil +} + +func (client *HubicClient) DownloadFile(path string) (io.ReadCloser, int64, error) { + + for len(path) > 0 && path[len(path) - 1] == '/' { + path = path[:len(path) - 1] + } + + client.CredentialLock.Lock() + url := client.Credential.Endpoint + 
"/default/" + path + client.CredentialLock.Unlock() + + readCloser, size, _, err := client.call(url, "GET", 0, nil) + return readCloser, size, err +} + +func (client *HubicClient) UploadFile(path string, content []byte, rateLimit int) (err error) { + + for len(path) > 0 && path[len(path) - 1] == '/' { + path = path[:len(path) - 1] + } + + client.CredentialLock.Lock() + url := client.Credential.Endpoint + "/default/" + path + client.CredentialLock.Unlock() + + header := make(map[string]string) + header["Content-Type"] = "application/octet-stream" + + readCloser, _, _, err := client.call(url, "PUT", CreateRateLimitedReader(content, rateLimit), header) + + if err != nil { + return err + } + + readCloser.Close() + return nil +} + +func (client *HubicClient) DeleteFile(path string) error { + + for len(path) > 0 && path[len(path) - 1] == '/' { + path = path[:len(path) - 1] + } + + client.CredentialLock.Lock() + url := client.Credential.Endpoint + "/default/" + path + client.CredentialLock.Unlock() + + readCloser, _, _, err := client.call(url, "DELETE", 0, nil) + + if err != nil { + return err + } + + readCloser.Close() + return nil +} + +func (client *HubicClient) MoveFile(from string, to string) error { + + for len(from) > 0 && from[len(from) - 1] == '/' { + from = from[:len(from) - 1] + } + + for len(to) > 0 && to[len(to) - 1] == '/' { + to = to[:len(to) - 1] + } + + client.CredentialLock.Lock() + url := client.Credential.Endpoint + "/default/" + from + client.CredentialLock.Unlock() + + header := make(map[string]string) + header["Destination"] = "default/" + to + + readCloser, _, _, err := client.call(url, "COPY", 0, header) + + if err != nil { + return err + } + + readCloser.Close() + + return client.DeleteFile(from) +} + +func (client *HubicClient) CreateDirectory(path string) (error) { + + for len(path) > 0 && path[len(path) - 1] == '/' { + path = path[:len(path) - 1] + } + + client.CredentialLock.Lock() + url := client.Credential.Endpoint + "/default/" + path + 
client.CredentialLock.Unlock() + + header := make(map[string]string) + header["Content-Type"] = "application/directory" + + readCloser, _, _, err := client.call(url, "PUT", "", header) + + if err != nil { + return err + } + + readCloser.Close() + return nil +} diff --git a/duplicacy_hubicclient_test.go b/duplicacy_hubicclient_test.go new file mode 100644 index 0000000..4f31954 --- /dev/null +++ b/duplicacy_hubicclient_test.go @@ -0,0 +1,149 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "io" + "fmt" + "testing" + "crypto/sha256" + "encoding/hex" + + crypto_rand "crypto/rand" + "math/rand" +) + +func TestHubicClient(t *testing.T) { + + hubicClient, err := NewHubicClient("hubic-token.json") + if err != nil { + t.Errorf("Failed to create the Hubic client: %v", err) + return + } + + hubicClient.TestMode = true + + existingFiles, err := hubicClient.ListEntries("") + for _, file := range existingFiles { + fmt.Printf("name: %s, isDir: %t\n", file.Name, file.Type == "application/directory") + } + + testExists, _, _, err := hubicClient.GetFileInfo("test") + if err != nil { + t.Errorf("Failed to list the test directory: %v", err) + return + } + if !testExists { + err = hubicClient.CreateDirectory("test") + if err != nil { + t.Errorf("Failed to create the test directory: %v", err) + return + } + } + + test1Exists, _, _, err := hubicClient.GetFileInfo("test/test1") + if err != nil { + t.Errorf("Failed to list the test1 directory: %v", err) + return + } + if !test1Exists { + err = hubicClient.CreateDirectory("test/test1") + if err != nil { + t.Errorf("Failed to create the test1 directory: %v", err) + return + } + } + + test2Exists, _, _, err := hubicClient.GetFileInfo("test/test2") + if err != nil { + t.Errorf("Failed to list the test2 directory: %v", err) + return + } + if !test2Exists { + err = hubicClient.CreateDirectory("test/test2") + 
if err != nil { + t.Errorf("Failed to create the test2 directory: %v", err) + return + } + } + + numberOfFiles := 20 + maxFileSize := 64 * 1024 + + for i := 0; i < numberOfFiles; i++ { + content := make([]byte, rand.Int() % maxFileSize + 1) + _, err = crypto_rand.Read(content) + if err != nil { + t.Errorf("Error generating random content: %v", err) + return + } + + hasher := sha256.New() + hasher.Write(content) + filename := hex.EncodeToString(hasher.Sum(nil)) + + fmt.Printf("file: %s\n", filename) + + err = hubicClient.UploadFile("test/test1/" + filename, content, 100) + if err != nil { + /*if e, ok := err.(ACDError); !ok || e.Status != 409 */ { + t.Errorf("Failed to upload the file %s: %v", filename, err) + return + } + } + } + + entries, err := hubicClient.ListEntries("test/test1") + if err != nil { + t.Errorf("Error list randomly generated files: %v", err) + return + } + + for _, entry := range entries { + + exists, isDir, size, err := hubicClient.GetFileInfo("test/test1/" + entry.Name) + fmt.Printf("%s exists: %t, isDir: %t, size: %d, err: %v\n", "test/test1/" + entry.Name, exists, isDir, size, err) + + err = hubicClient.MoveFile("test/test1/" + entry.Name, "test/test2/" + entry.Name) + if err != nil { + t.Errorf("Failed to move %s: %v", entry.Name, err) + return + } + } + + entries, err = hubicClient.ListEntries("test/test2") + if err != nil { + t.Errorf("Error list randomly generated files: %v", err) + return + } + + for _, entry := range entries { + readCloser, _, err := hubicClient.DownloadFile("test/test2/" + entry.Name) + if err != nil { + t.Errorf("Error downloading file %s: %v", entry.Name, err) + return + } + + hasher := sha256.New() + io.Copy(hasher, readCloser) + hash := hex.EncodeToString(hasher.Sum(nil)) + + if hash != entry.Name { + t.Errorf("File %s, hash %s", entry.Name, hash) + } + + readCloser.Close() + } + + for _, entry := range entries { + + err = hubicClient.DeleteFile("test/test2/" + entry.Name) + if err != nil { + t.Errorf("Failed to 
delete the file %s: %v", entry.Name, err) + return + } + } + +} diff --git a/duplicacy_hubicstorage.go b/duplicacy_hubicstorage.go new file mode 100644 index 0000000..981ebe5 --- /dev/null +++ b/duplicacy_hubicstorage.go @@ -0,0 +1,207 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "fmt" + "strings" +) + +type HubicStorage struct { + RateLimitedStorage + + client *HubicClient + storageDir string + numberOfThreads int +} + +// CreateHubicStorage creates an Hubic storage object. +func CreateHubicStorage(tokenFile string, storagePath string, threads int) (storage *HubicStorage, err error) { + + for len(storagePath) > 0 && storagePath[len(storagePath) - 1] == '/' { + storagePath = storagePath[:len(storagePath) - 1] + } + + client, err := NewHubicClient(tokenFile) + if err != nil { + return nil, err + } + + exists, isDir, _, err := client.GetFileInfo(storagePath) + if err != nil { + return nil, err + } + + if !exists { + return nil, fmt.Errorf("Path '%s' doesn't exist", storagePath) + } + + if !isDir { + return nil, fmt.Errorf("Path '%s' is not a directory", storagePath) + } + + storage = &HubicStorage { + client: client, + storageDir: storagePath, + numberOfThreads: threads, + } + + for _, path := range []string { "chunks", "snapshots" } { + dir := storagePath + "/" + path + exists, isDir, _, err := client.GetFileInfo(dir) + if err != nil { + return nil, err + } + if !exists { + err = client.CreateDirectory(storagePath + "/" + path) + if err != nil { + return nil, err + } + } else if !isDir { + return nil, fmt.Errorf("%s is not a directory", dir) + } + } + + return storage, nil + +} + +// ListFiles return the list of files and subdirectories under 'dir' (non-recursively) +func (storage *HubicStorage) ListFiles(threadIndex int, dir string) ([]string, []int64, error) { + for len(dir) > 0 && dir[len(dir) - 1] == '/' { + dir = 
dir[:len(dir) - 1] + } + + if dir == "snapshots" { + entries, err := storage.client.ListEntries(storage.storageDir + "/" + dir) + if err != nil { + return nil, nil, err + } + + subDirs := []string{} + for _, entry := range entries { + if entry.Type == "application/directory" { + subDirs = append(subDirs, entry.Name + "/") + } + } + return subDirs, nil, nil + } else if strings.HasPrefix(dir, "snapshots/") { + entries, err := storage.client.ListEntries(storage.storageDir + "/" + dir) + if err != nil { + return nil, nil, err + } + + files := []string{} + + for _, entry := range entries { + if entry.Type == "application/directory" { + continue + } + files = append(files, entry.Name) + } + return files, nil, nil + } else { + files := []string{} + sizes := []int64{} + entries, err := storage.client.ListEntries(storage.storageDir + "/chunks") + if err != nil { + return nil, nil, err + } + + for _, entry := range entries { + if entry.Type == "application/directory" { + continue + } + files = append(files, entry.Name) + sizes = append(sizes, entry.Size) + } + return files, sizes, nil + } + +} + +// DeleteFile deletes the file or directory at 'filePath'. +func (storage *HubicStorage) DeleteFile(threadIndex int, filePath string) (err error) { + err = storage.client.DeleteFile(storage.storageDir + "/" + filePath) + if e, ok := err.(HubicError); ok && e.Status == 404 { + LOG_DEBUG("HUBIC_DELETE", "Ignore 404 error") + return nil + } + return err +} + +// MoveFile renames the file. +func (storage *HubicStorage) MoveFile(threadIndex int, from string, to string) (err error) { + fromPath := storage.storageDir + "/" + from + toPath := storage.storageDir + "/" + to + + return storage.client.MoveFile(fromPath, toPath) +} + +// CreateDirectory creates a new directory. 
+func (storage *HubicStorage) CreateDirectory(threadIndex int, dir string) (err error) { + for len(dir) > 0 && dir[len(dir) - 1] == '/' { + dir = dir[:len(dir) - 1] + } + + return storage.client.CreateDirectory(storage.storageDir + "/" + dir) +} + +// GetFileInfo returns the information about the file or directory at 'filePath'. +func (storage *HubicStorage) GetFileInfo(threadIndex int, filePath string) (exist bool, isDir bool, size int64, err error) { + + for len(filePath) > 0 && filePath[len(filePath) - 1] == '/' { + filePath = filePath[:len(filePath) - 1] + } + return storage.client.GetFileInfo(storage.storageDir + "/" + filePath) +} + +// FindChunk finds the chunk with the specified id. If 'isFossil' is true, it will search for chunk files with +// the suffix '.fsl'. +func (storage *HubicStorage) FindChunk(threadIndex int, chunkID string, isFossil bool) (filePath string, exist bool, size int64, err error) { + filePath = "chunks/" + chunkID + if isFossil { + filePath += ".fsl" + } + + exist, _, size, err = storage.client.GetFileInfo(storage.storageDir + "/" + filePath) + return filePath, exist, size, err +} + +// DownloadFile reads the file at 'filePath' into the chunk. +func (storage *HubicStorage) DownloadFile(threadIndex int, filePath string, chunk *Chunk) (err error) { + readCloser, _, err := storage.client.DownloadFile(storage.storageDir + "/" + filePath) + if err != nil { + return err + } + + defer readCloser.Close() + + _, err = RateLimitedCopy(chunk, readCloser, storage.DownloadRateLimit / storage.numberOfThreads) + return err +} + +// UploadFile writes 'content' to the file at 'filePath'. 
+func (storage *HubicStorage) UploadFile(threadIndex int, filePath string, content []byte) (err error) { + return storage.client.UploadFile(storage.storageDir + "/" + filePath, content, storage.UploadRateLimit / storage.numberOfThreads) +} + +// If a local snapshot cache is needed for the storage to avoid downloading/uploading chunks too often when +// managing snapshots. +func (storage *HubicStorage) IsCacheNeeded() (bool) { return true } + +// If the 'MoveFile' method is implemented. +func (storage *HubicStorage) IsMoveFileImplemented() (bool) { return true } + +// If the storage can guarantee strong consistency. +func (storage *HubicStorage) IsStrongConsistent() (bool) { return false } + +// If the storage supports fast listing of files names. +func (storage *HubicStorage) IsFastListing() (bool) { return true } + +// Enable the test mode. +func (storage *HubicStorage) EnableTestMode() { + storage.client.TestMode = true +} diff --git a/duplicacy_keyring.go b/duplicacy_keyring.go new file mode 100644 index 0000000..f19f544 --- /dev/null +++ b/duplicacy_keyring.go @@ -0,0 +1,30 @@ +// Copyright (c) Acrosync LLC. All rights reserved. 
+// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +// +build !windows + +package duplicacy + +import ( + "github.com/gilbertchen/keyring" +) + +func SetKeyringFile(path string) { + // We only use keyring file on Windows +} + +func keyringGet(key string) (value string) { + value, err := keyring.Get("duplicacy", key) + if err != nil { + LOG_DEBUG("KEYRING_GET", "Failed to get the value from the keyring: %v", err) + } + return value +} + +func keyringSet(key string, value string) { + err := keyring.Set("duplicacy", key, value) + if err != nil { + LOG_DEBUG("KEYRING_GET", "Failed to store the value to the keyring: %v", err) + } +} diff --git a/duplicacy_keyring_windows.go b/duplicacy_keyring_windows.go new file mode 100644 index 0000000..26866d3 --- /dev/null +++ b/duplicacy_keyring_windows.go @@ -0,0 +1,160 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "syscall" + "unsafe" + "io/ioutil" + "encoding/json" +) + +var keyringFile string + +var ( + dllcrypt32 = syscall.NewLazyDLL("Crypt32.dll") + dllkernel32 = syscall.NewLazyDLL("Kernel32.dll") + + procEncryptData = dllcrypt32.NewProc("CryptProtectData") + procDecryptData = dllcrypt32.NewProc("CryptUnprotectData") + procLocalFree = dllkernel32.NewProc("LocalFree") +) + +type DATA_BLOB struct { + cbData uint32 + pbData *byte +} + +func SetKeyringFile(path string) { + keyringFile = path +} + +func keyringEncrypt(value []byte) ([]byte, error) { + + dataIn := DATA_BLOB { + pbData: &value[0], + cbData: uint32(len(value)), + } + dataOut := DATA_BLOB {} + + r, _, err := procEncryptData.Call(uintptr(unsafe.Pointer(&dataIn)), + 0, 0, 0, 0, 0, uintptr(unsafe.Pointer(&dataOut))) + if r == 0 { + return nil, err + } + + address := uintptr(unsafe.Pointer(dataOut.pbData)) + defer procLocalFree.Call(address) + + encryptedData := make([]byte, 
dataOut.cbData) + for i := 0; i < len(encryptedData); i++ { + encryptedData[i] = *(*byte)(unsafe.Pointer(uintptr(int(address) + i))) + } + return encryptedData, nil +} + +func keyringDecrypt(value []byte) ([]byte, error) { + + dataIn := DATA_BLOB { + pbData: &value[0], + cbData: uint32(len(value)), + } + dataOut := DATA_BLOB {} + + r, _, err := procDecryptData.Call(uintptr(unsafe.Pointer(&dataIn)), + 0, 0, 0, 0, 0, uintptr(unsafe.Pointer(&dataOut))) + if r == 0 { + return nil, err + } + + address := uintptr(unsafe.Pointer(dataOut.pbData)) + defer procLocalFree.Call(address) + + decryptedData := make([]byte, dataOut.cbData) + for i := 0; i < len(decryptedData); i++ { + address := int(uintptr(unsafe.Pointer(dataOut.pbData))) + decryptedData[i] = *(*byte)(unsafe.Pointer(uintptr(int(address) + i))) + } + return decryptedData, nil +} + +func keyringGet(key string) (value string) { + if keyringFile == "" { + LOG_DEBUG("KEYRING_NOT_INITIALIZED", "Keyring file not set") + return "" + } + + description, err := ioutil.ReadFile(keyringFile) + if err != nil { + LOG_DEBUG("KEYRING_READ", "Keyring file not read: %v", err) + return "" + } + + var keyring map[string][]byte + err = json.Unmarshal(description, &keyring) + if err != nil { + LOG_DEBUG("KEYRING_PARSE", "Failed to parse the keyring storage file %s: %v", keyringFile, err) + return "" + } + + encryptedValue := keyring[key] + + if len(encryptedValue) == 0 { + return "" + } + + valueInBytes, err := keyringDecrypt(encryptedValue) + if err != nil { + LOG_DEBUG("KEYRING_DECRYPT", "Failed to decrypt the value: %v", err) + return "" + } + + return string(valueInBytes) +} + +func keyringSet(key string, value string) bool { + if value == "" { + return false + } + if keyringFile == "" { + LOG_DEBUG("KEYRING_NOT_INITIALIZED", "Keyring file not set") + return false + } + + keyring := make(map[string][]byte) + + description, err := ioutil.ReadFile(keyringFile) + if err == nil { + err = json.Unmarshal(description, &keyring) + if err != 
nil { + LOG_DEBUG("KEYRING_PARSE", "Failed to parse the keyring storage file %s: %v", keyringFile, err) + } + } + + if value == "" { + keyring[key] = nil + } else { + encryptedValue, err := keyringEncrypt([]byte(value)) + if err != nil { + LOG_DEBUG("KEYRING_ENCRYPT", "Failed to encrypt the value: %v", err) + return false + } + keyring[key] = encryptedValue + } + + description, err = json.MarshalIndent(keyring, "", " ") + if err != nil { + LOG_DEBUG("KEYRING_MARSHAL", "Failed to marshal the keyring storage: %v", err) + return false + } + + err = ioutil.WriteFile(keyringFile, description, 0600) + if err != nil { + LOG_DEBUG("KEYRING_WRITE", "Failed to save the keyring storage to file %s: %v", keyringFile, err) + return false + } + + return true +} diff --git a/duplicacy_log.go b/duplicacy_log.go new file mode 100644 index 0000000..b2dfa22 --- /dev/null +++ b/duplicacy_log.go @@ -0,0 +1,177 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "os" + "fmt" + "time" + "sync" + "testing" + "runtime/debug" +) + +const ( + DEBUG = -2 + TRACE = -1 + INFO = 0 + WARN = 1 + ERROR = 2 + FATAL = 3 + ASSERT = 4 +) + + +var printLogHeader = false + +func EnableLogHeader() { + printLogHeader = true +} + +var printStackTrace = false + +func EnableStackTrace() { + printStackTrace = true +} + +var testingT *testing.T + +func setTestingT(t *testing.T) { + testingT = t +} + +func getLevelName(level int) string { + switch level { + case DEBUG: + return "DEBUG" + case TRACE: + return "TRACE" + case INFO: + return "INFO" + case WARN: + return "WARN" + case ERROR: + return "ERROR" + case FATAL: + return "FATAL" + case ASSERT: + return "ASSERT" + default: + return fmt.Sprintf("[%d]", level) + } +} + +var loggingLevel int + +func IsDebugging() bool { + return loggingLevel <= DEBUG +} + +func IsTracing() bool { + return loggingLevel <= TRACE +} + +func 
SetLoggingLevel(level int) { + loggingLevel = level +} + +func LOG_DEBUG(logID string, format string, v ...interface{}) { + logf(DEBUG, logID, format, v...) +} + +func LOG_TRACE(logID string, format string, v ...interface{}) { + logf(TRACE, logID, format, v...) +} + +func LOG_INFO(logID string, format string, v ...interface{}) { + logf(INFO, logID, format, v...) +} + +func LOG_WARN(logID string, format string, v ...interface{}) { + logf(WARN, logID, format, v...) +} + +func LOG_ERROR(logID string, format string, v ...interface{}) { + logf(ERROR, logID, format, v...) +} + +func LOG_FATAL(logID string, format string, v ...interface{}) { + logf(FATAL, logID, format, v...) +} + +func LOG_ASSERT(logID string, format string, v ...interface{}) { + logf(ASSERT, logID, format, v...) +} + +type Exception struct { + Level int + LogID string + Message string +} + +var logMutex sync.Mutex + +func logf(level int, logID string, format string, v ...interface{}) { + + message := fmt.Sprintf(format, v...) 
+ + now := time.Now() + + // Uncomment this line to enable unbufferred logging for tests + // fmt.Printf("%s %s %s %s\n", now.Format("2006-01-02 15:04:05.000"), getLevelName(level), logID, message) + + if testingT != nil { + if level < WARN { + if level >= loggingLevel { + testingT.Logf("%s %s %s %s\n", + now.Format("2006-01-02 15:04:05.000"), getLevelName(level), logID, message) + } + } else { + testingT.Errorf("%s %s %s %s\n", + now.Format("2006-01-02 15:04:05.000"), getLevelName(level), logID, message) + } + } else { + logMutex.Lock() + defer logMutex.Unlock() + + if level >= loggingLevel { + if printLogHeader { + fmt.Printf("%s %s %s %s\n", + now.Format("2006-01-02 15:04:05.000"), getLevelName(level), logID, message) + } else { + fmt.Printf("%s\n", message) + } + } + } + + if level > WARN { + panic(Exception{ + Level: level, + LogID: logID, + Message: message, + }) + } +} + +const ( + duplicacyExitCode = 100 + otherExitCode = 101 +) + +func CatchLogException() { + if r := recover(); r != nil { + switch e := r.(type) { + case Exception: + if printStackTrace { + debug.PrintStack() + } + os.Exit(duplicacyExitCode) + default: + fmt.Fprintf(os.Stderr, "%v\n", e) + debug.PrintStack() + os.Exit(otherExitCode) + } + } +} diff --git a/duplicacy_oneclient.go b/duplicacy_oneclient.go new file mode 100644 index 0000000..7f24e99 --- /dev/null +++ b/duplicacy_oneclient.go @@ -0,0 +1,357 @@ +// Copyright (c) Acrosync LLC. All rights reserved. 
+// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "fmt" + "time" + "sync" + "bytes" + "io/ioutil" + "encoding/json" + "io" + "net/http" + "math/rand" + + "golang.org/x/oauth2" +) + +type OneDriveError struct { + Status int + Message string +} + +func (err OneDriveError) Error() string { + return fmt.Sprintf("%d %s", err.Status, err.Message) +} + +type OneDriveErrorResponse struct { + Error OneDriveError `json:"error"` +} + +var OneDriveRefreshTokenURL = "https://duplicacy.com/one_refresh" +var OneDriveAPIURL = "https://api.onedrive.com/v1.0" + +type OneDriveClient struct { + HTTPClient *http.Client + + TokenFile string + Token *oauth2.Token + TokenLock *sync.Mutex + + TestMode bool +} + +func NewOneDriveClient(tokenFile string) (*OneDriveClient, error) { + + description, err := ioutil.ReadFile(tokenFile) + if err != nil { + return nil, err + } + + token := new(oauth2.Token) + if err := json.Unmarshal(description, token); err != nil { + return nil, err + } + + client := &OneDriveClient{ + HTTPClient: http.DefaultClient, + TokenFile: tokenFile, + Token: token, + TokenLock: &sync.Mutex{}, + } + + return client, nil +} + +func (client *OneDriveClient) call(url string, method string, input interface{}, contentType string) (io.ReadCloser, int64, error) { + + var response *http.Response + + backoff := 1 + for i := 0; i < 8; i++ { + + LOG_DEBUG("ONEDRIVE_CALL", "%s %s", method, url) + + var inputReader io.Reader + + switch input.(type) { + default: + jsonInput, err := json.Marshal(input) + if err != nil { + return nil, 0, err + } + inputReader = bytes.NewReader(jsonInput) + case []byte: + inputReader = bytes.NewReader(input.([]byte)) + case int: + inputReader = bytes.NewReader([]byte("")) + case *bytes.Buffer: + inputReader = bytes.NewReader(input.(*bytes.Buffer).Bytes()) + case *RateLimitedReader: + input.(*RateLimitedReader).Reset() + inputReader = input.(*RateLimitedReader) + } + + request, 
err := http.NewRequest(method, url, inputReader) + if err != nil { + return nil, 0, err + } + + if reader, ok := inputReader.(*RateLimitedReader); ok { + request.ContentLength = reader.Length() + } + + if url != OneDriveRefreshTokenURL { + client.TokenLock.Lock() + request.Header.Set("Authorization", "Bearer " + client.Token.AccessToken) + client.TokenLock.Unlock() + } + if contentType != "" { + request.Header.Set("Content-Type", contentType) + } + + response, err = client.HTTPClient.Do(request) + if err != nil { + return nil, 0, err + } + + if response.StatusCode < 400 { + return response.Body, response.ContentLength, nil + } + + defer response.Body.Close() + + errorResponse := &OneDriveErrorResponse { + Error: OneDriveError { Status: response.StatusCode }, + } + + if err := json.NewDecoder(response.Body).Decode(errorResponse); err != nil { + return nil, 0, OneDriveError { Status: response.StatusCode, Message: fmt.Sprintf("Unexpected response"), } + } + + errorResponse.Error.Status = response.StatusCode + + if response.StatusCode == 401 { + + if url == OneDriveRefreshTokenURL { + return nil, 0, OneDriveError { Status: response.StatusCode, Message: "Authorization error when refreshing token"} + } + + err = client.RefreshToken() + if err != nil { + return nil, 0, err + } + continue + } else if response.StatusCode == 500 || response.StatusCode == 503 || response.StatusCode == 509 { + retryAfter := time.Duration(rand.Float32() * 1000.0 * float32(backoff)) + LOG_INFO("ONEDRIVE_RETRY", "Response status: %d; retry after %d milliseconds", response.StatusCode, retryAfter) + time.Sleep(retryAfter * time.Millisecond) + backoff *= 2 + continue + } else { + return nil, 0, errorResponse.Error + } + } + + return nil, 0, fmt.Errorf("Maximum number of retries reached") +} + +func (client *OneDriveClient) RefreshToken() (err error) { + client.TokenLock.Lock() + defer client.TokenLock.Unlock() + + if client.Token.Valid() { + return nil + } + + readCloser, _, err := 
client.call(OneDriveRefreshTokenURL, "POST", client.Token, "")
+    if err != nil {
+        return err
+    }
+
+    defer readCloser.Close()
+
+    if err = json.NewDecoder(readCloser).Decode(client.Token); err != nil {
+        return err
+    }
+
+    description, err := json.Marshal(client.Token)
+    if err != nil {
+        return err
+    }
+
+    err = ioutil.WriteFile(client.TokenFile, description, 0644)
+    if err != nil {
+        return err
+    }
+
+    return nil
+}
+
+type OneDriveEntry struct {
+    ID string
+    Name string
+    Folder map[string] interface {}
+    Size int64
+}
+
+type OneDriveListEntriesOutput struct {
+    Entries []OneDriveEntry `json:"value"`
+    NextLink string `json:"@odata.nextLink"`
+}
+
+func (client *OneDriveClient) ListEntries(path string) ([]OneDriveEntry, error) {
+
+    entries := []OneDriveEntry{}
+
+    url := OneDriveAPIURL + "/drive/root:/" + path + ":/children"
+    if path == "" {
+        url = OneDriveAPIURL + "/drive/root/children"
+    }
+    if client.TestMode {
+        url += "?top=8"
+    } else {
+        url += "?top=1000"
+    }
+    url += "&select=name,size,folder"
+
+    for {
+        readCloser, _, err := client.call(url, "GET", 0, "")
+        if err != nil {
+            return nil, err
+        }
+
+        output := &OneDriveListEntriesOutput {}
+
+        // Close each page's body right away; a defer inside this loop
+        // would hold every body open until ListEntries returns.
+        err = json.NewDecoder(readCloser).Decode(output)
+        readCloser.Close()
+        if err != nil {
+            return nil, err
+        }
+
+        entries = append(entries, output.Entries...)
+
+        url = output.NextLink
+        if url == "" {
+            break
+        }
+    }
+
+    return entries, nil
+}
+
+func (client *OneDriveClient) GetFileInfo(path string) (string, bool, int64, error) {
+
+    url := OneDriveAPIURL + "/drive/root:/" + path
+    url += "?select=id,name,size,folder"
+
+    readCloser, _, err := client.call(url, "GET", 0, "")
+    if err != nil {
+        if e, ok := err.(OneDriveError); ok && e.Status == 404 {
+            return "", false, 0, nil
+        } else {
+            return "", false, 0, err
+        }
+    }
+
+    defer readCloser.Close()
+
+    output := &OneDriveEntry{}
+
+    if err = json.NewDecoder(readCloser).Decode(&output); err != nil {
+        return "", false, 0, err
+    }
+
+    return output.ID, len(output.Folder) != 0, output.Size, nil
+}
+
+func (client *OneDriveClient) DownloadFile(path string) (io.ReadCloser, int64, error) {
+
+    url := OneDriveAPIURL + "/drive/items/root:/" + path + ":/content"
+
+    return client.call(url, "GET", 0, "")
+}
+
+func (client *OneDriveClient) UploadFile(path string, content []byte, rateLimit int) (err error) {
+
+    url := OneDriveAPIURL + "/drive/root:/" + path + ":/content"
+
+    readCloser, _, err := client.call(url, "PUT", CreateRateLimitedReader(content, rateLimit), "application/octet-stream")
+
+    if err != nil {
+        return err
+    }
+
+    readCloser.Close()
+    return nil
+}
+
+func (client *OneDriveClient) DeleteFile(path string) error {
+
+    url := OneDriveAPIURL + "/drive/root:/" + path
+
+    readCloser, _, err := client.call(url, "DELETE", 0, "")
+    if err != nil {
+        return err
+    }
+
+    readCloser.Close()
+    return nil
+}
+
+func (client *OneDriveClient) MoveFile(path string, parent string) error {
+
+    url := OneDriveAPIURL + "/drive/root:/" + path
+
+    parentReference := make(map[string]string)
+    parentReference["path"] = "/drive/root:/" + parent
+
+    parameters := make(map[string]interface{})
+    parameters["parentReference"] = parentReference
+
+    readCloser, _, err := client.call(url, "PATCH", parameters, "application/json")
+    if err != nil {
+        return err
+    }
+
+    readCloser.Close()
+    return nil
+}
+
+func (client *OneDriveClient) CreateDirectory(path string, name string) (error) {
+
+    // Fixed: root children endpoint is /drive/root/children (matches
+    // ListEntries above); the bare /root/children path is not a valid
+    // OneDrive API route.
+    url := OneDriveAPIURL + "/drive/root/children"
+
+    if path != "" {
+
+        parentID, isDir, _, err := client.GetFileInfo(path)
+        if err != nil {
+            return err
+        }
+
+        if parentID == "" {
+            return fmt.Errorf("The path '%s' does not exist", path)
+        }
+
+        if !isDir {
+            return fmt.Errorf("The path '%s' is not a directory", path)
+        }
+
+        url = OneDriveAPIURL + "/drive/items/" + parentID + "/children"
+    }
+
+    parameters := make(map[string]interface{})
+    parameters["name"] = name
+    parameters["folder"] = make(map[string]int)
+
+    readCloser, _, err := client.call(url, "POST", parameters, "application/json")
+    if err != nil {
+        return err
+    }
+
+    readCloser.Close()
+    return nil
+}
diff --git a/duplicacy_oneclient_test.go b/duplicacy_oneclient_test.go
new file mode 100644
index 0000000..ed35f25
--- /dev/null
+++ b/duplicacy_oneclient_test.go
@@ -0,0 +1,146 @@
+// Copyright (c) Acrosync LLC. All rights reserved. 
+// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "io" + "fmt" + "testing" + "crypto/sha256" + "encoding/hex" + + crypto_rand "crypto/rand" + "math/rand" +) + +func TestOneDriveClient(t *testing.T) { + + oneDriveClient, err := NewOneDriveClient("one-token.json") + if err != nil { + t.Errorf("Failed to create the OneDrive client: %v", err) + return + } + + oneDriveClient.TestMode = true + + existingFiles, err := oneDriveClient.ListEntries("") + for _, file := range existingFiles { + fmt.Printf("name: %s, isDir: %t\n", file.Name, len(file.Folder) != 0) + } + + + testID, _, _, err := oneDriveClient.GetFileInfo("test") + if err != nil { + t.Errorf("Failed to list the test directory: %v", err) + return + } + if testID == "" { + err = oneDriveClient.CreateDirectory("", "test") + if err != nil { + t.Errorf("Failed to create the test directory: %v", err) + return + } + } + + test1ID, _, _, err := oneDriveClient.GetFileInfo("test/test1") + if err != nil { + t.Errorf("Failed to list the test1 directory: %v", err) + return + } + if test1ID == "" { + err = oneDriveClient.CreateDirectory("test", "test1") + if err != nil { + t.Errorf("Failed to create the test1 directory: %v", err) + return + } + } + + test2ID, _, _, err := oneDriveClient.GetFileInfo("test/test2") + if err != nil { + t.Errorf("Failed to list the test2 directory: %v", err) + return + } + if test2ID == "" { + err = oneDriveClient.CreateDirectory("test", "test2") + if err != nil { + t.Errorf("Failed to create the test2 directory: %v", err) + return + } + } + + numberOfFiles := 20 + maxFileSize := 64 * 1024 + + for i := 0; i < numberOfFiles; i++ { + content := make([]byte, rand.Int() % maxFileSize + 1) + _, err = crypto_rand.Read(content) + if err != nil { + t.Errorf("Error generating random content: %v", err) + return + } + + hasher := sha256.New() + hasher.Write(content) + filename := hex.EncodeToString(hasher.Sum(nil)) + + 
fmt.Printf("file: %s\n", filename) + + err = oneDriveClient.UploadFile("test/test1/" + filename, content, 100) + if err != nil { + /*if e, ok := err.(ACDError); !ok || e.Status != 409 */ { + t.Errorf("Failed to upload the file %s: %v", filename, err) + return + } + } + } + + entries, err := oneDriveClient.ListEntries("test/test1") + if err != nil { + t.Errorf("Error list randomly generated files: %v", err) + return + } + + for _, entry := range entries { + err = oneDriveClient.MoveFile("test/test1/" + entry.Name, "test/test2") + if err != nil { + t.Errorf("Failed to move %s: %v", entry.Name, err) + return + } + } + + entries, err = oneDriveClient.ListEntries("test/test2") + if err != nil { + t.Errorf("Error list randomly generated files: %v", err) + return + } + + for _, entry := range entries { + readCloser, _, err := oneDriveClient.DownloadFile("test/test2/" + entry.Name) + if err != nil { + t.Errorf("Error downloading file %s: %v", entry.Name, err) + return + } + + hasher := sha256.New() + io.Copy(hasher, readCloser) + hash := hex.EncodeToString(hasher.Sum(nil)) + + if hash != entry.Name { + t.Errorf("File %s, hash %s", entry.Name, hash) + } + + readCloser.Close() + } + + for _, entry := range entries { + + err = oneDriveClient.DeleteFile("test/test2/" + entry.Name) + if err != nil { + t.Errorf("Failed to delete the file %s: %v", entry.Name, err) + return + } + } + +} diff --git a/duplicacy_onestorage.go b/duplicacy_onestorage.go new file mode 100644 index 0000000..73b2a3b --- /dev/null +++ b/duplicacy_onestorage.go @@ -0,0 +1,242 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "fmt" + "path" + "strings" +) + +type OneDriveStorage struct { + RateLimitedStorage + + client *OneDriveClient + storageDir string + numberOfThread int +} + +// CreateOneDriveStorage creates an OneDrive storage object. 
+func CreateOneDriveStorage(tokenFile string, storagePath string, threads int) (storage *OneDriveStorage, err error) { + + for len(storagePath) > 0 && storagePath[len(storagePath) - 1] == '/' { + storagePath = storagePath[:len(storagePath) - 1] + } + + client, err := NewOneDriveClient(tokenFile) + if err != nil { + return nil, err + } + + fileID, isDir, _, err := client.GetFileInfo(storagePath) + if err != nil { + return nil, err + } + + if fileID == "" { + return nil, fmt.Errorf("Path '%s' doesn't exist", storagePath) + } + + if !isDir { + return nil, fmt.Errorf("Path '%s' is not a directory", storagePath) + } + + storage = &OneDriveStorage { + client: client, + storageDir: storagePath, + numberOfThread: threads, + } + + for _, path := range []string { "chunks", "fossils", "snapshots" } { + dir := storagePath + "/" + path + dirID, isDir, _, err := client.GetFileInfo(dir) + if err != nil { + return nil, err + } + if dirID == "" { + err = client.CreateDirectory(storagePath, path) + if err != nil { + return nil, err + } + } else if !isDir { + return nil, fmt.Errorf("%s is not a directory", dir) + } + } + + return storage, nil + +} + +// ListFiles return the list of files and subdirectories under 'dir' (non-recursively) +func (storage *OneDriveStorage) ListFiles(threadIndex int, dir string) ([]string, []int64, error) { + for len(dir) > 0 && dir[len(dir) - 1] == '/' { + dir = dir[:len(dir) - 1] + } + + if dir == "snapshots" { + entries, err := storage.client.ListEntries(storage.storageDir + "/" + dir) + if err != nil { + return nil, nil, err + } + + subDirs := []string{} + for _, entry := range entries { + if len(entry.Folder) > 0 { + subDirs = append(subDirs, entry.Name + "/") + } + } + return subDirs, nil, nil + } else if strings.HasPrefix(dir, "snapshots/") { + entries, err := storage.client.ListEntries(storage.storageDir + "/" + dir) + if err != nil { + return nil, nil, err + } + + files := []string{} + + for _, entry := range entries { + if len(entry.Folder) == 0 
{ + files = append(files, entry.Name) + } + } + return files, nil, nil + } else { + files := []string{} + sizes := []int64{} + for _, parent := range []string {"chunks", "fossils" } { + entries, err := storage.client.ListEntries(storage.storageDir + "/" + parent) + if err != nil { + return nil, nil, err + } + + for _, entry := range entries { + name := entry.Name + if parent == "fossils" { + name += ".fsl" + } + files = append(files, name) + sizes = append(sizes, entry.Size) + } + } + return files, sizes, nil + } + +} + +// DeleteFile deletes the file or directory at 'filePath'. +func (storage *OneDriveStorage) DeleteFile(threadIndex int, filePath string) (err error) { + if strings.HasSuffix(filePath, ".fsl") && strings.HasPrefix(filePath, "chunks/") { + filePath = "fossils/" + filePath[len("chunks/"):len(filePath) - len(".fsl")] + } + + err = storage.client.DeleteFile(storage.storageDir + "/" + filePath) + if e, ok := err.(OneDriveError); ok && e.Status == 404 { + LOG_DEBUG("ONEDRIVE_DELETE", "Ignore 404 error") + return nil + } + return err +} + +// MoveFile renames the file. +func (storage *OneDriveStorage) MoveFile(threadIndex int, from string, to string) (err error) { + fromPath := storage.storageDir + "/" + from + toParent := storage.storageDir + "/fossils" + if strings.HasSuffix(from, ".fsl") { + fromPath = storage.storageDir + "/fossils/" + from[len("chunks/"):len(from) - len(".fsl")] + toParent = storage.storageDir + "/chunks" + } + + err = storage.client.MoveFile(fromPath, toParent) + if err != nil { + if e, ok := err.(OneDriveError); ok && e.Status == 409 { + LOG_DEBUG("ONEDRIVE_MOVE", "Ignore 409 conflict error") + } else { + return err + } + } + return nil +} + +// CreateDirectory creates a new directory. +func (storage *OneDriveStorage) CreateDirectory(threadIndex int, dir string) (err error) { + for len(dir) > 0 && dir[len(dir) - 1] == '/' { + dir = dir[:len(dir) - 1] + } + + parent := path.Dir(dir) + + if parent == "." 
{ + return storage.client.CreateDirectory(storage.storageDir, dir) + } else { + return storage.client.CreateDirectory(storage.storageDir + "/" + parent, path.Base(dir)) + } +} + +// GetFileInfo returns the information about the file or directory at 'filePath'. +func (storage *OneDriveStorage) GetFileInfo(threadIndex int, filePath string) (exist bool, isDir bool, size int64, err error) { + + for len(filePath) > 0 && filePath[len(filePath) - 1] == '/' { + filePath = filePath[:len(filePath) - 1] + } + fileID, isDir, size, err := storage.client.GetFileInfo(storage.storageDir + "/" + filePath) + return fileID != "", isDir, size, err +} + +// FindChunk finds the chunk with the specified id. If 'isFossil' is true, it will search for chunk files with +// the suffix '.fsl'. +func (storage *OneDriveStorage) FindChunk(threadIndex int, chunkID string, isFossil bool) (filePath string, exist bool, size int64, err error) { + filePath = "chunks/" + chunkID + realPath := storage.storageDir + "/" + filePath + if isFossil { + filePath += ".fsl" + realPath = storage.storageDir + "/fossils/" + chunkID + } + + fileID, _, size, err := storage.client.GetFileInfo(realPath) + return filePath, fileID != "", size, err +} + +// DownloadFile reads the file at 'filePath' into the chunk. +func (storage *OneDriveStorage) DownloadFile(threadIndex int, filePath string, chunk *Chunk) (err error) { + readCloser, _, err := storage.client.DownloadFile(storage.storageDir + "/" + filePath) + if err != nil { + return err + } + + defer readCloser.Close() + + _, err = RateLimitedCopy(chunk, readCloser, storage.DownloadRateLimit / storage.numberOfThread) + return err +} + +// UploadFile writes 'content' to the file at 'filePath'. 
+func (storage *OneDriveStorage) UploadFile(threadIndex int, filePath string, content []byte) (err error) { + err = storage.client.UploadFile(storage.storageDir + "/" + filePath, content, storage.UploadRateLimit / storage.numberOfThread) + + if e, ok := err.(OneDriveError); ok && e.Status == 409 { + LOG_TRACE("ONEDRIVE_UPLOAD", "File %s already exists", filePath) + return nil + } else { + return err + } +} + +// If a local snapshot cache is needed for the storage to avoid downloading/uploading chunks too often when +// managing snapshots. +func (storage *OneDriveStorage) IsCacheNeeded() (bool) { return true } + +// If the 'MoveFile' method is implemented. +func (storage *OneDriveStorage) IsMoveFileImplemented() (bool) { return true } + +// If the storage can guarantee strong consistency. +func (storage *OneDriveStorage) IsStrongConsistent() (bool) { return false } + +// If the storage supports fast listing of files names. +func (storage *OneDriveStorage) IsFastListing() (bool) { return true } + +// Enable the test mode. +func (storage *OneDriveStorage) EnableTestMode() { + storage.client.TestMode = true +} diff --git a/duplicacy_preference.go b/duplicacy_preference.go new file mode 100644 index 0000000..139e80e --- /dev/null +++ b/duplicacy_preference.go @@ -0,0 +1,79 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "encoding/json" + "path" + "io/ioutil" + "reflect" +) + +// Preference stores options for each storage. 
+type Preference struct { + Name string `json:"name"` + SnapshotID string `json:"id"` + StorageURL string `json:"storage"` + Encrypted bool `json:"encrypted"` + BackupProhibited bool `json:"no_backup"` + RestoreProhibited bool `json:"no_restore"` + DoNotSavePassword bool `json:"no_save_password"` + Keys map[string]string `json:"keys"` +} + +var Preferences [] Preference + +func LoadPreferences(repository string) (bool) { + + description, err := ioutil.ReadFile(path.Join(repository, DUPLICACY_DIRECTORY, "preferences")) + if err != nil { + LOG_ERROR("PREFERENCE_OPEN", "Failed to read the preference file from repository %s: %v", repository, err) + return false + } + + err = json.Unmarshal(description, &Preferences) + if err != nil { + LOG_ERROR("PREFERENCE_PARSE", "Failed to parse the preference file for repository %s: %v", repository, err) + return false + } + + if len(Preferences) == 0 { + LOG_ERROR("PREFERENCE_NONE", "No preference found in the preference file") + return false + } + + return true +} + +func SavePreferences(repository string) (bool) { + description, err := json.MarshalIndent(Preferences, "", " ") + if err != nil { + LOG_ERROR("PREFERENCE_MARSHAL", "Failed to marshal the repository preferences: %v", err) + return false + } + + preferenceFile := path.Join(repository, DUPLICACY_DIRECTORY, "/preferences") + err = ioutil.WriteFile(preferenceFile, description, 0644) + if err != nil { + LOG_ERROR("PREFERENCE_WRITE", "Failed to save the preference file %s: %v", preferenceFile, err) + return false + } + + return true +} + +func FindPreference(name string) (*Preference) { + for _, preference := range Preferences { + if preference.Name == name || preference.StorageURL == name { + return &preference + } + } + + return nil +} + +func (preference *Preference) Equal(other *Preference) bool { + return reflect.DeepEqual(preference, other) +} diff --git a/duplicacy_s3storage.go b/duplicacy_s3storage.go new file mode 100644 index 0000000..501a80e --- /dev/null +++ 
b/duplicacy_s3storage.go @@ -0,0 +1,211 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "time" + "github.com/gilbertchen/goamz/aws" + "github.com/gilbertchen/goamz/s3" +) + +type S3Storage struct { + RateLimitedStorage + + buckets []*s3.Bucket + storageDir string +} + +// CreateS3Storage creates a amazon s3 storage object. +func CreateS3Storage(regionName string, endpoint string, bucketName string, storageDir string, + accessKey string, secretKey string, threads int) (storage *S3Storage, err error) { + + var region aws.Region + + if endpoint == "" { + if regionName == "" { + regionName = "us-east-1" + } + region = aws.Regions[regionName] + } else { + region = aws.Region { Name: regionName, S3Endpoint:"https://" + endpoint } + } + + auth := aws.Auth{ AccessKey: accessKey, SecretKey: secretKey } + + var buckets []*s3.Bucket + for i := 0; i < threads; i++ { + s3Client := s3.New(auth, region) + s3Client.AttemptStrategy = aws.AttemptStrategy{ + Min: 8, + Total: 300 * time.Second, + Delay: 1000 * time.Millisecond, + } + + bucket := s3Client.Bucket(bucketName) + buckets = append(buckets, bucket) + } + + if len(storageDir) > 0 && storageDir[len(storageDir) - 1] != '/' { + storageDir += "/" + } + + storage = &S3Storage { + buckets: buckets, + storageDir: storageDir, + } + + return storage, nil +} + +// ListFiles return the list of files and subdirectories under 'dir' (non-recursively) +func (storage *S3Storage) ListFiles(threadIndex int, dir string) (files []string, sizes []int64, err error) { + if len(dir) > 0 && dir[len(dir) - 1] != '/' { + dir += "/" + } + + dirLength := len(storage.storageDir + dir) + if dir == "snapshots/" { + results, err := storage.buckets[threadIndex].List(storage.storageDir + dir, "/", "", 100) + if err != nil { + return nil, nil, err + } + + for _, subDir := range results.CommonPrefixes { + files = append(files, 
subDir[dirLength:])
		}
		return files, nil, nil
	} else if dir == "chunks/" {
		// Chunk listings can exceed one page; keep requesting with a marker
		// until the result is no longer truncated.
		marker := ""
		for {
			results, err := storage.buckets[threadIndex].List(storage.storageDir + dir, "", marker, 1000)
			if err != nil {
				return nil, nil, err
			}

			for _, object := range results.Contents {
				files = append(files, object.Key[dirLength:])
				sizes = append(sizes, object.Size)
			}

			// Guard against a truncated response with no contents, which would
			// otherwise panic on the marker index below.
			if !results.IsTruncated || len(results.Contents) == 0 {
				break
			}

			marker = results.Contents[len(results.Contents) - 1].Key
		}
		return files, sizes, nil

	} else {

		results, err := storage.buckets[threadIndex].List(storage.storageDir + dir, "", "", 1000)
		if err != nil {
			return nil, nil, err
		}

		for _, object := range results.Contents {
			files = append(files, object.Key[dirLength:])
		}
		return files, nil, nil
	}
}

// DeleteFile deletes the file or directory at 'filePath'.
func (storage *S3Storage) DeleteFile(threadIndex int, filePath string) (err error) {
	return storage.buckets[threadIndex].Del(storage.storageDir + filePath)
}

// MoveFile renames the file by copying it to the new key and then deleting the
// original.
func (storage *S3Storage) MoveFile(threadIndex int, from string, to string) (err error) {

	options := s3.CopyOptions{ContentType: "application/duplicacy"}
	_, err = storage.buckets[threadIndex].PutCopy(storage.storageDir + to, s3.Private, options,
		storage.buckets[threadIndex].Name + "/" + storage.storageDir + from)
	if err != nil {
		// Propagate the copy failure; the previous code returned nil here, which
		// reported success (and skipped the delete) when the copy failed.
		return err
	}

	return storage.DeleteFile(threadIndex, from)
}

// CreateDirectory creates a new directory.  S3 has no real directories, so this
// is a no-op.
func (storage *S3Storage) CreateDirectory(threadIndex int, dir string) (err error) {
	return nil
}

// GetFileInfo returns the information about the file or directory at 'filePath'. 
+func (storage *S3Storage) GetFileInfo(threadIndex int, filePath string) (exist bool, isDir bool, size int64, err error) { + + response, err := storage.buckets[threadIndex].Head(storage.storageDir + filePath, nil) + if err != nil { + if e, ok := err.(*s3.Error); ok && (e.StatusCode == 403 || e.StatusCode == 404) { + return false, false, 0, nil + } else { + return false, false, 0, err + } + } + + if response.StatusCode == 403 || response.StatusCode == 404 { + return false, false, 0, nil + } else { + return true, false, response.ContentLength, nil + } +} + +// FindChunk finds the chunk with the specified id. If 'isFossil' is true, it will search for chunk files with +// the suffix '.fsl'. +func (storage *S3Storage) FindChunk(threadIndex int, chunkID string, isFossil bool) (filePath string, exist bool, size int64, err error) { + + filePath = "chunks/" + chunkID + if isFossil { + filePath += ".fsl" + } + + exist, _, size, err = storage.GetFileInfo(threadIndex, filePath) + + if err != nil { + return "", false, 0, err + } else { + return filePath, exist, size, err + } + +} + +// DownloadFile reads the file at 'filePath' into the chunk. +func (storage *S3Storage) DownloadFile(threadIndex int, filePath string, chunk *Chunk) (err error) { + + readCloser, err := storage.buckets[threadIndex].GetReader(storage.storageDir + filePath) + if err != nil { + return err + } + + defer readCloser.Close() + + _, err = RateLimitedCopy(chunk, readCloser, storage.DownloadRateLimit / len(storage.buckets)) + return err + +} + +// UploadFile writes 'content' to the file at 'filePath'. 
+func (storage *S3Storage) UploadFile(threadIndex int, filePath string, content []byte) (err error) { + + options := s3.Options { } + reader := CreateRateLimitedReader(content, storage.UploadRateLimit / len(storage.buckets)) + return storage.buckets[threadIndex].PutReader(storage.storageDir + filePath, reader, int64(len(content)), "application/duplicacy", s3.Private, options) +} + +// If a local snapshot cache is needed for the storage to avoid downloading/uploading chunks too often when +// managing snapshots. +func (storage *S3Storage) IsCacheNeeded () (bool) { return true } + +// If the 'MoveFile' method is implemented. +func (storage *S3Storage) IsMoveFileImplemented() (bool) { return true } + +// If the storage can guarantee strong consistency. +func (storage *S3Storage) IsStrongConsistent() (bool) { return false } + +// If the storage supports fast listing of files names. +func (storage *S3Storage) IsFastListing() (bool) { return true } + +// Enable the test mode. +func (storage *S3Storage) EnableTestMode() {} diff --git a/duplicacy_sftpstorage.go b/duplicacy_sftpstorage.go new file mode 100644 index 0000000..d505980 --- /dev/null +++ b/duplicacy_sftpstorage.go @@ -0,0 +1,305 @@ +// Copyright (c) Acrosync LLC. All rights reserved. 
+// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "fmt" + "io" + "os" + "net" + "path" + "time" + "runtime" + "math/rand" + + "golang.org/x/crypto/ssh" + "github.com/pkg/sftp" +) + +type SFTPStorage struct { + RateLimitedStorage + + client *sftp.Client + storageDir string + numberOfThreads int +} + +func CreateSFTPStorageWithPassword(server string, port int, username string, storageDir string, + password string, threads int) (storage *SFTPStorage, err error) { + + authMethods := [] ssh.AuthMethod { ssh.Password(password) } + + + hostKeyCallback := func(hostname string, remote net.Addr, + key ssh.PublicKey) error { + return nil + } + + return CreateSFTPStorage(server, port, username, storageDir, authMethods, hostKeyCallback, threads) +} + +func CreateSFTPStorage(server string, port int, username string, storageDir string, + authMethods [] ssh.AuthMethod, + hostKeyCallback func(hostname string, remote net.Addr, + key ssh.PublicKey) error, threads int) (storage *SFTPStorage, err error) { + + config := &ssh.ClientConfig{ + User: username, + Auth: authMethods, + HostKeyCallback: hostKeyCallback, + } + + if server == "sftp.hidrive.strato.com" { + config.Ciphers = []string {"aes128-cbc", "aes128-ctr", "aes256-ctr"} + } + + serverAddress := fmt.Sprintf("%s:%d", server, port) + connection, err := ssh.Dial("tcp", serverAddress, config) + if err != nil { + return nil, err + } + + client, err := sftp.NewClient(connection) + if err != nil { + connection.Close() + return nil, err + } + + for storageDir[len(storageDir) - 1] == '/' { + storageDir = storageDir[:len(storageDir) - 1] + } + + fileInfo, err := client.Stat(storageDir) + if err != nil { + return nil, fmt.Errorf("Can't access the storage path %s: %v", storageDir, err) + } + + if !fileInfo.IsDir() { + return nil, fmt.Errorf("The storage path %s is not a directory", storageDir) + } + + storage = &SFTPStorage { + client: client, + storageDir: 
storageDir,
		numberOfThreads: threads,
	}

	// Seed the random number generator used for the temporary chunk file suffix.
	rand.Seed(time.Now().UnixNano())

	runtime.SetFinalizer(storage, CloseSFTPStorage)

	return storage, nil
}

// CloseSFTPStorage closes the underlying SFTP client; installed as a finalizer
// on the storage object.
func CloseSFTPStorage(storage *SFTPStorage) {
	storage.client.Close()
}

// ListFiles returns the list of files and subdirectories under 'dirPath'
// (non-recursively); subdirectory names carry a trailing '/'.
func (storage *SFTPStorage) ListFiles(threadIndex int, dirPath string) (files []string, sizes []int64, err error) {

	entries, err := storage.client.ReadDir(path.Join(storage.storageDir, dirPath))
	if err != nil {
		return nil, nil, err
	}

	for _, entry := range entries {
		name := entry.Name()
		if entry.IsDir() && name[len(name) - 1] != '/' {
			name += "/"
		}

		files = append(files, name)
		sizes = append(sizes, entry.Size())
	}

	return files, sizes, nil
}

// DeleteFile deletes the file or directory at 'filePath'.  A file that has
// already disappeared is not treated as an error.
func (storage *SFTPStorage) DeleteFile(threadIndex int, filePath string) (err error) {
	fullPath := path.Join(storage.storageDir, filePath)
	fileInfo, err := storage.client.Stat(fullPath)
	if err != nil {
		if os.IsNotExist(err) {
			LOG_TRACE("SFTP_STORAGE", "File %s has disappeared before deletion", filePath)
			return nil
		}
		return err
	}
	if fileInfo == nil {
		return nil
	}
	return storage.client.Remove(path.Join(storage.storageDir, filePath))
}

// MoveFile renames the file, refusing to overwrite an existing destination.
func (storage *SFTPStorage) MoveFile(threadIndex int, from string, to string) (err error) {
	toPath := path.Join(storage.storageDir, to)
	fileInfo, err := storage.client.Stat(toPath)
	if fileInfo != nil {
		return fmt.Errorf("The destination file %s already exists", toPath)
	}
	return storage.client.Rename(path.Join(storage.storageDir, from),
		path.Join(storage.storageDir, to))
}

// CreateDirectory creates a new directory. 
+func (storage *SFTPStorage) CreateDirectory(threadIndex int, dirPath string) (err error) { + fullPath := path.Join(storage.storageDir, dirPath) + fileInfo, err := storage.client.Stat(fullPath) + if fileInfo != nil && fileInfo.IsDir() { + return nil + } + return storage.client.Mkdir(path.Join(storage.storageDir, dirPath)) +} + +// GetFileInfo returns the information about the file or directory at 'filePath'. +func (storage *SFTPStorage) GetFileInfo(threadIndex int, filePath string) (exist bool, isDir bool, size int64, err error) { + fileInfo, err := storage.client.Stat(path.Join(storage.storageDir, filePath)) + if err != nil { + if os.IsNotExist(err) { + return false, false, 0, nil + } else { + return false, false, 0, err + } + } + + if fileInfo == nil { + return false, false, 0, nil + } + + return true, fileInfo.IsDir(), fileInfo.Size(), nil +} + +// FindChunk finds the chunk with the specified id. If 'isFossil' is true, it will search for chunk files with +// the suffix '.fsl'. +func (storage *SFTPStorage) FindChunk(threadIndex int, chunkID string, isFossil bool) (filePath string, exist bool, size int64, err error) { + dir := path.Join(storage.storageDir, "chunks") + + suffix := "" + if isFossil { + suffix = ".fsl" + } + + // The minimum level of directories to dive into before searching for the chunk file. + minimumLevel := 2 + + for level := 0; level * 2 < len(chunkID); level ++ { + if level >= minimumLevel { + filePath = path.Join(dir, chunkID[2 * level:]) + suffix + if stat, err := storage.client.Stat(filePath); err == nil && !stat.IsDir() { + return filePath[len(storage.storageDir) + 1:], true, stat.Size(), nil + } else if err == nil && stat.IsDir() { + return filePath[len(storage.storageDir) + 1:], true, 0, fmt.Errorf("The path %s is a directory", filePath) + } + } + + // Find the subdirectory the chunk file may reside. 
+ subDir := path.Join(dir, chunkID[2 * level: 2 * level + 2]) + stat, err := storage.client.Stat(subDir) + if err == nil && stat.IsDir() { + dir = subDir + continue + } + + if level < minimumLevel { + // Create the subdirectory if is doesn't exist. + + if err == nil && !stat.IsDir() { + return "", false, 0, fmt.Errorf("The path %s is not a directory", subDir) + } + + err = storage.client.Mkdir(subDir) + if err != nil { + return "", false, 0, fmt.Errorf("Failed to create the directory %s: %v", subDir, err) + } + + dir = subDir + continue + } + + // Teh chunk must be under this subdirectory but it doesn't exist. + return path.Join(dir, chunkID[2 * level:])[len(storage.storageDir) + 1:] + suffix, false, 0, nil + + } + + LOG_FATAL("CHUNK_FIND", "Chunk %s is still not found after having searched a maximum level of directories", + chunkID) + return "", false, 0, nil +} + +// DownloadFile reads the file at 'filePath' into the chunk. +func (storage *SFTPStorage) DownloadFile(threadIndex int, filePath string, chunk *Chunk) (err error) { + file, err := storage.client.Open(path.Join(storage.storageDir, filePath)) + + if err != nil { + return err + } + + defer file.Close() + if _, err = RateLimitedCopy(chunk, file, storage.DownloadRateLimit / storage.numberOfThreads); err != nil { + return err + } + + return nil +} + +// UploadFile writes 'content' to the file at 'filePath'. +func (storage *SFTPStorage) UploadFile(threadIndex int, filePath string, content []byte) (err error) { + + fullPath := path.Join(storage.storageDir, filePath) + + letters := "abcdefghijklmnopqrstuvwxyz" + suffix := make([]byte, 8) + for i := range suffix { + suffix[i] = letters[rand.Intn(len(letters))] + } + + temporaryFile := fullPath + "." 
+ string(suffix) + ".tmp" + + file, err := storage.client.OpenFile(temporaryFile, os.O_WRONLY | os.O_CREATE | os.O_TRUNC) + if err != nil { + return err + } + + reader := CreateRateLimitedReader(content, storage.UploadRateLimit / storage.numberOfThreads) + _, err = io.Copy(file, reader) + if err != nil { + file.Close() + return err + } + file.Close() + + err = storage.client.Rename(temporaryFile, fullPath) + if err != nil { + + if _, err = storage.client.Stat(fullPath); err == nil { + storage.client.Remove(temporaryFile) + return nil + } else { + return fmt.Errorf("Uploaded file but failed to store it at %s", fullPath) + } + } + + return nil +} + +// If a local snapshot cache is needed for the storage to avoid downloading/uploading chunks too often when +// managing snapshots. +func (storage *SFTPStorage) IsCacheNeeded () (bool) { return true } + +// If the 'MoveFile' method is implemented. +func (storage *SFTPStorage) IsMoveFileImplemented() (bool) { return true } + +// If the storage can guarantee strong consistency. +func (storage *SFTPStorage) IsStrongConsistent() (bool) { return true } + +// If the storage supports fast listing of files names. +func (storage *SFTPStorage) IsFastListing() (bool) { return false } + +// Enable the test mode. +func (storage *SFTPStorage) EnableTestMode() {} diff --git a/duplicacy_shadowcopy.go b/duplicacy_shadowcopy.go new file mode 100644 index 0000000..893cd14 --- /dev/null +++ b/duplicacy_shadowcopy.go @@ -0,0 +1,13 @@ +// Copyright (c) Acrosync LLC. All rights reserved. 
+// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +// +build !windows + +package duplicacy + +func CreateShadowCopy(top string, shadowCopy bool) (shadowTop string) { + return top +} + +func DeleteShadowCopy() {} diff --git a/duplicacy_shadowcopy_windows.go b/duplicacy_shadowcopy_windows.go new file mode 100644 index 0000000..1f1749c --- /dev/null +++ b/duplicacy_shadowcopy_windows.go @@ -0,0 +1,525 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "syscall" + "unsafe" + "time" + "os" + "path" + "runtime" + + ole "github.com/gilbertchen/go-ole" +) + +//507C37B4-CF5B-4e95-B0AF-14EB9767467E +var IID_IVSS_ASYNC = &ole.GUID{0x507C37B4, 0xCF5B, 0x4e95, [8]byte{0xb0, 0xaf, 0x14, 0xeb, 0x97, 0x67, 0x46, 0x7e}} + +type IVSSAsync struct { + ole.IUnknown +} + +type IVSSAsyncVtbl struct { + ole.IUnknownVtbl + cancel uintptr + wait uintptr + queryStatus uintptr +} + +func (async *IVSSAsync) VTable() * IVSSAsyncVtbl { + return (*IVSSAsyncVtbl)(unsafe.Pointer(async.RawVTable)) +} + +var VSS_S_ASYNC_PENDING int32 = 0x00042309 +var VSS_S_ASYNC_FINISHED int32 = 0x0004230A +var VSS_S_ASYNC_CANCELLED int32 = 0x0004230B + +func (async *IVSSAsync) Wait(seconds int) bool { + + startTime := time.Now().Unix() + for { + ret, _, _ := syscall.Syscall(async.VTable().wait, 2, uintptr(unsafe.Pointer(async)), uintptr(1000), 0) + if ret != 0 { + LOG_WARN("IVSSASYNC_WAIT", "IVssAsync::Wait returned %d\n", ret) + } + + var status int32 + ret, _, _ = syscall.Syscall(async.VTable().queryStatus, 3, uintptr(unsafe.Pointer(async)), + uintptr(unsafe.Pointer(&status)), 0) + if ret != 0 { + LOG_WARN("IVSSASYNC_QUERY", "IVssAsync::QueryStatus returned %d\n", ret) + } + + if status == VSS_S_ASYNC_FINISHED { + return true + } + if time.Now().Unix() - startTime > int64(seconds) { + LOG_WARN("IVSSASYNC_TIMEOUT", "IVssAsync is 
pending for more than %d seconds\n", seconds) + return false + } + } +} + +func getIVSSAsync(unknown *ole.IUnknown, iid *ole.GUID) (async *IVSSAsync) { + r, _, _ := syscall.Syscall( + unknown.VTable().QueryInterface, + 3, + uintptr(unsafe.Pointer(unknown)), + uintptr(unsafe.Pointer(iid)), + uintptr(unsafe.Pointer(&async))) + + if r != 0 { + LOG_WARN("IVSSASYNC_QUERY", "IVSSAsync::QueryInterface returned %d\n", r) + return nil + } + return +} + + +//665c1d5f-c218-414d-a05d-7fef5f9d5c86 +var IID_IVSS = &ole.GUID{0x665c1d5f, 0xc218, 0x414d, [8]byte{0xa0, 0x5d, 0x7f, 0xef, 0x5f, 0x9d, 0x5c, 0x86}} + +type IVSS struct { + ole.IUnknown +} + +type IVSSVtbl struct { + ole.IUnknownVtbl + getWriterComponentsCount uintptr + getWriterComponents uintptr + initializeForBackup uintptr + setBackupState uintptr + initializeForRestore uintptr + setRestoreState uintptr + gatherWriterMetadata uintptr + getWriterMetadataCount uintptr + getWriterMetadata uintptr + freeWriterMetadata uintptr + addComponent uintptr + prepareForBackup uintptr + abortBackup uintptr + gatherWriterStatus uintptr + getWriterStatusCount uintptr + freeWriterStatus uintptr + getWriterStatus uintptr + setBackupSucceeded uintptr + setBackupOptions uintptr + setSelectedForRestore uintptr + setRestoreOptions uintptr + setAdditionalRestores uintptr + setPreviousBackupStamp uintptr + saveAsXML uintptr + backupComplete uintptr + addAlternativeLocationMapping uintptr + addRestoreSubcomponent uintptr + setFileRestoreStatus uintptr + addNewTarget uintptr + setRangesFilePath uintptr + preRestore uintptr + postRestore uintptr + setContext uintptr + startSnapshotSet uintptr + addToSnapshotSet uintptr + doSnapshotSet uintptr + deleteSnapshots uintptr + importSnapshots uintptr + breakSnapshotSet uintptr + getSnapshotProperties uintptr + query uintptr + isVolumeSupported uintptr + disableWriterClasses uintptr + enableWriterClasses uintptr + disableWriterInstances uintptr + exposeSnapshot uintptr + revertToSnapshot uintptr + 
queryRevertStatus uintptr +} + +func (vss *IVSS) VTable() * IVSSVtbl { + return (*IVSSVtbl)(unsafe.Pointer(vss.RawVTable)) +} + +func (vss *IVSS) InitializeForBackup() int { + ret, _, _ := syscall.Syscall(vss.VTable().initializeForBackup, 2, uintptr(unsafe.Pointer(vss)), 0, 0) + return int(ret) +} + +func (vss *IVSS) GatherWriterMetadata() (int, *IVSSAsync) { + var unknown *ole.IUnknown + ret, _, _ := syscall.Syscall(vss.VTable().gatherWriterMetadata, 2, + uintptr(unsafe.Pointer(vss)), + uintptr(unsafe.Pointer(&unknown)), 0) + + if ret != 0 { + return int(ret), nil + } else { + return int(ret), getIVSSAsync(unknown, IID_IVSS_ASYNC) + } +} + +func (vss *IVSS) StartSnapshotSet(snapshotID *ole.GUID) int { + ret, _, _ := syscall.Syscall(vss.VTable().startSnapshotSet, 2, + uintptr(unsafe.Pointer(vss)), + uintptr(unsafe.Pointer(snapshotID)), 0) + return int(ret) +} + +func (vss *IVSS) AddToSnapshotSet(drive string, snapshotID *ole.GUID) int { + + volumeName := syscall.StringToUTF16Ptr(drive) + + var ret uintptr + if runtime.GOARCH == "386" { + // On 32-bit Windows, GUID is passed by value + ret, _, _ = syscall.Syscall9(vss.VTable().addToSnapshotSet, 7, + uintptr(unsafe.Pointer(vss)), + uintptr(unsafe.Pointer(volumeName)), + 0, 0, 0, 0, + uintptr(unsafe.Pointer(snapshotID)), 0, 0) + } else { + ret, _, _ = syscall.Syscall6(vss.VTable().addToSnapshotSet, 4, + uintptr(unsafe.Pointer(vss)), + uintptr(unsafe.Pointer(volumeName)), + uintptr(unsafe.Pointer(ole.IID_NULL)), + uintptr(unsafe.Pointer(snapshotID)), 0, 0) + } + return int(ret) +} + +func (vss *IVSS) SetBackupState() int { + VSS_BT_COPY := 5 + ret, _, _ := syscall.Syscall6(vss.VTable().setBackupState, 4, + uintptr(unsafe.Pointer(vss)), + 0, 0, uintptr(VSS_BT_COPY), 0, 0) + return int(ret) +} + +func (vss *IVSS) PrepareForBackup() (int, *IVSSAsync) { + var unknown *ole.IUnknown + ret, _, _ := syscall.Syscall(vss.VTable().prepareForBackup, 2, + uintptr(unsafe.Pointer(vss)), + uintptr(unsafe.Pointer(&unknown)), 0) + + if 
ret != 0 { + return int(ret), nil + } else { + return int(ret), getIVSSAsync(unknown, IID_IVSS_ASYNC) + } +} + +func (vss *IVSS) DoSnapshotSet() (int, *IVSSAsync) { + var unknown *ole.IUnknown + ret, _, _ := syscall.Syscall(vss.VTable().doSnapshotSet, 2, + uintptr(unsafe.Pointer(vss)), + uintptr(unsafe.Pointer(&unknown)), 0) + + if ret != 0 { + return int(ret), nil + } else { + return int(ret), getIVSSAsync(unknown, IID_IVSS_ASYNC) + } +} + +type SnapshotProperties struct { + SnapshotID ole.GUID + SnapshotSetID ole.GUID + SnapshotsCount uint32 + SnapshotDeviceObject *uint16 + OriginalVolumeName *uint16 + OriginatingMachine *uint16 + ServiceMachine *uint16 + ExposedName *uint16 + ExposedPath *uint16 + ProviderId ole.GUID + SnapshotAttributes uint32 + CreationTimestamp int64 + Status int +} + +func (vss *IVSS) GetSnapshotProperties(snapshotSetID ole.GUID, properties *SnapshotProperties) (int) { + var ret uintptr + if runtime.GOARCH == "386" { + address := uint(uintptr(unsafe.Pointer(&snapshotSetID))) + ret, _, _ = syscall.Syscall6(vss.VTable().getSnapshotProperties, 6, + uintptr(unsafe.Pointer(vss)), + uintptr(*(*uint32)(unsafe.Pointer(uintptr(address)))), + uintptr(*(*uint32)(unsafe.Pointer(uintptr(address + 4)))), + uintptr(*(*uint32)(unsafe.Pointer(uintptr(address + 8)))), + uintptr(*(*uint32)(unsafe.Pointer(uintptr(address + 12)))), + uintptr(unsafe.Pointer(properties))) + } else { + ret, _, _ = syscall.Syscall(vss.VTable().getSnapshotProperties, 3, + uintptr(unsafe.Pointer(vss)), + uintptr(unsafe.Pointer(&snapshotSetID)), + uintptr(unsafe.Pointer(properties))) + } + return int(ret) +} + +func (vss *IVSS) DeleteSnapshots(snapshotID ole.GUID) (int, int, ole.GUID) { + + VSS_OBJECT_SNAPSHOT := 3 + + deleted := int32(0) + + var deletedGUID ole.GUID + + var ret uintptr + if runtime.GOARCH == "386" { + address := uint(uintptr(unsafe.Pointer(&snapshotID))) + ret, _, _ = syscall.Syscall9(vss.VTable().deleteSnapshots, 9, + uintptr(unsafe.Pointer(vss)), + 
uintptr(*(*uint32)(unsafe.Pointer(uintptr(address)))), + uintptr(*(*uint32)(unsafe.Pointer(uintptr(address + 4)))), + uintptr(*(*uint32)(unsafe.Pointer(uintptr(address + 8)))), + uintptr(*(*uint32)(unsafe.Pointer(uintptr(address + 12)))), + uintptr(VSS_OBJECT_SNAPSHOT), + uintptr(1), + uintptr(unsafe.Pointer(&deleted)), + uintptr(unsafe.Pointer(&deletedGUID))) + } else { + ret, _, _ = syscall.Syscall6(vss.VTable().deleteSnapshots, 6, + uintptr(unsafe.Pointer(vss)), + uintptr(unsafe.Pointer(&snapshotID)), + uintptr(VSS_OBJECT_SNAPSHOT), + uintptr(1), + uintptr(unsafe.Pointer(&deleted)), + uintptr(unsafe.Pointer(&deletedGUID))) + } + + return int(ret), int(deleted), deletedGUID +} + + +func uint16ArrayToString(p *uint16) (string) { + if p == nil { + return "" + } + s := make([]uint16, 0) + address := uintptr(unsafe.Pointer(p)) + for { + c := *(*uint16)(unsafe.Pointer(address)) + if c == 0 { + break + } + + s = append(s, c) + address = uintptr(int(address) + 2) + } + + return syscall.UTF16ToString(s) +} + +func getIVSS(unknown *ole.IUnknown, iid *ole.GUID) (ivss *IVSS) { + r, _, _ := syscall.Syscall( + unknown.VTable().QueryInterface, + 3, + uintptr(unsafe.Pointer(unknown)), + uintptr(unsafe.Pointer(iid)), + uintptr(unsafe.Pointer(&ivss))) + + if r != 0 { + LOG_WARN("IVSS_QUERY", "IVSS::QueryInterface returned %d\n", r) + return nil + } + + return ivss +} + +var vssBackupComponent *IVSS +var snapshotID ole.GUID +var shadowLink string + +func DeleteShadowCopy() { + if vssBackupComponent != nil { + defer vssBackupComponent.Release() + + LOG_TRACE("VSS_DELETE", "Deleting the shadow copy used for this backup") + ret, _, _ := vssBackupComponent.DeleteSnapshots(snapshotID) + if ret != 0 { + LOG_WARN("VSS_DELETE", "Failed to delete the shadow copy: %x\n", uint(ret)) + } else { + LOG_INFO("VSS_DELETE", "The shadow copy has been successfully deleted") + } + } + + if shadowLink != "" { + err := os.Remove(shadowLink) + if err != nil { + LOG_WARN("VSS_SYMLINK", "Failed to remove 
the symbolic link for the shadow copy: %v", err) + } + } + + ole.CoUninitialize() +} + +func CreateShadowCopy(top string, shadowCopy bool) (shadowTop string) { + + if !shadowCopy { + return top + } + + ole.CoInitialize(0) + defer ole.CoUninitialize() + + dllVssApi := syscall.NewLazyDLL("VssApi.dll") + procCreateVssBackupComponents := + dllVssApi.NewProc("?CreateVssBackupComponents@@YAJPEAPEAVIVssBackupComponents@@@Z") + if runtime.GOARCH == "386" { + procCreateVssBackupComponents = + dllVssApi.NewProc("?CreateVssBackupComponents@@YGJPAPAVIVssBackupComponents@@@Z") + } + + if len(top) < 3 || top[1] != ':' || (top[2] != '/' && top[2] != '\\') { + LOG_ERROR("VSS_PATH", "Invalid repository path: %s", top) + return top + } + volume := top[:1] + ":\\" + + LOG_INFO("VSS_CREATE", "Creating a shadow copy for %s", volume) + + var unknown *ole.IUnknown + r, _, err := procCreateVssBackupComponents.Call(uintptr(unsafe.Pointer(&unknown))) + + if r == 0x80070005 { + LOG_ERROR("VSS_CREATE", "Only administrators can create shadow copies") + return top + } + + if r != 0 { + LOG_ERROR("VSS_CREATE", "Failed to create the VSS backup component: %d", r) + return top + } + + vssBackupComponent = getIVSS(unknown, IID_IVSS) + if vssBackupComponent == nil { + LOG_ERROR("VSS_CREATE", "Failed to create the VSS backup component") + return top + } + + ret := vssBackupComponent.InitializeForBackup() + if ret != 0 { + LOG_ERROR("VSS_INIT", "Shadow copy creation failed: InitializeForBackup returned %x", uint(ret)) + return top + } + + var async *IVSSAsync + ret, async = vssBackupComponent.GatherWriterMetadata() + if ret != 0 { + LOG_ERROR("VSS_GATHER", "Shadow copy creation failed: GatherWriterMetadata returned %x", uint(ret)) + return top + } + + if async == nil { + LOG_ERROR("VSS_GATHER", + "Shadow copy creation failed: GatherWriterMetadata failed to return a valid IVssAsync object") + return top + } + + if !async.Wait(20) { + LOG_ERROR("VSS_GATHER", "Shadow copy creation failed: 
GatherWriterMetadata didn't finish properly") + return top + } + async.Release() + + var snapshotSetID ole.GUID + + ret = vssBackupComponent.StartSnapshotSet(&snapshotSetID) + if ret != 0 { + LOG_ERROR("VSS_START", "Shadow copy creation failed: StartSnapshotSet returned %x", uint(ret)) + return top + } + + ret = vssBackupComponent.AddToSnapshotSet(volume, &snapshotID) + if ret != 0 { + LOG_ERROR("VSS_ADD", "Shadow copy creation failed: AddToSnapshotSet returned %x", uint(ret)) + return top + } + + s, _ := ole.StringFromIID(&snapshotID) + LOG_DEBUG("VSS_ID", "Creating shadow copy %s", s) + + ret = vssBackupComponent.SetBackupState() + if ret != 0 { + LOG_ERROR("VSS_SET", "Shadow copy creation failed: SetBackupState returned %x", uint(ret)) + return top + } + + ret, async = vssBackupComponent.PrepareForBackup() + if ret != 0 { + LOG_ERROR("VSS_PREPARE", "Shadow copy creation failed: PrepareForBackup returned %x", uint(ret)) + return top + } + if async == nil { + LOG_ERROR("VSS_PREPARE", + "Shadow copy creation failed: PrepareForBackup failed to return a valid IVssAsync object") + return top + } + + if !async.Wait(20) { + LOG_ERROR("VSS_PREPARE", "Shadow copy creation failed: PrepareForBackup didn't finish properly") + return top + } + async.Release() + + ret, async = vssBackupComponent.DoSnapshotSet() + if ret != 0 { + LOG_ERROR("VSS_SNAPSHOT", "Shadow copy creation failed: DoSnapshotSet returned %x", uint(ret)) + return top + } + if async == nil { + LOG_ERROR("VSS_SNAPSHOT", + "Shadow copy creation failed: DoSnapshotSet failed to return a valid IVssAsync object") + return top + } + + if !async.Wait(60) { + LOG_ERROR("VSS_SNAPSHOT", "Shadow copy creation failed: DoSnapshotSet didn't finish properly") + return top + } + async.Release() + + + properties := SnapshotProperties { + } + + ret = vssBackupComponent.GetSnapshotProperties(snapshotID, &properties) + if ret != 0 { + LOG_ERROR("VSS_PROPERTIES", "GetSnapshotProperties returned %x", ret) + return top + } + + 
SnapshotIDString, _ := ole.StringFromIID(&properties.SnapshotID) + SnapshotSetIDString, _ := ole.StringFromIID(&properties.SnapshotSetID) + + LOG_DEBUG("VSS_PROPERTY", "SnapshotID: %s", SnapshotIDString) + LOG_DEBUG("VSS_PROPERTY", "SnapshotSetID: %s", SnapshotSetIDString) + + LOG_DEBUG("VSS_PROPERTY", "SnapshotDeviceObject: %s", uint16ArrayToString(properties.SnapshotDeviceObject)) + LOG_DEBUG("VSS_PROPERTY", "OriginalVolumeName: %s", uint16ArrayToString(properties.OriginalVolumeName)) + LOG_DEBUG("VSS_PROPERTY", "OriginatingMachine: %s", uint16ArrayToString(properties.OriginatingMachine)) + LOG_DEBUG("VSS_PROPERTY", "OriginatingMachine: %s", uint16ArrayToString(properties.OriginatingMachine)) + LOG_DEBUG("VSS_PROPERTY", "ServiceMachine: %s", uint16ArrayToString(properties.ServiceMachine)) + LOG_DEBUG("VSS_PROPERTY", "ExposedName: %s", uint16ArrayToString(properties.ExposedName)) + LOG_DEBUG("VSS_PROPERTY", "ExposedPath: %s", uint16ArrayToString(properties.ExposedPath)) + + LOG_INFO("VSS_DONE", "Shadow copy %s created", SnapshotIDString) + + snapshotPath := uint16ArrayToString(properties.SnapshotDeviceObject) + + shadowLink = path.Join(top, DUPLICACY_DIRECTORY) + "\\shadow" + os.Remove(shadowLink) + err = os.Symlink(snapshotPath + "\\", shadowLink) + if err != nil { + LOG_ERROR("VSS_SYMLINK", "Failed to create a symbolic link to the shadow copy just created: %v", err) + return top + } + + return shadowLink + "\\" + top[2:] + +} + + diff --git a/duplicacy_snapshot.go b/duplicacy_snapshot.go new file mode 100644 index 0000000..75699ed --- /dev/null +++ b/duplicacy_snapshot.go @@ -0,0 +1,315 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "os" + "fmt" + "time" + "path" + "strings" + "strconv" + "io/ioutil" + "encoding/json" + "encoding/hex" +) + +// Snapshot represents a backup of the repository. 
+type Snapshot struct { + ID string // the snapshot id; must be different for different repositories + Revision int // the revision number + Options string // options used to create this snapshot (some not included) + Tag string // user-assigned tag + StartTime int64 // at what time the snapshot was created + EndTime int64 // at what time the snapshot was done + FileSize int64 // total file size + NumberOfFiles int64 // number of files + + // A sequence of chunks whose aggregated content is the json representation of 'Files'. + FileSequence []string + + // A sequence of chunks whose aggregated content is the json representation of 'ChunkHashes'. + ChunkSequence []string + + // A sequence of chunks whose aggregated content is the json representation of 'ChunkLengths'. + LengthSequence []string + + Files []*Entry // list of files and subdirectories + + ChunkHashes []string // a sequence of chunks representing the file content + ChunkLengths []int // the length of each chunk + + Flag bool // used to mark certain snapshots for deletion or copy + + discardAttributes bool +} + +// CreateEmptySnapshot creates an empty snapshot. +func CreateEmptySnapshot (id string) (snapshto *Snapshot) { + return &Snapshot{ + ID : id, + Revision : 0, + StartTime: time.Now().Unix(), + } +} + +// CreateSnapshotFromDirectory creates a snapshot from the local directory 'top'. Only 'Files' +// will be constructed, while 'ChunkHashes' and 'ChunkLengths' can only be populated after uploading. 
+func CreateSnapshotFromDirectory(id string, top string) (snapshot *Snapshot, skippedDirectories []string, + skippedFiles []string, err error) { + + snapshot = &Snapshot { + ID : id, + Revision: 0, + StartTime: time.Now().Unix(), + } + + var patterns []string + patternFile, err := ioutil.ReadFile(path.Join(top, DUPLICACY_DIRECTORY, "filters")) + if err == nil { + for _, pattern := range strings.Split(string(patternFile), "\n") { + pattern = strings.TrimSpace(pattern) + if len(pattern) == 0 { + continue + } + + if pattern[0] != '+' && pattern[0] != '-' { + pattern = "+" + pattern + } + + if pattern == "+" || pattern == "-" { + continue + } + + patterns = append(patterns, pattern) + } + + LOG_INFO("SNAPSHOT_FILTER", "Loaded %d include/exclude pattern(s)", len(patterns)) + + if IsTracing() { + for _, pattern := range patterns { + LOG_TRACE("SNAPSHOT_PATTERN", "Pattern: %s", pattern) + } + } + + } + + directories := make([]*Entry, 0, 256) + directories = append(directories, CreateEntry("", 0, 0, 0)) + + snapshot.Files = make([]*Entry, 0, 256) + + attributeThreshold := 1024 * 1024 + if attributeThresholdValue, found := os.LookupEnv("DUPLICACY_ATTRIBUTE_THRESHOLD"); found && attributeThresholdValue != "" { + attributeThreshold, _ = strconv.Atoi(attributeThresholdValue) + } + + for len(directories) > 0 { + + directory := directories[len(directories) - 1] + directories = directories[:len(directories) - 1] + snapshot.Files = append(snapshot.Files, directory) + subdirectories, skipped, err := ListEntries(top, directory.Path, &snapshot.Files, patterns, snapshot.discardAttributes) + if err != nil { + LOG_WARN("LIST_FAILURE", "Failed to list subdirectory: %v", err) + skippedDirectories = append(skippedDirectories, directory.Path) + continue + } + + directories = append(directories, subdirectories...) + skippedFiles = append(skippedFiles, skipped...) 
+ + if !snapshot.discardAttributes && len(snapshot.Files) > attributeThreshold { + LOG_INFO("LIST_ATTRIBUTES", "Discarding file attributes") + snapshot.discardAttributes = true + for _, file := range snapshot.Files { + file.Attributes = nil + } + } + } + + // Remove the root entry + snapshot.Files = snapshot.Files[1:] + + return snapshot, skippedDirectories, skippedFiles, nil +} + +// CreateSnapshotFromDescription creates a snapshot from json decription. +func CreateSnapshotFromDescription(description []byte) (snapshot *Snapshot, err error) { + + var root map[string] interface{} + + err = json.Unmarshal(description, &root) + if err != nil { + return nil, err + } + + snapshot = &Snapshot {} + + if value, ok := root["id"]; !ok { + return nil, fmt.Errorf("No id is specified in the snapshot") + } else if snapshot.ID, ok = value.(string); !ok { + return nil, fmt.Errorf("Invalid id is specified in the snapshot") + } + + if value, ok := root["revision"]; !ok { + return nil, fmt.Errorf("No revision is specified in the snapshot") + } else if _, ok = value.(float64); !ok { + return nil, fmt.Errorf("Invalid revision is specified in the snapshot") + } else { + snapshot.Revision = int(value.(float64)) + } + + if value, ok := root["tag"]; !ok { + } else if snapshot.Tag, ok = value.(string); !ok { + return nil, fmt.Errorf("Invalid tag is specified in the snapshot") + } + + if value, ok := root["options"]; !ok { + } else if snapshot.Options, ok = value.(string); !ok { + return nil, fmt.Errorf("Invalid options is specified in the snapshot") + } + + if value, ok := root["start_time"]; !ok { + return nil, fmt.Errorf("No creation time is specified in the snapshot") + } else if _, ok = value.(float64); !ok { + return nil, fmt.Errorf("Invalid creation time is specified in the snapshot") + } else { + snapshot.StartTime = int64(value.(float64)) + } + + if value, ok := root["end_time"]; !ok { + return nil, fmt.Errorf("No creation time is specified in the snapshot") + } else if _, ok = 
value.(float64); !ok { + return nil, fmt.Errorf("Invalid creation time is specified in the snapshot") + } else { + snapshot.EndTime = int64(value.(float64)) + } + + if value, ok := root["file_size"]; ok { + if _, ok = value.(float64); ok { + snapshot.FileSize = int64(value.(float64)) + } + } + + if value, ok := root["number_of_files"]; ok { + if _, ok = value.(float64); ok { + snapshot.NumberOfFiles = int64(value.(float64)) + } + } + + for _, sequenceType := range []string { "files", "chunks", "lengths" } { + if value, ok := root[sequenceType]; !ok { + return nil, fmt.Errorf("No %s are specified in the snapshot", sequenceType) + } else if _, ok = value.([]interface{}); !ok { + return nil, fmt.Errorf("Invalid %s are specified in the snapshot", sequenceType) + } else { + array := value.([]interface{}) + sequence := make([]string, len(array)) + for i := 0; i < len(array); i++ { + if hashInHex, ok := array[i].(string); !ok { + return nil, fmt.Errorf("Invalid file sequence is specified in the snapshot") + } else if hash, err := hex.DecodeString(hashInHex); err != nil { + return nil, fmt.Errorf("Hash %s is not a valid hex string in the snapshot", hashInHex) + } else { + sequence[i] = string(hash) + } + } + + snapshot.SetSequence(sequenceType, sequence) + } + } + + return snapshot, nil +} + +// LoadChunks construct 'ChunkHashes' from the json description. 
+func (snapshot *Snapshot) LoadChunks(description []byte) (err error) { + + var root [] interface {} + err = json.Unmarshal(description, &root) + if err != nil { + return err + } + + snapshot.ChunkHashes = make([]string, len(root)) + + for i, object := range root { + if hashInHex, ok := object.(string); !ok { + return fmt.Errorf("Invalid chunk hash is specified in the snapshot") + } else if hash, err := hex.DecodeString(hashInHex); err != nil { + return fmt.Errorf("The chunk hash %s is not a valid hex string", hashInHex) + } else { + snapshot.ChunkHashes[i] = string(hash) + } + } + + return err +} + +// LoadLengths construct 'ChunkLengths' from the json description. +func (snapshot *Snapshot) LoadLengths(description []byte) (err error) { + return json.Unmarshal(description, &snapshot.ChunkLengths) +} + +// MarshalJSON creates a json representation of the snapshot. +func (snapshot *Snapshot) MarshalJSON() ([] byte, error) { + + object := make(map[string]interface{}) + + object["id"] = snapshot.ID + object["revision"] = snapshot.Revision + object["options"] = snapshot.Options + object["tag"] = snapshot.Tag + object["start_time"] = snapshot.StartTime + object["end_time"] = snapshot.EndTime + + if snapshot.FileSize != 0 && snapshot.NumberOfFiles != 0 { + object["file_size"] = snapshot.FileSize + object["number_of_files"] = snapshot.NumberOfFiles + } + object["files"] = encodeSequence(snapshot.FileSequence) + object["chunks"] = encodeSequence(snapshot.ChunkSequence) + object["lengths"] = encodeSequence(snapshot.LengthSequence) + + return json.Marshal(object) +} + +// MarshalSequence creates a json represetion for the specified chunk sequence. 
+func (snapshot *Snapshot) MarshalSequence(sequenceType string) ([] byte, error) { + + if sequenceType == "files" { + return json.Marshal(snapshot.Files) + } else if sequenceType == "chunks" { + return json.Marshal(encodeSequence(snapshot.ChunkHashes)) + } else { + return json.Marshal(snapshot.ChunkLengths) + } +} + +// SetSequence assign a chunk sequence to the specified field. +func (snapshot *Snapshot) SetSequence(sequenceType string, sequence [] string) { + if sequenceType == "files" { + snapshot.FileSequence = sequence + } else if sequenceType == "chunks" { + snapshot.ChunkSequence = sequence + } else { + snapshot.LengthSequence = sequence + } +} + +// encodeSequence turns a sequence of binary hashes into a sequence of hex hashes. +func encodeSequence(sequence[] string) ([] string) { + + sequenceInHex := make([]string, len(sequence)) + + for i, hash := range sequence { + sequenceInHex[i] = hex.EncodeToString([]byte(hash)) + } + + return sequenceInHex +} + + diff --git a/duplicacy_snapshotmanager.go b/duplicacy_snapshotmanager.go new file mode 100644 index 0000000..1091a0e --- /dev/null +++ b/duplicacy_snapshotmanager.go @@ -0,0 +1,2256 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "io" + "os" + "fmt" + "sort" + "bytes" + "regexp" + "strconv" + "strings" + "time" + "path" + "io/ioutil" + "encoding/json" + "encoding/hex" + + "github.com/aryann/difflib" +) + +const ( + secondsInDay = 86400 +) + +// FossilCollection contains fossils and temporary files found during a snapshot deletions. +type FossilCollection struct { + + // At what time the fossil collection was finished + EndTime int64 `json:"end_time"` + + // The lastest revision for each snapshot id when the fossil collection was created. 
+ LastRevisions map[string] int `json:"last_revisions"` + + // Fossils (i.e., chunks not referenced by any snapshots) + Fossils []string `json:"fossils"` + + // Temporary files. + Temporaries []string `json:"temporaries"` +} + +// CreateFossilCollection creates an empty fossil collection +func CreateFossilCollection(allSnapshots map[string][] *Snapshot) *FossilCollection{ + + lastRevisions := make(map[string] int) + for id, snapshots := range allSnapshots { + lastRevisions[id] = snapshots[len(snapshots) - 1].Revision + } + + return &FossilCollection { + LastRevisions : lastRevisions, + } +} + +// IsDeletable determines if the previously collected fossils are safe to be permanently removed. If so, it will +// also returns a number of snapshots that were created during or after these fossils were being collected. +// Therefore, some fossils may be referenced by these new snapshots and they must be resurrected. +func (collection *FossilCollection) IsDeletable(isStrongConsistent bool, ignoredIDs [] string, + allSnapshots map[string][] *Snapshot) (isDeletable bool, newSnapshots []*Snapshot) { + + hasNewSnapshot := make(map[string]bool) + lastSnapshotTime := make(map[string]int64) + for snapshotID, snapshotList := range allSnapshots { + + if len(snapshotList) == 0 { + continue + } + + ignored := false + for _, ignoredID := range ignoredIDs { + if snapshotID == ignoredID { + ignored = true + } + } + + if ignored { + LOG_INFO("SNAPSHOT_NOT_CONSIDERED", "Ignored snapshot %s", snapshotID) + continue + } + + lastRevision := collection.LastRevisions[snapshotID] + + // We want to handle snapshot ids such as 'repo@host' so that one new snapshot from that host means other + // repositories on the same host are safe -- because presumably one host can do one backup at a time. 
+ hostID := snapshotID + if strings.Contains(hostID, "@") { + hostID = strings.SplitN(hostID, "@", 2)[1] + } + + if _, found := hasNewSnapshot[hostID]; !found { + hasNewSnapshot[hostID] = false + lastSnapshotTime[hostID] = 0 + } + + for _, snapshot := range snapshotList { + + if snapshot.Revision <= lastRevision { + // This is an old snapshot known by this fossil collection + continue + } + + extraTime := 0 + if !isStrongConsistent { + extraTime = secondsInDay / 2 + } + + // If this snapshot ends before this fossil collection, then it is still possible that another snapshot + // might be in progress (although very unlikely). So we only deem it deletable if that is not the case. + if snapshot.EndTime > collection.EndTime + int64(extraTime){ + hasNewSnapshot[hostID] = true + newSnapshots = append(newSnapshots, snapshot) + break + } else { + LOG_TRACE("SNAPSHOT_UNDELETABLE", + "New snapshot %s revision %d doesn't meet the fossil deletion criteria", + snapshot.ID, snapshot.Revision) + } + } + + if !hasNewSnapshot[hostID] { + LOG_TRACE("SNAPSHOT_NO_NEW", "No new snapshot from %s since the fossil collection step", snapshotID) + } + + lastSnapshot := allSnapshots[snapshotID][len(allSnapshots[snapshotID]) - 1] + if lastSnapshot.EndTime > lastSnapshotTime[hostID] { + lastSnapshotTime[hostID] = lastSnapshot.EndTime + } + } + + maxSnapshotRunningTime := int64(7) + + for hostID, value := range hasNewSnapshot { + if value == false { + // In case of a dormant repository, a fossil collection is safe if no new snapshot has been seen for a + // snapshot id during the last 7 days. A snapshot created at the roughly same time as this fossil + // collection would have finsihed already, while a snapshot currently being created does not affect + // this fossil collection. 
+ if lastSnapshotTime[hostID] > 0 && lastSnapshotTime[hostID] < time.Now().Unix() - maxSnapshotRunningTime * secondsInDay { + LOG_INFO("SNAPSHOT_INACTIVE", "Ignore snapshot %s whose last revision was created %d days ago", + hostID, maxSnapshotRunningTime) + continue + } + return false, nil + } + } + + return true, newSnapshots +} + +func (collection *FossilCollection) AddFossil(hash string) { + collection.Fossils = append(collection.Fossils, hash) +} + +func (collection *FossilCollection) AddTemporary(temporary string) { + collection.Temporaries = append(collection.Temporaries, temporary) +} + +func (collection *FossilCollection) IsEmpty() bool { + return len(collection.Fossils) == 0 && len(collection.Temporaries) == 0 +} + +// SnapshotManager is mainly responsible for downloading, and deleting snapshots. +type SnapshotManager struct { + + // These are variables shared with the backup manager + config *Config + storage Storage + fileChunk *Chunk + snapshotCache *FileStorage + + chunkDownloader *ChunkDownloader + +} + +// CreateSnapshotManager creates a snapshot manager +func CreateSnapshotManager(config *Config, storage Storage) *SnapshotManager { + + manager := &SnapshotManager { + config: config, + storage: storage, + fileChunk: CreateChunk(config, true), + } + + return manager +} + +// DownloadSnapshot downloads the specified snapshot. 
+func (manager *SnapshotManager) DownloadSnapshot(snapshotID string, revision int) *Snapshot { + + snapshotDir := fmt.Sprintf("snapshots/%s", snapshotID) + manager.storage.CreateDirectory(0, snapshotDir) + manager.snapshotCache.CreateDirectory(0, snapshotDir) + + snapshotPath := fmt.Sprintf("snapshots/%s/%d", snapshotID, revision) + + // We must check if the snapshot file exists in the storage, because the snapshot cache may store a copy of the + // file even if the snapshot has been deleted in the storage (possibly by a different client) + exist, _, _, err := manager.storage.GetFileInfo(0, snapshotPath) + if err != nil { + LOG_ERROR("SNAPSHOT_INFO", "Failed to get the information on the snapshot %s at revision %d: %v", + snapshotID, revision, err) + return nil + } + + if !exist { + LOG_ERROR("SNAPSHOT_NOT_EXIST", "Snapshot %s at revision %d does not exist", snapshotID, revision) + return nil + + } + + description := manager.DownloadFile(snapshotPath, snapshotPath) + + snapshot, err := CreateSnapshotFromDescription(description) + + if err != nil { + LOG_ERROR("SNAPSHOT_PARSE", "Failed to parse the snapshot %s at revision %d: %v", snapshotID, revision, err) + return nil + } + + // Overwrite the snapshot ID; this allows snapshot dirs to be renamed freely + snapshot.ID = snapshotID + + return snapshot +} + +// sequenceReader loads the chunks pointed to by 'sequence' one by one as needed. This avoid loading all chunks into +// the memory before passing them to the json unmarshaller. 
+type sequenceReader struct { + sequence [] string + buffer *bytes.Buffer + index int + refillFunc func(hash string) ([]byte) +} + +// Read reads a new chunk using the refill function when there is no more data in the buffer +func (reader *sequenceReader)Read(data []byte) (n int, err error) { + if len(reader.buffer.Bytes()) == 0 { + if reader.index < len(reader.sequence) { + newData := reader.refillFunc(reader.sequence[reader.index]) + reader.buffer.Write(newData) + reader.index++ + } else { + return 0, io.EOF + } + } + + return reader.buffer.Read(data) +} + +func (manager *SnapshotManager) CreateChunkDownloader() { + if manager.chunkDownloader == nil { + manager.chunkDownloader = CreateChunkDownloader(manager.config, manager.storage, manager.snapshotCache, false, 1) + } +} + +// DownloadSequence returns the content represented by a sequence of chunks. +func (manager *SnapshotManager) DownloadSequence(sequence []string) (content []byte) { + manager.CreateChunkDownloader() + for _, chunkHash := range sequence { + i := manager.chunkDownloader.AddChunk(chunkHash) + chunk := manager.chunkDownloader.WaitForChunk(i) + content = append(content, chunk.GetBytes()...) 
+ } + + return content +} + +func (manager *SnapshotManager) DownloadSnapshotFileSequence(snapshot *Snapshot, patterns []string) bool { + + manager.CreateChunkDownloader() + + reader := sequenceReader { + sequence: snapshot.FileSequence, + buffer: new(bytes.Buffer), + refillFunc: func (chunkHash string) ([]byte) { + i := manager.chunkDownloader.AddChunk(chunkHash) + chunk := manager.chunkDownloader.WaitForChunk(i) + return chunk.GetBytes() + }, + } + + files := make([] *Entry, 0) + decoder := json.NewDecoder(&reader) + + // read open bracket + _, err := decoder.Token() + if err != nil { + LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: not a list of entries", + snapshot.ID, snapshot.Revision) + return false + } + + // while the array contains values + for decoder.More() { + var entry Entry + err = decoder.Decode(&entry) + if err != nil { + LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: %v", + snapshot.ID, snapshot.Revision, err) + return false + } + + if patterns == nil { + entry.Attributes = nil + } else if len(patterns) != 0 { + if !MatchPath(entry.Path, patterns) { + entry.Attributes = nil + } + } + + files = append(files, &entry) + } + snapshot.Files = files + return true +} + + +// DownloadSnapshotSequence downloads the content represented by a sequence of chunks, and then unmarshal the content +// using the specified 'loadFunction'. It purpose is to decode the chunk sequences representing chunk hashes or chunk lengths +// in a snapshot. 
+func (manager *SnapshotManager) DownloadSnapshotSequence(snapshot *Snapshot, sequenceType string) bool { + + sequence := snapshot.ChunkSequence + loadFunc := snapshot.LoadChunks + + if sequenceType == "lengths" { + sequence = snapshot.LengthSequence + loadFunc = snapshot.LoadLengths + } + + content := manager.DownloadSequence(sequence) + + + if len(content) == 0 { + LOG_ERROR("SNAPSHOT_PARSE", "Failed to load %s specified in the snapshot %s at revision %d", + sequenceType, snapshot.ID, snapshot.Revision) + return false + } + + err := loadFunc(content) + if err != nil { + LOG_ERROR("SNAPSHOT_PARSE", "Failed to load %s specified in the snapshot %s at revision %d: %v", + sequenceType, snapshot.ID, snapshot.Revision, err) + return false + } + return true +} + +// DownloadSnapshotContents loads all chunk sequences in a snapshot. A snapshot, when just created, only contains +// some metadata and theree sequence representing files, chunk hashes, and chunk lengths. This function must be called +// for the actual content of the snapshot to be usable. +func (manager *SnapshotManager) DownloadSnapshotContents(snapshot *Snapshot, patterns []string) bool { + + manager.DownloadSnapshotFileSequence(snapshot, patterns) + manager.DownloadSnapshotSequence(snapshot, "chunks") + manager.DownloadSnapshotSequence(snapshot, "lengths") + + err := manager.CheckSnapshot(snapshot) + if err != nil { + LOG_ERROR("SNAPSHOT_CHECK", "The snapshot %s at revision %d contains an error: %v", + snapshot.ID, snapshot.Revision, err) + return false + } + + return true +} + +// CleanSnapshotCache removes all files not referenced by the specified 'snapshot' in the snapshot cache. +func (manager *SnapshotManager) CleanSnapshotCache(latestSnapshot *Snapshot, allSnapshots map[string] []*Snapshot) bool { + + if allSnapshots == nil { + // If the 'fossils' directory exists then don't clean the cache as all snapshots will be needed later + // during the fossil collection phase. 
The deletion procedure creates this direcotry. + // We only check this condition when allSnapshots is nil because + // in thise case it is the deletion procedure that is trying to clean the snapshot cache. + exist, _, _, err := manager.snapshotCache.GetFileInfo(0, "fossils") + + if err != nil { + LOG_ERROR("SNAPSHOT_CLEAN", "Failed to list the snapshot cache: %v", err) + return false + } + + if exist { + return true + } + } + + // This stores all chunks we want to keep + chunks := make(map[string]bool) + + if latestSnapshot != nil { + for _, chunkID := range manager.GetSnapshotChunks(latestSnapshot) { + chunks[chunkID] = true + } + } + + allSnapshotFiles := make(map[string]bool) + + for snapshotID, snapshotList := range allSnapshots { + for _, snapshot := range snapshotList { + allSnapshotFiles[fmt.Sprintf("%s/%d", snapshotID, snapshot.Revision)] = false + } + } + + if latestSnapshot != nil { + allSnapshotFiles[fmt.Sprintf("%s/%d", latestSnapshot.ID, latestSnapshot.Revision)] = false + } + + allCachedSnapshots, _ := manager.ListAllFiles(manager.snapshotCache, "snapshots/") + for _, snapshotFile := range allCachedSnapshots { + if snapshotFile[len(snapshotFile) - 1] == '/' { + continue + } + + if _, found := allSnapshotFiles[snapshotFile]; !found { + LOG_DEBUG("SNAPSHOT_CLEAN", "Delete cached snapshot file %s not found in the storage", snapshotFile) + manager.snapshotCache.DeleteFile(0, path.Join("snapshots", snapshotFile)) + continue + } + + description, err := ioutil.ReadFile(path.Join(manager.snapshotCache.storageDir, + "snapshots", snapshotFile)) + if err != nil { + LOG_WARN("SNAPSHOT_CACHE", "Failed to read the cached snapshot file: %v", err) + continue + } + + cachedSnapshot, err := CreateSnapshotFromDescription(description) + + if err != nil { + LOG_ERROR("SNAPSHOT_CACHE", "Failed to parse the cached snapshot file %s: %v", snapshotFile, err) + continue + } + + isComplete := true + for _, chunkHash := range cachedSnapshot.ChunkSequence { + chunkID := 
manager.config.GetChunkIDFromHash(chunkHash) + + if _, exist, _, _ := manager.snapshotCache.FindChunk(0, chunkID, false); !exist { + if _, exist, _, _ = manager.storage.FindChunk(0, chunkID, false); !exist { + isComplete = false + break + } + } + } + + if !isComplete { + LOG_DEBUG("SNAPSHOT_CLEAN", "Delete cached snapshot file %s with nonexistent chunks", snapshotFile) + manager.snapshotCache.DeleteFile(0, path.Join("snapshots", snapshotFile)) + continue + } + + for _, chunkHash := range cachedSnapshot.ChunkSequence { + chunkID := manager.config.GetChunkIDFromHash(chunkHash) + LOG_DEBUG("SNAPSHOT_CLEAN", "Snapshot %s revision %d needs chunk %s", cachedSnapshot.ID, cachedSnapshot.Revision, chunkID) + chunks[chunkID] = true + } + } + + allFiles, _ := manager.ListAllFiles(manager.snapshotCache, "chunks/") + for _, file := range allFiles { + if file[len(file) - 1] != '/' { + chunkID := strings.Replace(file, "/", "", -1) + if _, found := chunks[chunkID]; !found { + LOG_DEBUG("SNAPSHOT_CLEAN", "Delete chunk %s from the snapshot cache", chunkID) + err := manager.snapshotCache.DeleteFile(0, path.Join("chunks", file)) + if err != nil { + LOG_WARN("SNAPSHOT_CLEAN", "Failed to remove the chunk %s from the snapshot cache: %v", + file, err) + } + } + } + } + + return true + +} + +// ListSnapshotIDs returns all snapshot ids. +func (manager *SnapshotManager) ListSnapshotIDs() (snapshotIDs [] string, err error) { + + LOG_TRACE("SNAPSHOT_LIST_IDS", "Listing all snapshot ids") + + dirs, _, err := manager.storage.ListFiles(0, "snapshots/") + if err != nil { + return nil, err + } + + for _, dir := range dirs { + if len(dir) > 0 && dir[len(dir) - 1] == '/' { + snapshotIDs = append(snapshotIDs, dir[:len(dir) - 1]) + } + } + + return snapshotIDs, nil +} + +// ListSnapshotRevisions returns the list of all revisions given a snapshot id. 
+func (manager *SnapshotManager) ListSnapshotRevisions(snapshotID string) (revisions [] int, err error) { + + LOG_TRACE("SNAPSHOT_LIST_REVISIONS", "Listing revisions for snapshot %s", snapshotID) + + snapshotDir := fmt.Sprintf("snapshots/%s/", snapshotID) + + err = manager.storage.CreateDirectory(0, snapshotDir) + if err != nil { + return nil, err + } + + err = manager.snapshotCache.CreateDirectory(0, snapshotDir) + if err != nil { + LOG_WARN("SNAPSHOT_CACHE_DIR", "Failed to create the snapshot cache directory %s: %v", snapshotDir, err) + } + + files, _, err := manager.storage.ListFiles(0, snapshotDir) + if err != nil { + return nil, err + } + + for _, file := range files { + if len(file) > 0 && file[len(file) - 1] != '/' { + revision, err := strconv.Atoi(file) + if err == nil { + revisions = append(revisions, revision) + } + } + } + + sort.Ints(revisions) + + return revisions, nil +} + +// DownloadLatestSnapshot downloads the snapshot with the largest revision number. +func (manager *SnapshotManager) downloadLatestSnapshot(snapshotID string) (remote *Snapshot) { + + LOG_TRACE("SNAPSHOT_DOWNLOAD_LATEST", "Downloading latest revision for snapshot %s", snapshotID) + + revisions, err := manager.ListSnapshotRevisions(snapshotID) + + if err != nil { + LOG_ERROR("SNAPSHOT_LIST", "Failed to list the revisions of the snapshot %s: %v", snapshotID, err) + return nil + } + + latest := 0 + for _, revision := range revisions { + if revision > latest { + latest = revision + } + } + + if latest > 0 { + remote = manager.DownloadSnapshot(snapshotID, latest) + } + + if remote != nil { + manager.DownloadSnapshotContents(remote, nil) + } + + return remote +} + +// ListAllFiles return all files and subdirectories in the subtree of the 'top' directory in the specified 'storage'. 
+func (manager *SnapshotManager) ListAllFiles(storage Storage, top string) (allFiles []string, allSizes []int64) { + + directories := make([]string, 0, 1024) + + directories = append(directories, top) + + for len(directories) > 0 { + + dir := directories[len(directories) - 1] + directories = directories[:len(directories) - 1] + + LOG_TRACE("LIST_FILES", "Listing %s", dir) + + files, sizes, err := storage.ListFiles(0, dir) + if err != nil { + LOG_ERROR("LIST_FILES", "Failed to list the directory %s: %v", dir, err) + return nil, nil + } + + if len(dir) > len(top) { + allFiles = append(allFiles, dir[len(top):]) + allSizes = append(allSizes, 0) + } + + for i, file := range files { + if len(file) > 0 && file[len(file) - 1] == '/' { + directories = append(directories, dir + file) + } else { + allFiles = append(allFiles, (dir + file)[len(top):]) + allSizes = append(allSizes, sizes[i]) + } + } + + if top == "chunks/" { + // We're listing all chunks so this is the perfect place to detect if a directory contains too many + // chunks. Create sub-directories if necessary + if len(files) > 1024 && !storage.IsFastListing() { + for i := 0; i < 256; i++ { + subdir := dir + fmt.Sprintf("%02x\n", i) + manager.storage.CreateDirectory(0, subdir) + } + } + } else { + // Remove chunk sub-directories that are empty + if len(files) == 0 && strings.HasPrefix(dir, "chunks/") && dir != "chunks/" { + storage.DeleteFile(0, dir) + } + } + } + + return allFiles, allSizes +} + +// GetSnapshotChunks returns all chunks referenced by a given snapshot. 
+func (manager *SnapshotManager) GetSnapshotChunks(snapshot *Snapshot) (chunks [] string) { + + for _, chunkHash := range snapshot.FileSequence { + chunks = append(chunks, manager.config.GetChunkIDFromHash(chunkHash)) + } + + for _, chunkHash := range snapshot.ChunkSequence { + chunks = append(chunks, manager.config.GetChunkIDFromHash(chunkHash)) + } + + for _, chunkHash := range snapshot.LengthSequence { + chunks = append(chunks, manager.config.GetChunkIDFromHash(chunkHash)) + } + + if len(snapshot.ChunkHashes) == 0 { + + description := manager.DownloadSequence(snapshot.ChunkSequence) + err := snapshot.LoadChunks(description) + if err != nil { + LOG_ERROR("SNAPSHOT_CHUNK", "Failed to load chunks for snapshot %s at revision %d: %v", + snapshot.ID, snapshot.Revision, err) + return nil + } + } + + for _, chunkHash := range snapshot.ChunkHashes { + chunks = append(chunks, manager.config.GetChunkIDFromHash(chunkHash)) + } + + return chunks +} + +// ListSnapshots shows the information about a snapshot. 
func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList []int, tag string,
                                              showFiles bool, showChunks bool) int {

    LOG_DEBUG("LIST_PARAMETERS", "id: %s, revisions: %v, tag: %s, showFiles: %t, showChunks: %t",
              snapshotID, revisionsToList, tag, showFiles, showChunks)

    var snapshotIDs [] string
    var err error

    // An empty snapshot id means every snapshot id found on the storage.
    if snapshotID == "" {
        snapshotIDs, err = manager.ListSnapshotIDs()
        if err != nil {
            LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snpashots: %v", err)
            return 0
        }
    } else {
        snapshotIDs = []string { snapshotID }
    }

    numberOfSnapshots := 0

    for _, snapshotID = range snapshotIDs {

        // An empty revision list means every revision of this snapshot id.
        revisions := revisionsToList
        if len(revisions) == 0 {
            revisions, err = manager.ListSnapshotRevisions(snapshotID)
            if err != nil {
                LOG_ERROR("SNAPSHOT_LIST", "Failed to list all revisions for snapshot %s: %v", snapshotID, err)
                return 0
            }
        }

        for _, revision := range revisions {

            snapshot := manager.DownloadSnapshot(snapshotID, revision)
            creationTime := time.Unix(snapshot.StartTime, 0).Format("2006-01-02 15:04")
            tagWithSpace := ""
            if len(snapshot.Tag) > 0 {
                tagWithSpace = snapshot.Tag + " "
            }
            // NOTE(review): the summary line is printed before the tag filter below, so
            // snapshots with a non-matching tag are still listed (only their details are
            // skipped) -- confirm this is intended.
            LOG_INFO("SNAPSHOT_INFO", "Snapshot %s revision %d created at %s %s%s",
                     snapshotID, revision, creationTime, tagWithSpace, snapshot.Options)

            if tag != "" && snapshot.Tag != tag {
                continue
            }

            // Fetch the file list only when it is going to be displayed.
            if showFiles {
                manager.DownloadSnapshotFileSequence(snapshot, nil)
            }

            if showFiles {
                // 'maxSize'/'maxSizeDigits' track the widest file size seen so the listing
                // can be column-aligned.
                maxSize := int64(9)
                maxSizeDigits := 1
                totalFiles := 0
                totalFileSize := int64(0)
                lastChunk := 0

                for _, file := range snapshot.Files {
                    if file.IsFile() {
                        totalFiles++
                        totalFileSize += file.Size
                        // NOTE(review): a single 'if' only widens by one digit per file;
                        // the Diff method below uses a 'for' loop here -- confirm which is intended.
                        if file.Size > maxSize {
                            maxSize = maxSize * 10 + 9
                            maxSizeDigits += 1
                        }
                        if file.EndChunk > lastChunk {
                            lastChunk = file.EndChunk
                        }
                    }
                }

                for _, file := range snapshot.Files {
                    if file.IsFile() {
                        LOG_INFO("SNAPSHOT_FILE", "%s", file.String(maxSizeDigits))
                    }
                }

                metaChunks := len(snapshot.FileSequence) + len(snapshot.ChunkSequence) + len(snapshot.LengthSequence)
                LOG_INFO("SNAPSHOT_STATS", "Files: %d, total size: %d, file chunks: %d, metadata chunks: %d",
                         totalFiles, totalFileSize, lastChunk + 1, metaChunks)
            }

            if showChunks {
                for _, chunkID := range manager.GetSnapshotChunks(snapshot) {
                    LOG_INFO("SNAPSHOT_CHUNKS", "chunk: %s", chunkID)
                }
            }

            numberOfSnapshots++
        }
    }

    return numberOfSnapshots

}

// CheckSnapshots verifies that the chunks referenced by the specified snapshots exist on the
// storage, optionally verifying file contents, searching for fossils, resurrecting fossils,
// and printing per-snapshot chunk statistics.
func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToCheck []int, tag string, showStatistics bool,
                                               checkFiles bool, searchFossils bool, resurrect bool) bool {

    LOG_DEBUG("LIST_PARAMETERS", "id: %s, revisions: %v, tag: %s, showStatistics: %t, checkFiles: %t, searchFossils: %t, resurrect: %t",
              snapshotID, revisionsToCheck, tag, showStatistics, checkFiles, searchFossils, resurrect)

    snapshotMap := make(map[string] [] *Snapshot)
    var err error

    // Stores the chunk file size for each chunk
    chunkSizeMap := make(map[string]int64)

    // Indicate whether or not a chunk is shared by multiple snapshots
    chunkUniqueMap := make(map[string]bool)

    // Store the index of the snapshot id that references each chunk; if the chunk is shared by
    // multiple snapshot ids, the index is -1
    chunkSnapshotMap := make(map[string]int)

    LOG_INFO("SNAPSHOT_CHECK", "Listing all chunks")
    allChunks, allSizes := manager.ListAllFiles(manager.storage, "chunks/")

    for i, chunk := range allChunks {
        if len(chunk) == 0 || chunk[len(chunk) - 1] == '/'{
            continue
        }

        // Fossils (renamed chunks pending deletion) are not counted as live chunks here.
        if strings.HasSuffix(chunk, ".fsl") {
            continue
        }

        // Chunk ids are stored with nesting separators removed.
        chunk = strings.Replace(chunk, "/", "", -1)
        chunkSizeMap[chunk] = allSizes[i]
    }

    // Statistics need every snapshot id, even when only one id was requested.
    if snapshotID == "" || showStatistics {
        snapshotIDs, err := manager.ListSnapshotIDs()
        if err != nil {
            LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snpashots: %v", err)
            return false
        }

        for _, snapshotID := range snapshotIDs {
            snapshotMap[snapshotID] = nil
        }

    } else {
        snapshotMap[snapshotID] = nil
    }


    snapshotIDIndex := 0
    for snapshotID, _ = range snapshotMap {

        revisions := revisionsToCheck
        if len(revisions) == 0 || showStatistics {
            revisions, err = manager.ListSnapshotRevisions(snapshotID)
            if err != nil {
                LOG_ERROR("SNAPSHOT_LIST", "Failed to list all revisions for snapshot %s: %v", snapshotID, err)
                return false
            }
        }

        for _, revision := range revisions {
            snapshot := manager.DownloadSnapshot(snapshotID, revision)
            snapshotMap[snapshotID] = append(snapshotMap[snapshotID], snapshot)

            if tag != "" && snapshot.Tag != tag {
                continue
            }

            // Full-content verification delegates to VerifySnapshot and skips the
            // existence-only checks below.
            if checkFiles {
                manager.DownloadSnapshotContents(snapshot, nil)
                manager.VerifySnapshot(snapshot)
                continue
            }

            chunks := make(map[string]bool)
            for _, chunkID := range manager.GetSnapshotChunks(snapshot) {
                chunks[chunkID] = true
            }

            missingChunks := 0
            for chunkID, _ := range chunks {

                _, found := chunkSizeMap[chunkID]

                if !found {
                    if !searchFossils {
                        missingChunks += 1
                        LOG_WARN("SNAPHOST_VALIDATE",
                                 "Chunk %s referenced by snapshot %s at revision %d does not exist",
                                 chunkID, snapshotID, revision)
                        continue
                    }

                    // The chunk was not in the listing; look for it (including its fossil) directly.
                    chunkPath, exist, size, err := manager.storage.FindChunk(0, chunkID, true)
                    if err != nil {
                        LOG_ERROR("SNAPHOST_VALIDATE", "Failed to check the existence of chunk %s: %v",
                                  chunkID, err)
                        return false
                    }

                    if !exist {
                        missingChunks += 1
                        LOG_WARN("SNAPHOST_VALIDATE",
                                 "Chunk %s referenced by snapshot %s at revision %d does not exist",
                                 chunkID, snapshotID, revision)
                        continue
                    }

                    if resurrect {
                        manager.resurrectChunk(chunkPath, chunkID)
                    } else {
                        LOG_WARN("SNAPHOST_FOSSIL", "Chunk %s referenced by snapshot %s at revision %d " +
                                 "has been marked as a fossil", chunkID, snapshotID, revision)
                    }

                    chunkSizeMap[chunkID] = size
                }

                // First sighting marks the chunk unique; any later sighting clears the flag.
                if unique, found := chunkUniqueMap[chunkID]; !found {
                    chunkUniqueMap[chunkID] = true
                } else {
                    if unique {
                        chunkUniqueMap[chunkID] = false
                    }
                }

                // Track which snapshot id owns the chunk; -1 means shared across ids.
                if previousSnapshotIDIndex, found := chunkSnapshotMap[chunkID]; !found {
                    chunkSnapshotMap[chunkID] = snapshotIDIndex
                } else if previousSnapshotIDIndex != snapshotIDIndex && previousSnapshotIDIndex != -1 {
                    chunkSnapshotMap[chunkID] = -1
                }
            }

            if missingChunks > 0 {
                LOG_ERROR("SNAPSHOT_CHECK", "Some chunks referenced by snapshot %s at revision %d are missing",
                          snapshotID, revision)
                return false
            }

            LOG_INFO("SNAPSHOT_CHECK", "All chunks referenced by snapshot %s at revision %d exist",
                     snapshotID, revision)
        }

        snapshotIDIndex += 1
    }


    if showStatistics {
        for snapshotID, snapshotList := range snapshotMap {

            // Union of all chunks referenced by any revision of this snapshot id.
            snapshotChunks := make(map[string]bool)

            for _, snapshot := range snapshotList {

                chunks := make(map[string]bool)
                for _, chunkID := range manager.GetSnapshotChunks(snapshot) {
                    chunks[chunkID] = true
                    snapshotChunks[chunkID] = true
                }

                var totalChunkSize int64
                var uniqueChunkSize int64

                for chunkID, _ := range chunks {
                    chunkSize := chunkSizeMap[chunkID]
                    totalChunkSize += chunkSize
                    if chunkUniqueMap[chunkID] {
                        uniqueChunkSize += chunkSize
                    }
                }

                files := ""
                if snapshot.FileSize != 0 && snapshot.NumberOfFiles != 0 {
                    files = fmt.Sprintf("%d files (%s bytes), ", snapshot.NumberOfFiles, PrettyNumber(snapshot.FileSize))
                }
                LOG_INFO("SNAPSHOT_CHECK", "Snapshot %s at revision %d: %s%s total chunk bytes, %s unique chunk bytes",
                         snapshot.ID, snapshot.Revision, files, PrettyNumber(totalChunkSize), PrettyNumber(uniqueChunkSize))
            }

            // Per-id totals: 'unique' here means not shared with any other snapshot id.
            var totalChunkSize int64
            var uniqueChunkSize int64
            for chunkID, _ := range snapshotChunks {
                chunkSize := chunkSizeMap[chunkID]
                totalChunkSize += chunkSize

                if chunkSnapshotMap[chunkID] != -1 {
                    uniqueChunkSize += chunkSize
                }
            }
            LOG_INFO("SNAPSHOT_CHECK", "Snapshot %s all revisions: %s total chunk bytes, %s unique chunk bytes",
                     snapshotID, PrettyNumber(totalChunkSize), PrettyNumber(uniqueChunkSize))
        }

    }

    return true

}

// ConvertSequence converts a sequence of chunk hashes into a sequence of chunk ids.
func (manager *SnapshotManager) ConvertSequence(sequence []string) (result []string) {
    result = make([]string, len(sequence))
    for i, hash := range sequence {
        result[i] = manager.config.GetChunkIDFromHash(hash)
    }
    return result
}

// PrintSnapshot prints the snapshot in the json format (with chunk hashes converted into chunk ids)
func (manager *SnapshotManager) PrintSnapshot(snapshot *Snapshot) bool {

    object := make(map[string]interface{})

    object["id"] = snapshot.ID
    object["revision"] = snapshot.Revision
    object["tag"] = snapshot.Tag
    object["start_time"] = snapshot.StartTime
    object["end_time"] = snapshot.EndTime

    object["file_sequence"] = manager.ConvertSequence(snapshot.FileSequence)
    object["chunk_sequence"] = manager.ConvertSequence(snapshot.ChunkSequence)
    object["length_sequence"] = manager.ConvertSequence(snapshot.LengthSequence)

    object["chunks"] = manager.ConvertSequence(snapshot.ChunkHashes)
    object["lengths"] = snapshot.ChunkLengths

    // By default the json serialization of a file entry contains the path in base64 format. This is
    // to convert every file entry into an object which include the path in a more readable format.
    var files []map[string]interface{}
    for _, file := range snapshot.Files {
        files = append(files, file.convertToObject(false))
    }
    object["files"] = files

    description, err := json.MarshalIndent(object, "", "    ")

    if err != nil {
        LOG_ERROR("SNAPSHOT_PRINT", "Failed to marshal the snapshot %s at revision %d: %v",
                  snapshot.ID, snapshot.Revision, err)
        return false
    }

    fmt.Printf("%s\n", string(description))

    return true
}

// VerifySnapshot verifies that every file in the snapshot has the correct hash. It does this by downloading chunks
// and computing the whole file hash for each file.
+func (manager *SnapshotManager) VerifySnapshot(snapshot *Snapshot) bool { + + err := manager.CheckSnapshot(snapshot) + + if err != nil { + LOG_ERROR("SNAPSHOT_CHECK", "Snapshot %s at revision %d has an error: %v", + snapshot.ID, snapshot.Revision, err) + return false + } + + files := make([]*Entry, 0, len(snapshot.Files) / 2) + for _, file := range snapshot.Files { + if file.IsFile() && file.Size != 0 { + files = append(files, file) + } + } + + sort.Sort(ByChunk(files)) + corruptedFiles := 0 + for _, file := range files { + if !manager.RetrieveFile(snapshot, file, func([]byte) {} ) { + corruptedFiles++ + } + LOG_TRACE("SNAPSHOT_VERIFY", "%s", file.Path) + } + + if corruptedFiles > 0 { + LOG_WARN("SNAPSHOT_VERIFY", "Snapshot %s at revision %d contains %d corrupted files", + snapshot.ID, snapshot.Revision, corruptedFiles) + return false + } else { + LOG_INFO("SNAPSHOT_VERIFY", "All files in snapshot %s at revision %d have been successfully verified", + snapshot.ID, snapshot.Revision) + return true + } +} + +// RetrieveFile retrieve the file in the specifed snapshot. 
+func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, output func([]byte)()) bool { + + if file.Size == 0 { + return true + } + + manager.CreateChunkDownloader() + + fileHasher := manager.config.NewFileHasher() + alternateHash := false + if strings.HasPrefix(file.Hash, "#") { + alternateHash = true + } + + var chunk *Chunk + currentHash := "" + + for i := file.StartChunk; i <= file.EndChunk; i++ { + start := 0 + if i == file.StartChunk { + start = file.StartOffset + } + end := snapshot.ChunkLengths[i] + if i == file.EndChunk { + end = file.EndOffset + } + + hash := snapshot.ChunkHashes[i] + if currentHash != hash { + i := manager.chunkDownloader.AddChunk(hash) + chunk = manager.chunkDownloader.WaitForChunk(i) + currentHash = hash + } + + output(chunk.GetBytes()[start:end]) + if alternateHash { + fileHasher.Write([]byte(hex.EncodeToString([]byte(hash)))) + } else { + fileHasher.Write(chunk.GetBytes()[start:end]) + } + } + + fileHash := hex.EncodeToString(fileHasher.Sum(nil)) + if alternateHash { + fileHash = "#" + fileHash + } + if strings.ToLower(fileHash) != strings.ToLower(file.Hash) { + LOG_WARN("SNAPSHOT_HASH", "File %s has mismatched hashes: %s vs %s", file.Path, file.Hash, fileHash) + return false + } + return true +} + +// FindFile returns the file entry that has the given file name. +func (manager *SnapshotManager) FindFile(snapshot *Snapshot, filePath string) (*Entry) { + for _, entry := range snapshot.Files { + if entry.Path == filePath { + return entry + } + } + + LOG_ERROR("SNAPSHOT_FIND", "No file %s found in snapshot %s at revision %d", + filePath, snapshot.ID, snapshot.Revision) + return nil +} + +// PrintFile prints the specified file or the snapshot to stdout. 
func (manager *SnapshotManager) PrintFile(snapshotID string, revision int, path string) bool {

    LOG_DEBUG("PRINT_PARAMETERS", "id: %s, revision: %d, path: %s", snapshotID, revision, path)

    var snapshot *Snapshot

    // A non-positive revision means the latest revision.
    if revision <= 0 {
        snapshot = manager.downloadLatestSnapshot(snapshotID)
        if snapshot == nil {
            // NOTE(review): double negative in this message ("No ... is not found") -- likely
            // meant "No snapshot %s is found".
            LOG_ERROR("SNAPSHOT_PRINT", "No previous snapshot %s is not found", snapshotID)
            return false
        }
    } else {
        snapshot = manager.DownloadSnapshot(snapshotID, revision)
    }

    if snapshot == nil {
        return false
    }

    // Restrict the content download to the requested file when one is given.
    patterns := []string{}
    if path != "" {
        patterns = []string{path}
    }

    if !manager.DownloadSnapshotContents(snapshot, patterns) {
        return false
    }

    // With no path, print the whole snapshot as json.
    if path == "" {
        manager.PrintSnapshot(snapshot)
        return true
    }

    // Otherwise reassemble the file from its chunks and print its content.
    file := manager.FindFile(snapshot, path)
    var content [] byte
    if !manager.RetrieveFile(snapshot, file, func(chunk []byte) { content = append(content, chunk...) }) {
        LOG_ERROR("SNAPSHOT_RETRIEVE", "File %s is corrupted in snapshot %s at revision %d",
                  path, snapshot.ID, snapshot.Revision)
        return false
    }

    fmt.Printf("%s\n", string(content))

    return true
}

// Diff compares two snapshots, or two revision of a file if the file argument is given.
func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions []int,
                                     filePath string, compareByHash bool) bool {

    LOG_DEBUG("DIFF_PARAMETERS", "top: %s, id: %s, revision: %v, path: %s, compareByHash: %t",
              top, snapshotID, revisions, filePath, compareByHash)

    var leftSnapshot *Snapshot
    var rightSnapshot *Snapshot
    var err error

    // If no or only one revision is specified, use the on-disk version for the right-hand side.
    if len(revisions) <= 1 {
        // Only scan the repository if filePath is not provided
        if len(filePath) == 0 {
            rightSnapshot, _, _, err = CreateSnapshotFromDirectory(snapshotID, top)
            if err != nil {
                LOG_ERROR("SNAPSHOT_LIST", "Failed to list the directory %s: %v", top, err)
                return false
            }
        }
    } else {
        rightSnapshot = manager.DownloadSnapshot(snapshotID, revisions[1])
    }

    // If no revision is specified, use the latest revision as the left-hand side.
    if len(revisions) < 1 {
        leftSnapshot = manager.downloadLatestSnapshot(snapshotID)
        if leftSnapshot == nil {
            // NOTE(review): double negative in this message -- likely meant "No snapshot %s is found".
            LOG_ERROR("SNAPSHOT_DIFF", "No previous snapshot %s is not found", snapshotID)
            return false
        }
    } else {
        leftSnapshot = manager.DownloadSnapshot(snapshotID, revisions[0])
    }


    // File mode: reassemble both versions of the file and print a unified-style diff.
    if len(filePath) > 0 {

        manager.DownloadSnapshotContents(leftSnapshot, nil)
        // A Revision of 0 marks an in-memory snapshot built from the local directory,
        // which has no remote contents to download.
        if rightSnapshot != nil && rightSnapshot.Revision != 0 {
            manager.DownloadSnapshotContents(rightSnapshot, nil)
        }

        var leftFile []byte
        if !manager.RetrieveFile(leftSnapshot, manager.FindFile(leftSnapshot, filePath), func(content []byte) {
            leftFile = append(leftFile, content...)
        }) {
            LOG_ERROR("SNAPSHOT_DIFF", "File %s is corrupted in snapshot %s at revision %d",
                      filePath, leftSnapshot.ID, leftSnapshot.Revision)
            return false
        }

        var rightFile []byte
        if rightSnapshot != nil {
            if !manager.RetrieveFile(rightSnapshot, manager.FindFile(rightSnapshot, filePath), func(content []byte) {
                rightFile = append(rightFile, content...)
            }) {
                LOG_ERROR("SNAPSHOT_DIFF", "File %s is corrupted in snapshot %s at revision %d",
                          filePath, rightSnapshot.ID, rightSnapshot.Revision)
                return false
            }
        } else {
            // No right-hand snapshot: read the current file directly from the repository.
            var err error
            rightFile, err = ioutil.ReadFile(joinPath(top, filePath))
            if err != nil {
                LOG_ERROR("SNAPSHOT_DIFF", "Failed to read %s from the repository: %v", filePath, err)
                return false
            }
        }

        leftLines := strings.Split(string(leftFile), "\n")
        rightLines := strings.Split(string(rightFile), "\n")

        // Print up to 'before' context lines before a change and 'after' lines following one;
        // 'buffer' holds pending context and 'on' tracks whether we are inside a change block.
        after := 10
        before := 10
        var buffer [] string
        on := false
        distance := 0

        for _, diff := range difflib.Diff(leftLines, rightLines) {
            if diff.Delta == difflib.Common {
                line := fmt.Sprintf(" %s", diff.Payload)
                if on {
                    fmt.Printf("%s\n", line)
                    distance++
                    if distance > after {
                        on = false
                    }
                } else {
                    // Keep a sliding window of the last 'before' common lines.
                    buffer = append(buffer, line)
                    if len(buffer) > before {
                        buffer = buffer[1:]
                    }
                }
            } else {
                if !on {
                    // Entering a change block: print the separator and the saved context.
                    fmt.Printf("\n%s\n\n", strings.Repeat(" -", 40))
                    for _, line := range buffer {
                        fmt.Printf("%s\n", line)
                    }
                    buffer = nil
                    on = true
                }
                if diff.Delta == difflib.LeftOnly {
                    fmt.Printf("- %s\n", diff.Payload)
                } else {
                    fmt.Printf("+ %s\n", diff.Payload)
                }
                distance = 0
            }
        }

        return true
    }

    // We only need to decode the 'files' sequence, not 'chunkhashes' or 'chunklengthes'
    manager.DownloadSnapshotFileSequence(leftSnapshot, nil)
    if rightSnapshot != nil && rightSnapshot.Revision != 0 {
        manager.DownloadSnapshotFileSequence(rightSnapshot, nil)
    }

    maxSize := int64(9)
    maxSizeDigits := 1

    // Find the max Size value in order for pretty alignment.
    for _, file := range leftSnapshot.Files {
        for !file.IsDir() && file.Size > maxSize {
            maxSize = maxSize * 10 + 9
            maxSizeDigits += 1
        }
    }

    for _, file := range rightSnapshot.Files {
        for !file.IsDir() && file.Size > maxSize {
            maxSize = maxSize * 10 + 9
            maxSizeDigits += 1
        }
    }

    buffer := make([]byte, 32 * 1024)

    // Two-pointer walk over both (sorted) file lists, emitting '-' for left-only entries,
    // '+' for right-only entries, and a pair of lines for entries that differ.
    var i, j int
    for i < len(leftSnapshot.Files) || j < len(rightSnapshot.Files) {

        if i >= len(leftSnapshot.Files) {
            if rightSnapshot.Files[j].IsFile() {
                LOG_INFO("SNAPSHOT_DIFF", "+ %s", rightSnapshot.Files[j].String(maxSizeDigits))
            }
            j++
        } else if j >= len(rightSnapshot.Files) {
            if leftSnapshot.Files[i].IsFile() {
                LOG_INFO("SNAPSHOT_DIFF", "- %s", leftSnapshot.Files[i].String(maxSizeDigits))
            }
            i++
        } else {

            left := leftSnapshot.Files[i]
            right := rightSnapshot.Files[j]

            if !left.IsFile() {
                i++
                continue
            }

            if !right.IsFile() {
                j++
                continue
            }

            c := left.Compare(right)
            if c < 0 {
                LOG_INFO("SNAPSHOT_DIFF", "- %s", left.String(maxSizeDigits))
                i++
            } else if c > 0 {
                LOG_INFO("SNAPSHOT_DIFF", "+ %s", right.String(maxSizeDigits))
                j++
            } else {
                // Same path on both sides; decide whether the file changed.
                same := false
                if rightSnapshot.Revision == 0 {
                    // Right side is the local directory: compare by hash only when requested,
                    // otherwise by metadata (size/time).
                    if compareByHash && right.Size > 0 {
                        right.Hash = manager.config.ComputeFileHash(joinPath(top, right.Path), buffer)
                        same = left.Hash == right.Hash
                    } else {
                        same = right.IsSameAs(left)
                    }
                } else {
                    same = left.Hash == right.Hash
                }

                if !same {
                    LOG_INFO("SNAPSHOT_DIFF", "  %s", left.String(maxSizeDigits))
                    LOG_INFO("SNAPSHOT_DIFF", "* %s", right.String(maxSizeDigits))
                }
                i++
                j++
            }
        }
    }
    return true
}

// ShowHistory shows how a file changes over different revisions.
+func (manager *SnapshotManager) ShowHistory(top string, snapshotID string, revisions []int, + filePath string, showLocalHash bool) bool { + + LOG_DEBUG("HISTORY_PARAMETERS", "top: %s, id: %s, revisions: %v, path: %s, showLocalHash: %t", + top, snapshotID, revisions, filePath, showLocalHash) + + var err error + + if len(revisions) == 0 { + revisions, err = manager.ListSnapshotRevisions(snapshotID) + if err != nil { + LOG_ERROR("SNAPSHOT_LIST", "Failed to list all revisions for snapshot %s: %v", snapshotID, err) + return false + } + } + + var lastVersion *Entry + sort.Ints(revisions) + for _, revision := range revisions { + snapshot := manager.DownloadSnapshot(snapshotID, revision) + manager.DownloadSnapshotFileSequence(snapshot, nil) + file := manager.FindFile(snapshot, filePath) + + if file != nil { + + if !file.IsFile() { + continue + } + + modifiedFlag := "" + if lastVersion != nil && lastVersion.Hash != file.Hash { + modifiedFlag = "*" + } + LOG_INFO("SNAPSHOT_HISTORY", "%7d: %s%s", revision, file.String(15), modifiedFlag) + lastVersion = file + } else { + LOG_INFO("SNAPSHOT_HISTORY", "%7d:", revision) + } + + + } + + stat, err := os.Stat(joinPath(top, filePath)) + if stat != nil { + localFile := CreateEntry(filePath, stat.Size(), stat.ModTime().Unix(), 0) + modifiedFlag := "" + if lastVersion != nil && !lastVersion.IsSameAs(localFile) { + modifiedFlag = "*" + } + if showLocalHash { + localFile.Hash = manager.config.ComputeFileHash(joinPath(top, filePath), make([]byte, 32 * 1024)) + if lastVersion.Hash != localFile.Hash { + modifiedFlag = "*" + } + } + LOG_INFO("SNAPSHOT_HISTORY", "current: %s%s", localFile.String(15), modifiedFlag) + } else { + LOG_INFO("SNAPSHOT_HISTORY", "current:") + } + + return true +} + +// fossilizeChunk turns the chunk into a fossil. 
func (manager *SnapshotManager) fossilizeChunk(chunkID string, filePath string,
                                               exclusive bool, collection *FossilCollection) (bool) {
    // In exclusive mode no other client can be running, so the chunk can be deleted outright
    // instead of being renamed to a fossil first.
    if exclusive {
        err := manager.storage.DeleteFile(0, filePath)
        if err != nil {
            LOG_ERROR("CHUNK_DELETE", "Failed to remove the chunk %s: %v", chunkID, err)
            return false
        } else {
            LOG_TRACE("CHUNK_DELETE", "Deleted chunk file %s", chunkID)
        }

    } else {
        // Non-exclusive mode: rename the chunk to a '.fsl' fossil so it can still be
        // resurrected if a concurrent backup turns out to reference it.
        fossilPath := filePath + ".fsl"

        err := manager.storage.MoveFile(0, filePath, fossilPath)
        if err != nil {
            // The rename failed; if a fossil for this chunk already exists (e.g. from a
            // previous interrupted run), just delete the chunk file instead.
            if _, exist, _, _ := manager.storage.FindChunk(0, chunkID, true); exist {
                err := manager.storage.DeleteFile(0, filePath)
                if err == nil {
                    LOG_TRACE("CHUNK_DELETE", "Deleted chunk file %s as the fossil already exists", chunkID)
                }
            } else {
                LOG_ERROR("CHUNK_DELETE", "Failed to fossilize the chunk %s: %v", chunkID, err)
                return false
            }
        } else {
            LOG_TRACE("CHUNK_FOSSILIZE", "Fossilized chunk %s", chunkID)
        }

        // Record the fossil so a later prune run can decide to delete or resurrect it.
        collection.AddFossil(fossilPath)
    }

    return true

}

// resurrectChunk turns the fossil back into a chunk
func (manager *SnapshotManager) resurrectChunk(fossilPath string, chunkID string) (bool) {
    chunkPath, exist, _, err := manager.storage.FindChunk(0, chunkID, false)
    if err != nil {
        LOG_ERROR("CHUNK_FIND", "Failed to locate the path for the chunk %s: %v", chunkID, err)
        return false
    }

    if exist {
        // The chunk is already back in place; the leftover fossil can simply be removed.
        manager.storage.DeleteFile(0, fossilPath)
        LOG_INFO("FOSSIL_RECREATE", "The chunk %s already exists", chunkID)
    } else {
        err := manager.storage.MoveFile(0, fossilPath, chunkPath)
        if err != nil {
            LOG_ERROR("FOSSIL_RESURRECT", "Failed to resurrect the chunk %s from the fossil %s: %v",
                      chunkID, fossilPath, err)
            return false
        } else {
            LOG_INFO("FOSSIL_RESURRECT", "The chunk %s has been resurrected", fossilPath)
        }
    }
    return true
}



// PruneSnapshots deletes snapshots by revisions, tags, or a retention policy. The main idea is two-step
// fossil collection.
// 1.
Delete snapshots specified by revision, retention policy, with a tag. Find any resulting unreferenced +// chunks, and mark them as fossils (by renaming). After that, create a fossil collection file containing +// fossils collected during current run, and temporary files encountered. Also in the file is the latest +// revision for each snapshot id. Save this file to a local directory. +// +// 2. On next run, check if there is any new revision for each snapshot. Or if the lastest revision is too +// old, for instance, more than 7 days. This step is to identify snapshots that were being created while +// step 1 is in progress. For each fossil reference by any of these snapshots, move them back to the +// normal chunk directory. +// +// Note that a snapshot being created when step 2 is in progress may reference a fossil. To avoid this +// problem, never remove the lastest revision (unless exclusive is true), and only cache chunks referenced +// by the lastest revision. +func (manager *SnapshotManager) PruneSnapshots(top string, selfID string, snapshotID string, revisionsToBeDeleted []int, + tags []string, retentions []string, + exhaustive bool, exclusive bool, ignoredIDs []string, + dryRun bool, deleteOnly bool, collectOnly bool) bool { + + LOG_DEBUG("DELETE_PARAMETERS", + "id: %s, revisions: %v, tags: %v, retentions: %v, exhaustive: %t, exclusive: %t, " + + "dryrun: %t, deleteOnly: %t, collectOnly: %t", + snapshotID, revisionsToBeDeleted, tags, retentions, + exhaustive, exclusive, dryRun, deleteOnly, collectOnly) + + if len(revisionsToBeDeleted) > 0 && (len(tags) > 0 || len(retentions) > 0) { + LOG_WARN("DELETE_OPTIONS", "Tags or retention policy will be ignored if at least one revision is specified") + } + + logDir := path.Join(top, DUPLICACY_DIRECTORY, "logs") + os.Mkdir(logDir, 0700) + logFileName := path.Join(logDir, time.Now().Format("prune-log-20060102-150405")) + logFile, err := os.OpenFile(logFileName, os.O_WRONLY | os.O_CREATE | os.O_TRUNC, 0600) + + defer 
func() { + if logFile != nil { + logFile.Close() + } + } () + + // A retention policy is specified in the form 'interval:age', where both 'interval' and 'age' are numbers of + // days. A retention policy applies to a snapshot if the snapshot is older than the age. For snapshots older + // than the retention age, only one snapshot can be kept per interval. if interval is 0, then no snapshot older + // than the retention age will be kept. + // + // For example, ["30:365", "7:30", "1:1"] means to keep one snapshot per month for snapshots older than a year, + // one snapshot per week for snapshots older than a month, and one snapshot per day for snapshot older than a day. + // + // Note that policies must be sorted by the ages in decreasing order. + // + type RetentionPolicy struct { + Age int + Interval int + } + var retentionPolicies [] RetentionPolicy + + // Parse the retention policy if needed. + if len(revisionsToBeDeleted) == 0 && len(retentions) > 0 { + + retentionRegex := regexp.MustCompile(`^([0-9]+):([0-9]+)$`) + + for _, retention := range retentions { + retention = strings.TrimSpace(retention) + + matched := retentionRegex.FindStringSubmatch(retention) + + if matched == nil { + LOG_ERROR("RETENTION_INVALID", "Invalid retention policy: %s", retention) + return false + } + + age, _ := strconv.Atoi(matched[2]) + interval, _ := strconv.Atoi(matched[1]) + + if age < 1 || interval < 0 { + LOG_ERROR("RETENTION_INVALID", "Invalid retention policy: %s", retention) + return false + } + + policy := RetentionPolicy { + Age : age, + Interval : interval, + } + + retentionPolicies = append(retentionPolicies, policy) + } + + if len(retentionPolicies) == 0 { + LOG_ERROR("RETENTION_INVALID", "Invalid retention policy: %v", retentions) + return false + } + + for i, policy := range retentionPolicies { + if i == 0 || policy.Age < retentionPolicies[i - 1].Age { + if policy.Interval == 0 { + LOG_INFO("RETENTION_POLICY", "Keep no snapshots older than %d days", policy.Age) + } else 
{ + LOG_INFO("RETENTION_POLICY", "Keep 1 snapshot every %d day(s) if older than %d day(s)", + policy.Interval, policy.Age) + } + } + } + } + + allSnapshots := make(map[string] [] *Snapshot) + + // We must find all snapshots for all ids even if only one snapshot is specified to be deleted, + // because we need to find out which chunks are not referenced. + snapshotIDs, err := manager.ListSnapshotIDs() + if err != nil { + LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snpashots: %v", err) + return false + } + + for _, id := range snapshotIDs { + revisions, err := manager.ListSnapshotRevisions(id) + if err != nil { + LOG_ERROR("SNAPSHOT_LIST", "Failed to list all revisions for snapshot %s: %v", id, err) + return false + } + + sort.Ints(revisions) + var snapshots [] *Snapshot + for _, revision := range revisions { + snapshot := manager.DownloadSnapshot(id, revision) + if snapshot != nil { + snapshots = append(snapshots, snapshot) + } + } + + if len(snapshots) > 0 { + allSnapshots[id] = snapshots + } + } + + chunkDir := "chunks/" + + collectionRegex := regexp.MustCompile(`^([0-9]+)$`) + + collectionDir := "fossils" + manager.snapshotCache.CreateDirectory(0, collectionDir) + + collections, _, err := manager.snapshotCache.ListFiles(0, collectionDir) + maxCollectionNumber := 0 + + referencedFossils := make(map[string]bool) + + // Find fossil collections previsouly created, and delete fossils and temporary files in them if they are + // deletable. 
+ for _, collectionName := range collections { + + if collectOnly { + continue + } + + matched := collectionRegex.FindStringSubmatch(collectionName) + if matched == nil{ + continue + } + + collectionNumber, _ := strconv.Atoi(matched[1]) + if collectionNumber > maxCollectionNumber { + maxCollectionNumber = collectionNumber + } + + collectionFile := path.Join(collectionDir, collectionName) + manager.fileChunk.Reset(false) + + err := manager.snapshotCache.DownloadFile(0, collectionFile, manager.fileChunk) + if err != nil { + LOG_ERROR("FOSSIL_COLLECT", "Failed to read the fossil collection file %s: %v", collectionFile, err) + return false + } + + var collection FossilCollection + err = json.Unmarshal(manager.fileChunk.GetBytes(), &collection) + if err != nil { + LOG_ERROR("FOSSIL_COLLECT", "Failed to load the fossil collection file %s: %v", collectionFile, err) + return false + } + + for _, fossil := range collection.Fossils { + referencedFossils[fossil] = true + } + + LOG_INFO("FOSSIL_COLLECT", "Fossil collection %s found", collectionName) + + isDeletable, newSnapshots := collection.IsDeletable(manager.storage.IsStrongConsistent(), + ignoredIDs, allSnapshots) + + if isDeletable || exclusive { + + LOG_INFO("FOSSIL_DELETABLE", "Fossils from collection %s is eligible for deletion", collectionName) + + newChunks := make(map[string]bool) + + for _, newSnapshot := range newSnapshots { + for _, chunk := range manager.GetSnapshotChunks(newSnapshot) { + newChunks[chunk] = true + } + } + + for _, fossil := range collection.Fossils { + + chunk := fossil[len(chunkDir):] + chunk = strings.Replace(chunk, "/", "", -1) + chunk = strings.Replace(chunk, ".fsl", "", -1) + + if _, found := newChunks[chunk]; found { + // The fossil is referenced so it can't be deleted. 
+ if dryRun { + LOG_INFO("FOSSIL_RESURRECT", "Fossil %s would be resurrected: %v", chunk) + continue + } + + manager.resurrectChunk(fossil, chunk) + fmt.Fprintf(logFile, "Resurrected fossil %s (collection %s)\n", chunk, collectionName) + + } else { + if dryRun { + LOG_INFO("FOSSIL_DELETE", "The chunk %s would be permanently removed", chunk) + } else { + manager.storage.DeleteFile(0, fossil) + LOG_INFO("FOSSIL_DELETE", "The chunk %s has been permanently removed", chunk) + fmt.Fprintf(logFile, "Deleted fossil %s (collection %s)\n", chunk, collectionName) + } + } + } + + // Delete all temporary files if they still exist. + for _, temporary := range collection.Temporaries { + if dryRun { + LOG_INFO("TEMPORARY_DELETE", "The temporary file %s would be deleted", temporary) + } else { + // Fail silently, since temporary files are supposed to be renamed or deleted after upload is done + manager.storage.DeleteFile(0, temporary) + LOG_INFO("TEMPORARY_DELETE", "The temporary file %s has been deleted", temporary) + fmt.Fprintf(logFile, "Deleted temporary %s (collection %s)\n", temporary, collectionName) + } + } + + if !dryRun { + err = manager.snapshotCache.DeleteFile(0, collectionFile) + if err != nil { + LOG_WARN("FOSSIL_FILE", "Failed to remove the fossil collection file %s: %v", collectionFile, err) + } + } + LOG_TRACE("FOSSIL_END", "Finished processing fossil collection %s", collectionName) + } else { + LOG_INFO("FOSSIL_POSTPONE", + "Fossils from collection %s can't be deleted because deletion criteria aren't met", + collectionName) + } + } + + if deleteOnly { + return true + } + + toBeDeleted := 0 + + revisionMap := make(map[int]bool) + for _, revision := range revisionsToBeDeleted { + revisionMap[revision] = true + } + + tagMap := make(map[string]bool) + for _, tag := range tags { + tagMap[tag] = true + } + + // Find the snapshots that need to be deleted + for id, snapshots := range allSnapshots { + + if len(snapshotID) > 0 && id != snapshotID { + continue + } + + if 
len(revisionsToBeDeleted) > 0 { + // If revisions are specified ignore tags and the retention policy. + for _, snapshot := range snapshots { + if _, found := revisionMap[snapshot.Revision]; found { + snapshot.Flag = true + toBeDeleted++ + } + } + + continue + } else if len(retentionPolicies) > 0 { + + if len(snapshots) <= 1 { + continue + } + + lastSnapshotTime := int64(0) + now := time.Now().Unix() + i := 0 + for j, snapshot := range snapshots { + + if !exclusive && j == len(snapshots) - 1 { + continue + } + + if len(tagMap) > 0 { + if _, found := tagMap[snapshot.Tag]; found { + continue + } + } + + // Find out which retent policy applies based on the age. + for i < len(retentionPolicies) && + int(now - snapshot.StartTime) 0 { + for _, snapshot := range snapshots { + if _, found := tagMap[snapshot.Tag]; found { + snapshot.Flag = true + toBeDeleted++ + } + } + + } + } + + if toBeDeleted == 0 && exhaustive == false { + LOG_INFO("SNAPSHOT_NONE", "No snapshot to delete") + return false + } + + chunkRegex := regexp.MustCompile(`^[0-9a-f]+$`) + + referencedChunks := make(map[string]bool) + + // Now build all chunks referened by snapshot not deleted + for _, snapshots := range allSnapshots { + + if len(snapshots) > 0 { + latest := snapshots[len(snapshots) - 1] + if latest.Flag && !exclusive { + LOG_ERROR("SNAPSHOT_DELETE", + "The latest snapshot %s at revision %d can't be deleted in non-exclusive mode", + latest.ID, latest.Revision) + return false + } + } + + for _, snapshot := range snapshots { + if snapshot.Flag { + LOG_INFO("SNAPSHOT_DELETE", "Deleting snapshot %s at revision %d", snapshot.ID, snapshot.Revision) + continue + } + + chunks := manager.GetSnapshotChunks(snapshot) + + for _, chunk := range chunks { + // The initial value is 'false'. When a referenced chunk is found it will change the value to 'true'. 
+ referencedChunks[chunk] = false + } + } + } + + collection := CreateFossilCollection(allSnapshots) + + if exhaustive { + + // In exhaustive, we scan the entire chunk tree to find dangling chunks and temporaries. + allFiles, _ := manager.ListAllFiles(manager.storage, chunkDir) + for _, file := range allFiles { + if file[len(file) - 1] == '/' { + continue + } + + if strings.HasSuffix(file, ".tmp") { + + // This is a temporary chunk file. It can be a result of a restore operation still in progress, or + // a left-over from a restore operation that was terminated abruptly. + if dryRun { + LOG_INFO("CHUNK_TEMPORARY", "Found temporary file %s", file) + continue + } + + if exclusive { + // In exclusive mode, we assume no other restore operation is running concurrently. + err := manager.storage.DeleteFile(0, chunkDir + file) + if err != nil { + LOG_ERROR("CHUNK_TEMPORARY", "Failed to remove the temporary file %s: %v", file, err) + return false + } else { + LOG_DEBUG("CHUNK_TEMPORARY", "Deleted temporary file %s", file) + } + fmt.Fprintf(logFile, "Deleted temporary %s\n", file) + } else { + collection.AddTemporary(file) + } + continue + } else if strings.HasSuffix(file, ".fsl") { + // This is a fossil. If it is unreferenced, it can be a result of failing to save the fossil + // collection file after making it a fossil. 
+ if _, found := referencedFossils[file]; !found { + if dryRun { + LOG_INFO("FOSSIL_UNREFERENCED", "Found unreferenced fossil %s", file) + continue + } + + chunk := strings.Replace(file, "/", "", -1) + chunk = strings.Replace(chunk, ".fsl", "", -1) + + if _, found := referencedChunks[chunk]; found { + manager.resurrectChunk(chunkDir + file, chunk) + } else { + err := manager.storage.DeleteFile(0, chunkDir + file) + if err != nil { + LOG_WARN("FOSSIL_DELETE", "Failed to remove the unreferenced fossil %s: %v", file, err) + } else { + LOG_DEBUG("FOSSIL_DELETE", "Deleted unreferenced fossil %s", file) + } + fmt.Fprintf(logFile, "Deleted unreferenced fossil %s\n", file) + } + } + + continue + } + + chunk := strings.Replace(file, "/", "", -1) + + if !chunkRegex.MatchString(chunk) { + LOG_WARN("CHUNK_UNKONWN_FILE", "File %s is not a chunk", file) + continue + } + + if value, found := referencedChunks[chunk]; !found { + + if dryRun { + LOG_INFO("CHUNK_UNREFERENCED", "Found unreferenced chunk %s", chunk) + continue + } + + manager.fossilizeChunk(chunk, chunkDir + file, exclusive, collection) + if exclusive { + fmt.Fprintf(logFile, "Deleted chunk %s (exclusive mode)\n", chunk) + } else { + fmt.Fprintf(logFile, "Marked fossil %s\n", chunk) + } + + } else if value { + + // Note that the initial value is false. So if the value is true it means another copy of the chunk + // exists in a higher-level directory. 
+ + if dryRun { + LOG_INFO("CHUNK_REDUNDANT", "Found redundant chunk %s", chunk) + continue + } + + // This is a redundant chunk file (for instance D3/495A8D and D3/49/5A8D ) + err := manager.storage.DeleteFile(0, chunkDir + file) + if err != nil { + LOG_WARN("CHUNK_DELETE", "Failed to remove the redundant chunk file %s: %v", file, err) + } else { + LOG_TRACE("CHUNK_DELETE", "Removed the redundant chunk file %s", file) + } + fmt.Fprintf(logFile, "Deleted redundant chunk %s\n", file) + + } else { + referencedChunks[chunk] = true + LOG_DEBUG("CHUNK_KEEP", "Chunk %s is referenced", chunk) + } + } + } else { + // In non-exhaustive mode, only chunks that exist in the snapshots to be deleted but not other are identified + // as unreferenced chunks. + for _, snapshots := range allSnapshots { + for _, snapshot := range snapshots { + + if !snapshot.Flag { + continue + } + + chunks := manager.GetSnapshotChunks(snapshot) + + for _, chunk := range chunks { + + if _, found := referencedChunks[chunk]; found { + continue + } + + if dryRun { + LOG_INFO("CHUNK_UNREFERENCED", "Found unreferenced chunk %s", chunk) + continue + } + + chunkPath, exist, _, err := manager.storage.FindChunk(0, chunk, false) + if err != nil { + LOG_ERROR("CHUNK_FIND", "Failed to locate the path for the chunk %s: %v", chunk, err) + return false + } + + if !exist { + LOG_WARN("CHUNK_MISSING", "The chunk %s referenced by snapshot %s revision %d does not exist", + chunk, snapshot.ID, snapshot.Revision) + continue + } + + manager.fossilizeChunk(chunk, chunkPath, exclusive, collection) + if exclusive { + fmt.Fprintf(logFile, "Deleted chunk %s (exclusive mode)\n", chunk) + } else { + fmt.Fprintf(logFile, "Marked fossil %s\n", chunk) + } + + referencedChunks[chunk] = true + } + } + } + + + } + + // Save the fossil collection if it is not empty. 
+ if !collection.IsEmpty() && !dryRun { + + collection.EndTime = time.Now().Unix() + + collectionNumber := maxCollectionNumber + 1 + collectionFile := path.Join(collectionDir, fmt.Sprintf("%d", collectionNumber)) + + description, err := json.Marshal(collection) + if err != nil { + LOG_ERROR("FOSSIL_COLLECT", "Failed to create a json file for the fossil collection: %v", err) + return false + } + + err = manager.snapshotCache.UploadFile(0, collectionFile, description) + if err != nil { + LOG_ERROR("FOSSIL_COLLECT", "Failed to save the fossil collection file %s: %v", collectionFile, err) + return false + } + + LOG_INFO("FOSSIL_COLLECT", "Fossil collection %d saved", collectionNumber) + fmt.Fprintf(logFile, "Fossil collection %d saved\n", collectionNumber) + } + + // Now delete the snapshot files. + for _, snapshots := range allSnapshots { + for _, snapshot := range snapshots { + if !snapshot.Flag || dryRun { + continue + } + + snapshotPath := fmt.Sprintf("snapshots/%s/%d", snapshot.ID, snapshot.Revision) + err = manager.storage.DeleteFile(0, snapshotPath) + if err != nil { + LOG_ERROR("SNAPSHOT_DELETE", "Failed to delete the snapshot %s at revision %d: %v", + snapshot.ID, snapshot.Revision, err) + return false + } else { + LOG_INFO("SNAPSHOT_DELETE", "The snapshot %s at revision %d has been removed", + snapshot.ID, snapshot.Revision) + } + manager.snapshotCache.DeleteFile(0, snapshotPath) + fmt.Fprintf(logFile, "Deleted snapshot %s at revision %d\n", snapshot.ID, snapshot.Revision) + } + } + + if collection.IsEmpty() && !dryRun && toBeDeleted != 0 && !exclusive { + LOG_INFO("FOSSIL_NONE", + "No fossil collection has been created since deleted snapshots did not reference any unique chunks") + } + + var latestSnapshot *Snapshot + if len(allSnapshots[selfID]) > 0 { + latestSnapshot = allSnapshots[selfID][len(allSnapshots[selfID]) - 1] + } + + manager.CleanSnapshotCache(latestSnapshot, allSnapshots) + + return true +} + + +// CheckSnapshot performs sanity checks on the 
given snapshot. +func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) { + + lastChunk := 0 + lastOffset := 0 + var lastEntry *Entry + + numberOfChunks := len(snapshot.ChunkHashes) + + if numberOfChunks != len(snapshot.ChunkLengths) { + return fmt.Errorf("The number of chunk hashes (%d) is different from the number of chunk lengths (%d)", + numberOfChunks, len(snapshot.ChunkLengths)) + } + + entries := make([]*Entry, len(snapshot.Files)) + copy(entries, snapshot.Files) + sort.Sort(ByChunk(entries)) + + for _, entry := range snapshot.Files { + if lastEntry != nil && lastEntry.Compare(entry) >= 0 && !strings.Contains(lastEntry.Path, "\ufffd") { + return fmt.Errorf("The entry %s appears before the entry %s", lastEntry.Path, entry.Path) + } + lastEntry = entry + } + + for _, entry := range entries { + + if !entry.IsFile() || entry.Size == 0 { + continue + } + + if entry.StartChunk < 0 { + return fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk) + } + + if entry.EndChunk >= numberOfChunks { + return fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d", + entry.Path, entry.EndChunk, numberOfChunks) + } + + if entry.EndChunk < entry.StartChunk { + return fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d", + entry.Path, entry.StartChunk, entry.EndChunk) + } + + if entry.StartOffset > 0 { + if entry.StartChunk < lastChunk { + return fmt.Errorf("The file %s starts at chunk %d while the last chunk is %d", + entry.Path, entry.StartChunk, lastChunk) + } + + if entry.StartChunk > lastChunk + 1 { + return fmt.Errorf("The file %s starts at chunk %d while the last chunk is %d", + entry.Path, entry.StartChunk, lastChunk) + } + + if entry.StartChunk == lastChunk && entry.StartOffset < lastOffset { + return fmt.Errorf("The file %s starts at offset %d of chunk %d while the last file ends at offset %d", + entry.Path, entry.StartOffset, entry.StartChunk, lastOffset) + } + + if entry.StartChunk == 
entry.EndChunk && entry.StartOffset > entry.EndOffset { + return fmt.Errorf("The file %s starts at offset %d and ends at offset %d of the same chunk %d", + entry.Path, entry.StartOffset, entry.EndOffset, entry.StartChunk) + } + } + + fileSize := int64(0) + + for i := entry.StartChunk; i <= entry.EndChunk; i++ { + + start := 0 + if i == entry.StartChunk { + start = entry.StartOffset + } + end := snapshot.ChunkLengths[i] + if i == entry.EndChunk { + end = entry.EndOffset + } + + fileSize += int64(end - start) + } + + if entry.Size != fileSize { + return fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d", + entry.Path, entry.Size, fileSize) + } + + lastChunk = entry.EndChunk + lastOffset = entry.EndOffset + } + + if len(entries) > 0 && entries[0].StartChunk != 0 { + return fmt.Errorf("The first file starts at chunk %d", entries[0].StartChunk ) + } + if lastChunk < numberOfChunks - 1 { + return fmt.Errorf("The last file ends at chunk %d but the number of chunks is %d", lastChunk, numberOfChunks) + } + + return nil +} + +// DownloadFile downloads a non-chunk file from the storage. The only non-chunk files in the current implementation +// are snapshot files. 
+func (manager *SnapshotManager) DownloadFile(path string, derivationKey string) (content []byte) { + + if manager.storage.IsCacheNeeded() { + manager.fileChunk.Reset(false) + err := manager.snapshotCache.DownloadFile(0, path, manager.fileChunk) + if err == nil && len(manager.fileChunk.GetBytes()) > 0 { + LOG_DEBUG("DOWNLOAD_FILE_CACHE", "Loaded file %s from the snapshot cache", path) + return manager.fileChunk.GetBytes() + } + } + + manager.fileChunk.Reset(false) + err := manager.storage.DownloadFile(0, path, manager.fileChunk) + if err != nil { + LOG_ERROR("DOWNLOAD_FILE", "Failed to download the file %s: %v", path, err) + return nil + } + + err = manager.fileChunk.Decrypt(manager.config.FileKey, derivationKey) + if err != nil { + LOG_ERROR("DOWNLOAD_DECRYPT", "Failed to decrypt the file %s: %v", path, err) + return nil + } + + err = manager.snapshotCache.UploadFile(0, path, manager.fileChunk.GetBytes()) + if err != nil { + LOG_WARN("DOWNLOAD_FILE_CACHE", "Failed to add the file %s to the snapshot cache: %v", path, err) + } + + LOG_DEBUG("DOWNLOAD_FILE", "Downloaded file %s", path) + + return manager.fileChunk.GetBytes() +} + +// UploadFile uploads a non-chunk file from the storage. 
+func (manager *SnapshotManager) UploadFile(path string, derivationKey string, content []byte) bool { + manager.fileChunk.Reset(false) + manager.fileChunk.Write(content) + + if manager.storage.IsCacheNeeded() { + err := manager.snapshotCache.UploadFile(0, path, manager.fileChunk.GetBytes()) + if err != nil { + LOG_WARN("UPLOAD_CACHE", "Failed to cache the file %s: %v", path, err) + } else { + LOG_DEBUG("UPLOAD_FILE_CACHE", "Saved file %s to the snapshot cache", path) + } + } + + err := manager.fileChunk.Encrypt(manager.config.FileKey, derivationKey) + if err != nil { + LOG_ERROR("UPLOAD_File", "Failed to encrypt the file %s: %v", path, err) + return false + } + + err = manager.storage.UploadFile(0, path, manager.fileChunk.GetBytes()) + if err != nil { + LOG_ERROR("UPLOAD_File", "Failed to upload the file %s: %v", path, err) + return false + } + + LOG_DEBUG("UPLOAD_FILE", "Uploaded file %s", path) + + return true + +} diff --git a/duplicacy_snapshotmanager_test.go b/duplicacy_snapshotmanager_test.go new file mode 100644 index 0000000..797bd7a --- /dev/null +++ b/duplicacy_snapshotmanager_test.go @@ -0,0 +1,467 @@ +// Copyright (c) Acrosync LLC. All rights reserved. 
+// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "testing" + "os" + "fmt" + "time" + "path" + "strings" + "crypto/rand" + "encoding/json" + "encoding/hex" +) + +func createDummySnapshot(snapshotID string, revision int, endTime int64) * Snapshot { + return &Snapshot { + ID: snapshotID, + Revision: revision, + EndTime: endTime, + } +} + +func TestIsDeletable(t *testing.T) { + + //SetLoggingLevel(DEBUG) + + now := time.Now().Unix() + day := int64(3600 * 24) + + allSnapshots := make(map[string][] *Snapshot) + allSnapshots["host1"] = append([]*Snapshot{}, createDummySnapshot("host1", 1, now - 2 * day)) + allSnapshots["host2"] = append([]*Snapshot{}, createDummySnapshot("host2", 1, now - 2 * day)) + allSnapshots["host1"] = append(allSnapshots["host1"], createDummySnapshot("host1", 2, now - 1 * day)) + allSnapshots["host2"] = append(allSnapshots["host2"], createDummySnapshot("host2", 2, now - 1 * day)) + + collection := & FossilCollection { + EndTime: now - day - 3600, + LastRevisions: make(map[string] int), + } + + collection.LastRevisions["host1"] = 1 + collection.LastRevisions["host2"] = 1 + + isDeletable, newSnapshots := collection.IsDeletable(true, nil, allSnapshots) + if !isDeletable || len(newSnapshots) != 2 { + t.Errorf("Scenario 1: should be deletable, 2 new snapshots") + } + + collection.LastRevisions["host3"] = 1 + allSnapshots["host3"] = append([]*Snapshot{}, createDummySnapshot("host3", 1, now - 2 * day)) + + isDeletable, newSnapshots = collection.IsDeletable(true, nil, allSnapshots) + if isDeletable { + t.Errorf("Scenario 2: should not be deletable") + } + + allSnapshots["host3"] = append(allSnapshots["host3"], createDummySnapshot("host3", 2, now - day)) + isDeletable, newSnapshots = collection.IsDeletable(true, nil, allSnapshots) + if !isDeletable || len(newSnapshots) != 3 { + t.Errorf("Scenario 3: should be deletable, 3 new snapshots") + } + + collection.LastRevisions["host4"] 
= 1 + allSnapshots["host4"] = append([]*Snapshot{}, createDummySnapshot("host4", 1, now - 8 * day)) + + isDeletable, newSnapshots = collection.IsDeletable(true, nil, allSnapshots) + if !isDeletable || len(newSnapshots) != 3 { + t.Errorf("Scenario 4: should be deletable, 3 new snapshots") + } + + collection.LastRevisions["repository1@host5"] = 1 + allSnapshots["repository1@host5"] = append([]*Snapshot{}, createDummySnapshot("repository1@host5", 1, now - 3 * day)) + + collection.LastRevisions["repository2@host5"] = 1 + allSnapshots["repository2@host5"] = append([]*Snapshot{}, createDummySnapshot("repository2@host5", 1, now - 2 * day)) + + isDeletable, newSnapshots = collection.IsDeletable(true, nil, allSnapshots) + if isDeletable { + t.Errorf("Scenario 5: should not be deletable") + } + + allSnapshots["repository1@host5"] = append(allSnapshots["repository1@host5"], createDummySnapshot("repository1@host5", 2, now - day)) + isDeletable, newSnapshots = collection.IsDeletable(true, nil, allSnapshots) + if !isDeletable || len(newSnapshots) != 4 { + t.Errorf("Scenario 6: should be deletable, 4 new snapshots") + } +} + +func createTestSnapshotManager(testDir string) *SnapshotManager { + + os.RemoveAll(testDir) + os.MkdirAll(testDir, 0700) + + storage, _ := CreateFileStorage(testDir, 1) + storage.CreateDirectory(0, "chunks") + storage.CreateDirectory(0, "snapshots") + config := CreateConfig() + snapshotManager := CreateSnapshotManager(config, storage) + + cacheDir := path.Join(testDir, "cache") + snapshotCache, _ := CreateFileStorage(cacheDir, 1) + snapshotCache.CreateDirectory(0, "chunks") + snapshotCache.CreateDirectory(0, "snapshots") + + snapshotManager.snapshotCache = snapshotCache + return snapshotManager +} + +func uploadTestChunk(manager *SnapshotManager, content []byte) string { + + completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) { + LOG_INFO("UPLOAD_CHUNK", "Chunk %s size %d uploaded", chunk.GetID(), chunkSize) + } 
+ + chunkUploader := CreateChunkUploader(manager.config, manager.storage, nil, testThreads, nil) + chunkUploader.completionFunc = completionFunc + chunkUploader.Start() + + chunk := CreateChunk(manager.config, true) + chunk.Reset(true) + chunk.Write(content) + chunkUploader.StartChunk(chunk, 0) + chunkUploader.Stop() + + return chunk.GetHash() +} + +func uploadRandomChunk(manager *SnapshotManager, chunkSize int) string { + content := make([]byte, chunkSize) + _, err := rand.Read(content) + if err != nil { + LOG_ERROR("UPLOAD_RANDOM", "Error generating random content: %v", err) + return "" + } + + return uploadTestChunk(manager, content) +} + +func createTestSnapshot(manager *SnapshotManager, snapshotID string, revision int, startTime int64, endTime int64, chunkHashes []string) { + + snapshot := &Snapshot { + ID: snapshotID, + Revision: revision, + StartTime: startTime, + EndTime: endTime, + ChunkHashes: chunkHashes, + } + + var chunkHashesInHex [] string + for _, chunkHash := range chunkHashes { + chunkHashesInHex = append(chunkHashesInHex, hex.EncodeToString([]byte(chunkHash))) + } + + sequence, _ := json.Marshal(chunkHashesInHex) + snapshot.ChunkSequence = []string { uploadTestChunk(manager, sequence) } + + description, _ := snapshot.MarshalJSON() + path := fmt.Sprintf("snapshots/%s/%d", snapshotID, snapshot.Revision) + manager.storage.CreateDirectory(0, "snapshots/" + snapshotID) + manager.UploadFile(path, path, description) +} + +func checkTestSnapshots(manager *SnapshotManager, expectedSnapshots int, expectedFossils int) { + + var snapshotIDs [] string + var err error + + chunks := make(map[string]bool) + files, _ := manager.ListAllFiles(manager.storage, "chunks/") + for _, file := range files { + if file[len(file) - 1] == '/' { + continue + } + chunk := strings.Replace(file, "/", "", -1) + chunks[chunk] = false + } + + snapshotIDs, err = manager.ListSnapshotIDs() + if err != nil { + LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snpashots: %v", err) + return 
+ } + + numberOfSnapshots := 0 + + for _, snapshotID := range snapshotIDs { + + revisions, err := manager.ListSnapshotRevisions(snapshotID) + if err != nil { + LOG_ERROR("SNAPSHOT_LIST", "Failed to list all revisions for snapshot %s: %v", snapshotID, err) + return + } + + for _, revision := range revisions { + snapshot := manager.DownloadSnapshot(snapshotID, revision) + numberOfSnapshots++ + + for _, chunk := range manager.GetSnapshotChunks(snapshot) { + chunks[chunk] = true + } + } + } + + numberOfFossils := 0 + for chunk, referenced := range chunks { + if !referenced { + LOG_INFO("UNREFERENCED_CHUNK", "Unreferenced chunk %s", chunk) + numberOfFossils++ + } + } + + if numberOfSnapshots != expectedSnapshots { + LOG_ERROR("SNAPSHOT_COUNT", "Expecting %d snapshots, got %d instead", expectedSnapshots, numberOfSnapshots) + } + + if numberOfFossils != expectedFossils { + LOG_ERROR("FOSSIL_COUNT", "Expecting %d unreferenced chunks, got %d instead", expectedFossils, numberOfFossils) + } +} + +func TestSingleRepositoryPrune(t *testing.T) { + + setTestingT(t) + + testDir := path.Join(os.TempDir(), "duplicacy_test", "snapshot_test") + + snapshotManager := createTestSnapshotManager(testDir) + + chunkSize := 1024 + chunkHash1 := uploadRandomChunk(snapshotManager, chunkSize) + chunkHash2 := uploadRandomChunk(snapshotManager, chunkSize) + chunkHash3 := uploadRandomChunk(snapshotManager, chunkSize) + chunkHash4 := uploadRandomChunk(snapshotManager, chunkSize) + + now := time.Now().Unix() + day := int64(24 * 3600) + t.Logf("Creating 1 snapshot") + createTestSnapshot(snapshotManager, "repository1", 1, now - 3 * day - 3600, now - 3 * day - 60, []string { chunkHash1, chunkHash2 }) + checkTestSnapshots(snapshotManager, 1, 2) + + t.Logf("Creating 2 snapshots") + createTestSnapshot(snapshotManager, "repository1", 2, now - 2 * day - 3600, now - 2 * day - 60, []string { chunkHash2, chunkHash3 }) + createTestSnapshot(snapshotManager, "repository1", 3, now - 1 * day - 3600, now - 1 * day - 
60, []string { chunkHash3, chunkHash4 }) + checkTestSnapshots(snapshotManager, 3, 0) + + t.Logf("Removing snapshot repository1 revision 1 with --exclusive") + snapshotManager.PruneSnapshots(testDir, "repository1", "repository1", []int{1}, []string{}, []string{}, false, true, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 2, 0) + + t.Logf("Removing snapshot repository1 revision 2 without --exclusive") + snapshotManager.PruneSnapshots(testDir, "repository1", "repository1", []int{2}, []string{}, []string{}, false, false, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 1, 2) + + t.Logf("Creating 1 snapshot") + chunkHash5 := uploadRandomChunk(snapshotManager, chunkSize) + createTestSnapshot(snapshotManager, "repository1", 4, now + 1 * day - 3600 , now + 1 * day, []string { chunkHash4, chunkHash5 }) + checkTestSnapshots(snapshotManager, 2, 2) + + t.Logf("Prune without removing any snapshots -- fossils will be deleted") + snapshotManager.PruneSnapshots(testDir, "repository1", "repository1", []int{}, []string{}, []string{}, false, false, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 2, 0) +} + +func TestSingleHostPrune(t *testing.T) { + + setTestingT(t) + + testDir := path.Join(os.TempDir(), "duplicacy_test", "snapshot_test") + + snapshotManager := createTestSnapshotManager(testDir) + + chunkSize := 1024 + chunkHash1 := uploadRandomChunk(snapshotManager, chunkSize) + chunkHash2 := uploadRandomChunk(snapshotManager, chunkSize) + chunkHash3 := uploadRandomChunk(snapshotManager, chunkSize) + chunkHash4 := uploadRandomChunk(snapshotManager, chunkSize) + + now := time.Now().Unix() + day := int64(24 * 3600) + t.Logf("Creating 3 snapshots") + createTestSnapshot(snapshotManager, "vm1@host1", 1, now - 3 * day - 3600, now - 3 * day - 60, []string { chunkHash1, chunkHash2 }) + createTestSnapshot(snapshotManager, "vm1@host1", 2, now - 2 * day - 3600, now - 2 * day - 60, []string { chunkHash2, chunkHash3 }) + 
createTestSnapshot(snapshotManager, "vm2@host1", 1, now - 3 * day - 3600, now - 3 * day - 60, []string { chunkHash3, chunkHash4 }) + checkTestSnapshots(snapshotManager, 3, 0) + + t.Logf("Removing snapshot vm1@host1 revision 1 without --exclusive") + snapshotManager.PruneSnapshots(testDir, "vm1@host1", "vm1@host1", []int{1}, []string{}, []string{}, false, false, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 2, 2) + + t.Logf("Prune without removing any snapshots -- no fossils will be deleted") + snapshotManager.PruneSnapshots(testDir, "vm1@host1", "vm1@host1", []int{}, []string{}, []string{}, false, false, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 2, 2) + + t.Logf("Creating 1 snapshot") + chunkHash5 := uploadRandomChunk(snapshotManager, chunkSize) + createTestSnapshot(snapshotManager, "vm2@host1", 2, now + 1 * day - 3600 , now + 1 * day, []string { chunkHash4, chunkHash5 }) + checkTestSnapshots(snapshotManager, 3, 2) + + t.Logf("Prune without removing any snapshots -- fossils will be deleted") + snapshotManager.PruneSnapshots(testDir, "vm1@host1", "vm1@host1", []int{}, []string{}, []string{}, false, false, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 3, 0) + +} + +func TestMultipleHostPrune(t *testing.T) { + + setTestingT(t) + + testDir := path.Join(os.TempDir(), "duplicacy_test", "snapshot_test") + + snapshotManager := createTestSnapshotManager(testDir) + + chunkSize := 1024 + chunkHash1 := uploadRandomChunk(snapshotManager, chunkSize) + chunkHash2 := uploadRandomChunk(snapshotManager, chunkSize) + chunkHash3 := uploadRandomChunk(snapshotManager, chunkSize) + chunkHash4 := uploadRandomChunk(snapshotManager, chunkSize) + + now := time.Now().Unix() + day := int64(24 * 3600) + t.Logf("Creating 3 snapshot") + createTestSnapshot(snapshotManager, "vm1@host1", 1, now - 3 * day - 3600, now - 3 * day - 60, []string { chunkHash1, chunkHash2 }) + createTestSnapshot(snapshotManager, "vm1@host1", 2, 
now - 2 * day - 3600, now - 2 * day - 60, []string { chunkHash2, chunkHash3 }) + createTestSnapshot(snapshotManager, "vm2@host2", 1, now - 3 * day - 3600, now - 3 * day - 60, []string { chunkHash3, chunkHash4 }) + checkTestSnapshots(snapshotManager, 3, 0) + + t.Logf("Removing snapshot vm1@host1 revision 1 without --exclusive") + snapshotManager.PruneSnapshots(testDir, "vm1@host1", "vm1@host1", []int{1}, []string{}, []string{}, false, false, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 2, 2) + + t.Logf("Prune without removing any snapshots -- no fossils will be deleted") + snapshotManager.PruneSnapshots(testDir, "vm1@host1", "vm1@host1", []int{}, []string{}, []string{}, false, false, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 2, 2) + + t.Logf("Creating 1 snapshot") + chunkHash5 := uploadRandomChunk(snapshotManager, chunkSize) + createTestSnapshot(snapshotManager, "vm2@host2", 2, now + 1 * day - 3600 , now + 1 * day, []string {chunkHash4, chunkHash5}) + checkTestSnapshots(snapshotManager, 3, 2) + + t.Logf("Prune without removing any snapshots -- no fossils will be deleted") + snapshotManager.PruneSnapshots(testDir, "vm1@host1", "vm1@host1", []int{}, []string{}, []string{}, false, false, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 3, 2) + + t.Logf("Creating 1 snapshot") + chunkHash6 := uploadRandomChunk(snapshotManager, chunkSize) + createTestSnapshot(snapshotManager, "vm1@host1", 3, now + 1 * day - 3600 , now + 1 * day, []string {chunkHash5, chunkHash6}) + checkTestSnapshots(snapshotManager, 4, 2) + + t.Logf("Prune without removing any snapshots -- fossils will be deleted") + snapshotManager.PruneSnapshots(testDir, "vm1@host1", "vm1@host1", []int{}, []string{}, []string{}, false, false, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 4, 0) +} + +func TestPruneAndResurrect(t *testing.T) { + + setTestingT(t) + + testDir := path.Join(os.TempDir(), "duplicacy_test", 
"snapshot_test") + + snapshotManager := createTestSnapshotManager(testDir) + + chunkSize := 1024 + chunkHash1 := uploadRandomChunk(snapshotManager, chunkSize) + chunkHash2 := uploadRandomChunk(snapshotManager, chunkSize) + chunkHash3 := uploadRandomChunk(snapshotManager, chunkSize) + + now := time.Now().Unix() + day := int64(24 * 3600) + t.Logf("Creating 2 snapshots") + createTestSnapshot(snapshotManager, "vm1@host1", 1, now - 3 * day - 3600, now - 3 * day - 60, []string { chunkHash1, chunkHash2}) + createTestSnapshot(snapshotManager, "vm1@host1", 2, now - 2 * day - 3600, now - 2 * day - 60, []string { chunkHash2, chunkHash3}) + checkTestSnapshots(snapshotManager, 2, 0) + + t.Logf("Removing snapshot vm1@host1 revision 1 without --exclusive") + snapshotManager.PruneSnapshots(testDir, "vm1@host1", "vm1@host1", []int{1}, []string{}, []string{}, false, false, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 1, 2) + + t.Logf("Creating 1 snapshot") + chunkHash4 := uploadRandomChunk(snapshotManager, chunkSize) + createTestSnapshot(snapshotManager, "vm1@host1", 4, now + 1 * day - 3600 , now + 1 * day, []string { chunkHash4, chunkHash1}) + checkTestSnapshots(snapshotManager, 2, 2) + + t.Logf("Prune without removing any snapshots -- one fossil will be resurrected") + snapshotManager.PruneSnapshots(testDir, "vm1@host1", "vm1@host1", []int{}, []string{}, []string{}, false, false, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 2, 0) +} + +func TestInactiveHostPrune(t *testing.T) { + + setTestingT(t) + + testDir := path.Join(os.TempDir(), "duplicacy_test", "snapshot_test") + + snapshotManager := createTestSnapshotManager(testDir) + + chunkSize := 1024 + chunkHash1 := uploadRandomChunk(snapshotManager, chunkSize) + chunkHash2 := uploadRandomChunk(snapshotManager, chunkSize) + chunkHash3 := uploadRandomChunk(snapshotManager, chunkSize) + chunkHash4 := uploadRandomChunk(snapshotManager, chunkSize) + + now := time.Now().Unix() + day := 
int64(24 * 3600) + t.Logf("Creating 3 snapshot") + createTestSnapshot(snapshotManager, "vm1@host1", 1, now - 3 * day - 3600, now - 3 * day - 60, []string { chunkHash1, chunkHash2} ) + createTestSnapshot(snapshotManager, "vm1@host1", 2, now - 2 * day - 3600, now - 2 * day - 60, []string { chunkHash2, chunkHash3} ) + // Host2 is inactive + createTestSnapshot(snapshotManager, "vm2@host2", 1, now - 7 * day - 3600, now - 7 * day - 60, []string { chunkHash3, chunkHash4} ) + checkTestSnapshots(snapshotManager, 3, 0) + + t.Logf("Removing snapshot vm1@host1 revision 1") + snapshotManager.PruneSnapshots(testDir, "vm1@host1", "vm1@host1", []int{1}, []string{}, []string{}, false, false, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 2, 2) + + t.Logf("Prune without removing any snapshots -- no fossils will be deleted") + snapshotManager.PruneSnapshots(testDir, "vm1@host1", "vm1@host1", []int{}, []string{}, []string{}, false, false, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 2, 2) + + t.Logf("Creating 1 snapshot") + chunkHash5 := uploadRandomChunk(snapshotManager, chunkSize) + createTestSnapshot(snapshotManager, "vm1@host1", 3, now + 1 * day - 3600 , now + 1 * day, []string { chunkHash4, chunkHash5} ) + checkTestSnapshots(snapshotManager, 3, 2) + + t.Logf("Prune without removing any snapshots -- fossils will be deleted") + snapshotManager.PruneSnapshots(testDir, "vm1@host1", "vm1@host1", []int{}, []string{}, []string{}, false, false, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 3, 0) +} + +func TestRetentionPolicy(t *testing.T) { + + setTestingT(t) + + testDir := path.Join(os.TempDir(), "duplicacy_test", "snapshot_test") + + snapshotManager := createTestSnapshotManager(testDir) + + chunkSize := 1024 + var chunkHashes [] string + for i := 0; i < 30; i++ { + chunkHashes = append(chunkHashes, uploadRandomChunk(snapshotManager, chunkSize)) + } + + now := time.Now().Unix() + day := int64(24 * 3600) + 
t.Logf("Creating 30 snapshots") + for i := 0; i < 30; i++ { + createTestSnapshot(snapshotManager, "vm1@host1", i + 1, now - int64(30 - i) * day - 3600, now - int64(30 - i) * day - 60, []string { chunkHashes[i] }) + } + + checkTestSnapshots(snapshotManager, 30, 0) + + t.Logf("Removing snapshot vm1@host1 0:20 with --exclusive") + snapshotManager.PruneSnapshots(testDir, "vm1@host1", "vm1@host1", []int{}, []string{}, []string{"0:20"}, false, true, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 19, 0) + + t.Logf("Removing snapshot vm1@host1 -k 0:20 with --exclusive") + snapshotManager.PruneSnapshots(testDir, "vm1@host1", "vm1@host1", []int{}, []string{}, []string{"0:20"}, false, true, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 19, 0) + + t.Logf("Removing snapshot vm1@host1 -k 3:14 -k 2:7 with --exclusive") + snapshotManager.PruneSnapshots(testDir, "vm1@host1", "vm1@host1", []int{}, []string{}, []string{"3:14", "2:7"}, false, true, []string{}, false, false, false) + checkTestSnapshots(snapshotManager, 12, 0) +} diff --git a/duplicacy_storage.go b/duplicacy_storage.go new file mode 100644 index 0000000..1ea632a --- /dev/null +++ b/duplicacy_storage.go @@ -0,0 +1,439 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "fmt" + "regexp" + "strings" + "strconv" + "os" + "net" + "path" + "io/ioutil" + "runtime" + + "golang.org/x/crypto/ssh" + "golang.org/x/crypto/ssh/agent" +) + +type Storage interface { + // ListFiles return the list of files and subdirectories under 'dir' (non-recursively) + ListFiles(threadIndex int, dir string) (files []string, size []int64, err error) + + // DeleteFile deletes the file or directory at 'filePath'. + DeleteFile(threadIndex int, filePath string) (err error) + + // MoveFile renames the file. 
+ MoveFile(threadIndex int, from string, to string) (err error) + + // CreateDirectory creates a new directory. + CreateDirectory(threadIndex int, dir string) (err error) + + // GetFileInfo returns the information about the file or directory at 'filePath'. + GetFileInfo(threadIndex int, filePath string) (exist bool, isDir bool, size int64, err error) + + // FindChunk finds the chunk with the specified id. If 'isFossil' is true, it will search for chunk files with + // the suffix '.fsl'. + FindChunk(threadIndex int, chunkID string, isFossil bool) (filePath string, exist bool, size int64, err error) + + // DownloadFile reads the file at 'filePath' into the chunk. + DownloadFile(threadIndex int, filePath string, chunk *Chunk) (err error) + + // UploadFile writes 'content' to the file at 'filePath'. + UploadFile(threadIndex int, filePath string, content []byte) (err error) + + // If a local snapshot cache is needed for the storage to avoid downloading/uploading chunks too often when + // managing snapshots. + IsCacheNeeded() (bool) + + // If the 'MoveFile' method is implemented. + IsMoveFileImplemented() (bool) + + // If the storage can guarantee strong consistency. + IsStrongConsistent() (bool) + + // If the storage supports fast listing of files names. + IsFastListing() (bool) + + // Enable the test mode. + EnableTestMode() + + // Set the maximum transfer speeds. 
+ SetRateLimits(downloadRateLimit int, uploadRateLimit int) +} + +type RateLimitedStorage struct { + DownloadRateLimit int + UploadRateLimit int +} + +func (storage *RateLimitedStorage) SetRateLimits(downloadRateLimit int, uploadRateLimit int) { + storage.DownloadRateLimit = downloadRateLimit + storage.UploadRateLimit = uploadRateLimit +} + +func checkHostKey(repository string, hostname string, remote net.Addr, key ssh.PublicKey) error { + + if len(repository) == 0 { + return nil + } + + duplicacyDirectory := path.Join(repository, DUPLICACY_DIRECTORY) + hostFile := path.Join(duplicacyDirectory, "knowns_hosts") + file, err := os.OpenFile(hostFile, os.O_RDWR | os.O_CREATE, 0600) + if err != nil { + return err + } + + defer file.Close() + content, err := ioutil.ReadAll(file) + if err != nil { + return err + } + + lineRegex := regexp.MustCompile(`^([^\s]+)\s+(.+)`) + + keyString := string(ssh.MarshalAuthorizedKey(key)) + keyString = strings.Replace(keyString, "\n", "", -1) + remoteAddress := remote.String() + if strings.HasSuffix(remoteAddress, ":22") { + remoteAddress = remoteAddress[:len(remoteAddress) - len(":22")] + } + + for i, line := range strings.Split(string(content), "\n") { + matched := lineRegex.FindStringSubmatch(line) + if matched == nil { + continue + } + + if matched[1] == remote.String() { + if keyString != matched[2] { + LOG_WARN("HOSTKEY_OLD", "The existing key for '%s' is %s (file %s, line %d)", + remote.String(), matched[2], hostFile, i) + LOG_WARN("HOSTKEY_NEW", "The new key is '%s'", keyString) + return fmt.Errorf("The host key for '%s' has changed", remote.String()) + } else { + return nil + } + } + } + + file.Write([]byte(remote.String() + " " + keyString + "\n")) + return nil +} + +// CreateStorage creates a storage object based on the provide storage URL. 
func CreateStorage(repository string, preference Preference, resetPassword bool, threads int) (storage Storage) {

	storageURL := preference.StorageURL

	isFileStorage := false

	// A URL with no scheme is treated as a local path; on Windows also accept
	// drive-letter ("c:\...") and UNC ("\\server\...") forms.
	if strings.HasPrefix(storageURL, "/") {
		isFileStorage = true
	} else if runtime.GOOS == "windows" {
		if len(storageURL) > 3 && storageURL[1] == ':' && (storageURL[2] == '/' || storageURL[2] == '\\') {
			volume := strings.ToLower(storageURL[:1])
			if volume[0] >= 'a' && volume[0] <= 'z' {
				isFileStorage = true
			}
		}

		if !isFileStorage && strings.HasPrefix(storageURL, `\\`) {
			isFileStorage = true
		}
	}

	if isFileStorage {
		fileStorage, err := CreateFileStorage(storageURL, threads)
		if err != nil {
			LOG_ERROR("STORAGE_CREATE", "Failed to load the file storage at %s: %v", storageURL, err)
			return nil
		}
		return fileStorage
	}

	// scheme://[user@]host[/path]; groups: 1=scheme 2=user@ 3=host 4=/path 5=path
	urlRegex := regexp.MustCompile(`^(\w+)://([\w\-]+@)?([^/]+)(/(.+))?`)

	matched := urlRegex.FindStringSubmatch(storageURL)

	if matched == nil {
		LOG_ERROR("STORAGE_CREATE", "Unrecognizable storage URL: %s", storageURL)
		return nil
	} else if matched[1] == "sftp" {
		server := matched[3]
		username := matched[2]
		storageDir := matched[5]
		port := 22

		// An explicit "host:port" overrides the default SSH port.
		if strings.Contains(server, ":") {
			index := strings.Index(server, ":")
			port, _ = strconv.Atoi(server[index + 1:])
			server = server[:index]
		}

		if storageDir == "" {
			LOG_ERROR("STORAGE_CREATE", "The SFTP storage directory can't be empty")
			return nil
		}

		// Drop the trailing '@' captured by the regex.
		if username != "" {
			username = username[:len(username) - 1]
		}

		password := ""
		passwordCallback := func() (string, error) {
			LOG_DEBUG("SSH_PASSWORD", "Attempting password login")
			password = GetPassword(preference, "ssh_password", "Enter SSH password:", false, resetPassword)
			return password, nil
		}

		keyboardInteractive := func (user, instruction string, questions []string, echos []bool) (answers []string,
			err error) {
			if len(questions) == 1 {
				LOG_DEBUG("SSH_INTERACTIVE", "Attempting keyboard interactive login")
				password = GetPassword(preference, "ssh_password", "Enter SSH password:", false, resetPassword)
				answers = []string { password }
				return answers, nil
			} else {
				return nil, nil
			}
		}

		keyFile := ""
		publicKeysCallback := func() ([]ssh.Signer, error) {
			LOG_DEBUG("SSH_PUBLICKEY", "Attempting public key authentication")

			signers := []ssh.Signer {}

			// First try any keys offered by a running SSH agent.
			agentSock := os.Getenv("SSH_AUTH_SOCK")
			if agentSock != "" {
				connection, err := net.Dial("unix", agentSock)
				// TODO: looks like we need to close the connection
				if err == nil {
					LOG_DEBUG("SSH_AGENT", "Attempting public key authentication via agent")
					sshAgent := agent.NewClient(connection)
					signers, err = sshAgent.Signers()
					if err != nil {
						LOG_DEBUG("SSH_AGENT", "Can't log in using public key authentication via agent: %v", err)
					}
				}
			}

			// Then a private key file whose path is stored like a password.
			keyFile = GetPassword(preference, "ssh_key_file", "Enter the path of the private key file:",
				true, resetPassword)

			var key ssh.Signer
			var err error

			if keyFile == "" {
				LOG_INFO("SSH_PUBLICKEY", "No private key file is provided")
			} else {
				var content []byte
				content, err = ioutil.ReadFile(keyFile)
				if err != nil {
					LOG_INFO("SSH_PUBLICKEY", "Failed to read the private key file: %v", err)
				} else {
					key, err = ssh.ParsePrivateKey(content)
					if err != nil {
						LOG_INFO("SSH_PUBLICKEY", "Failed to parse the private key file %s: %v", keyFile, err)
					}
				}
			}

			if key != nil {
				signers = append(signers, key)
			}

			if len(signers) > 0 {
				return signers, nil
			} else {
				return nil, err
			}

		}

		authMethods := [] ssh.AuthMethod {
			ssh.PasswordCallback(passwordCallback),
			ssh.KeyboardInteractive(keyboardInteractive),
			ssh.PublicKeysCallback(publicKeysCallback),
		}

		if RunInBackground {

			passwordKey := "ssh_password"
			keyFileKey := "ssh_key_file"
			if preference.Name != "default" {
				passwordKey = preference.Name + "_" + passwordKey
				keyFileKey = preference.Name + "_" + keyFileKey
			}

			// In background mode, only offer auth methods whose secrets are
			// already available in the keyring or from a running SSH agent.
			authMethods = [] ssh.AuthMethod {}
			if keyringGet(passwordKey) != "" {
				authMethods = append(authMethods, ssh.PasswordCallback(passwordCallback))
				authMethods = append(authMethods, ssh.KeyboardInteractive(keyboardInteractive))
			}
			if keyringGet(keyFileKey) != "" || os.Getenv("SSH_AUTH_SOCK") != "" {
				authMethods = append(authMethods, ssh.PublicKeysCallback(publicKeysCallback))
			}
		}

		hostKeyChecker := func(hostname string, remote net.Addr, key ssh.PublicKey) error {
			return checkHostKey(repository, hostname, remote, key)
		}

		sftpStorage, err := CreateSFTPStorage(server, port, username, storageDir, authMethods, hostKeyChecker, threads)
		if err != nil {
			LOG_ERROR("STORAGE_CREATE", "Failed to load the SFTP storage at %s: %v", storageURL, err)
			return nil
		}

		// Remember whichever credential ended up being used.
		if keyFile != "" {
			SavePassword(preference, "ssh_key_file", keyFile)
		} else if password != "" {
			SavePassword(preference, "ssh_password", password)
		}
		return sftpStorage
	} else if matched[1] == "s3" {

		// urlRegex := regexp.MustCompile(`^(\w+)://([\w\-]+@)?([^/]+)(/(.+))?`)

		region := matched[2]
		endpoint := matched[3]
		bucket := matched[5]

		// Drop the trailing '@' captured by the regex.
		if region != "" {
			region = region[:len(region) - 1]
		}

		// "amazon"/"amazon.com" selects the default AWS endpoint.
		if strings.EqualFold(endpoint, "amazon") || strings.EqualFold(endpoint, "amazon.com") {
			endpoint = ""
		}

		storageDir := ""
		if strings.Contains(bucket, "/") {
			firstSlash := strings.Index(bucket, "/")
			storageDir = bucket[firstSlash + 1:]
			bucket = bucket[:firstSlash]
		}

		accessKey := GetPassword(preference, "s3_id", "Enter S3 Access Key ID:", true, resetPassword)
		secretKey := GetPassword(preference, "s3_secret", "Enter S3 Secret Access Key:", true, resetPassword)

		s3Storage, err := CreateS3Storage(region, endpoint, bucket, storageDir, accessKey, secretKey, threads)
		if err != nil {
			LOG_ERROR("STORAGE_CREATE", "Failed to load the S3 storage at %s: %v", storageURL, err)
			return nil
		}
		SavePassword(preference, "s3_id", accessKey)
		SavePassword(preference, "s3_secret", secretKey)

		return s3Storage
	} else if matched[1] == "dropbox" {
		storageDir := matched[3] + matched[5]
		token := GetPassword(preference, "dropbox_token", "Enter Dropbox access token:", true, resetPassword)
		dropboxStorage, err := CreateDropboxStorage(token, storageDir, threads)
		if err != nil {
			LOG_ERROR("STORAGE_CREATE", "Failed to load the dropbox storage: %v", err)
			return nil
		}
		SavePassword(preference, "dropbox_token", token)
		return dropboxStorage
	} else if matched[1] == "b2" {
		bucket := matched[3]

		accountID := GetPassword(preference, "b2_id", "Enter Backblaze Account ID:", true, resetPassword)
		applicationKey := GetPassword(preference, "b2_key", "Enter Backblaze Application Key:", true, resetPassword)

		b2Storage, err := CreateB2Storage(accountID, applicationKey, bucket, threads)
		if err != nil {
			LOG_ERROR("STORAGE_CREATE", "Failed to load the Backblaze B2 storage at %s: %v", storageURL, err)
			return nil
		}
		SavePassword(preference, "b2_id", accountID)
		SavePassword(preference, "b2_key", applicationKey)
		return b2Storage
	} else if matched[1] == "azure" {
		account := matched[3]
		container := matched[5]

		if container == "" {
			LOG_ERROR("STORAGE_CREATE", "The container name for the Azure storage can't be empty")
			return nil
		}

		prompt := fmt.Sprintf("Enter the Access Key for the Azure storage account %s:", account)
		accessKey := GetPassword(preference, "azure_key", prompt, true, resetPassword)

		azureStorage, err := CreateAzureStorage(account, accessKey, container, threads)
		if err != nil {
			LOG_ERROR("STORAGE_CREATE", "Failed to load the Azure storage at %s: %v", storageURL, err)
			return nil
		}
		SavePassword(preference, "azure_key", accessKey)
		return azureStorage
	} else if matched[1] == "acd" {
		// NOTE(review): the cloud-drive backends use matched[3] + matched[4]
		// (which keeps the leading '/'), unlike s3/azure which use matched[5];
		// confirm this asymmetry is intended.
		storagePath := matched[3] + matched[4]
		prompt := fmt.Sprintf("Enter the path of the Amazon Cloud Drive token file (downloadable from https://duplicacy.com/acd_start):")
		tokenFile := GetPassword(preference, "acd_token", prompt, true, resetPassword)
		acdStorage, err := CreateACDStorage(tokenFile, storagePath, threads)
		if err != nil {
			LOG_ERROR("STORAGE_CREATE", "Failed to load the Amazon Cloud Drive storage at %s: %v", storageURL, err)
			return nil
		}
		SavePassword(preference, "acd_token", tokenFile)
		return acdStorage
	} else if matched[1] == "gcs" {
		bucket := matched[3]
		storageDir := matched[5]
		prompt := fmt.Sprintf("Enter the path of the Google Cloud Storage token file (downloadable from https://duplicacy.com/gcs_start) or the service account credential file:")
		tokenFile := GetPassword(preference, "gcs_token", prompt, true, resetPassword)
		gcsStorage, err := CreateGCSStorage(tokenFile, bucket, storageDir, threads)
		if err != nil {
			LOG_ERROR("STORAGE_CREATE", "Failed to load the Google Cloud Storage backend at %s: %v", storageURL, err)
			return nil
		}
		SavePassword(preference, "gcs_token", tokenFile)
		return gcsStorage
	} else if matched[1] == "gcd" {
		storagePath := matched[3] + matched[4]
		prompt := fmt.Sprintf("Enter the path of the Google Drive token file (downloadable from https://duplicacy.com/gcd_start):")
		tokenFile := GetPassword(preference, "gcd_token", prompt, true, resetPassword)
		gcdStorage, err := CreateGCDStorage(tokenFile, storagePath, threads)
		if err != nil {
			LOG_ERROR("STORAGE_CREATE", "Failed to load the Google Drive storage at %s: %v", storageURL, err)
			return nil
		}
		SavePassword(preference, "gcd_token", tokenFile)
		return gcdStorage
	} else if matched[1] == "one" {
		storagePath := matched[3] + matched[4]
		prompt := fmt.Sprintf("Enter the path of the OneDrive token file (downloadable from https://duplicacy.com/one_start):")
		tokenFile := GetPassword(preference, "one_token", prompt, true, resetPassword)
		oneDriveStorage, err := CreateOneDriveStorage(tokenFile, storagePath, threads)
		if err != nil {
			LOG_ERROR("STORAGE_CREATE", "Failed to load the OneDrive storage at %s: %v", storageURL, err)
			return nil
		}
		SavePassword(preference, "one_token", tokenFile)
		return oneDriveStorage
	} else if matched[1] == "hubic" {
		storagePath := matched[3] + matched[4]
		prompt := fmt.Sprintf("Enter the path of the Hubic token file (downloadable from https://duplicacy.com/hubic_start):")
		tokenFile := GetPassword(preference, "hubic_token", prompt, true, resetPassword)
		hubicStorage, err := CreateHubicStorage(tokenFile, storagePath, threads)
		if err != nil {
			LOG_ERROR("STORAGE_CREATE", "Failed to load the Hubic storage at %s: %v", storageURL, err)
			return nil
		}
		SavePassword(preference, "hubic_token", tokenFile)
		return hubicStorage
	} else {
		LOG_ERROR("STORAGE_CREATE", "The storage type '%s' is not supported", matched[1])
		return nil
	}

}
diff --git a/duplicacy_storage_test.go b/duplicacy_storage_test.go
new file mode 100644
index 0000000..194b6bd
--- /dev/null
+++ b/duplicacy_storage_test.go
@@ -0,0 +1,450 @@
// Copyright (c) Acrosync LLC. All rights reserved.
// Licensed under the Fair Source License 0.9 (https://fair.io/)
// User Limitation: 5 users

package duplicacy

import (
	"os"
	"fmt"
	"time"
	"flag"
	"path"
	"testing"
	"strings"
	"strconv"
	"io/ioutil"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"runtime/debug"

	crypto_rand "crypto/rand"
	"math/rand"
)

// Command-line options controlling the storage tests.
var testStorageName string
var testRateLimit int
var testQuickMode bool
var testThreads int
var testFixedChunkSize bool

func init() {
	flag.StringVar(&testStorageName, "storage", "", "the test storage to use")
	flag.IntVar(&testRateLimit, "limit-rate", 0, "maximum transfer speed in kbytes/sec")
	flag.BoolVar(&testQuickMode, "quick", false, "quick test")
	flag.IntVar(&testThreads, "threads", 1, "number of downloading/uploading threads")
	flag.BoolVar(&testFixedChunkSize, "fixed-chunk-size", false, "fixed chunk size")
	flag.Parse()
}

// loadStorage constructs the storage selected by the -storage flag.  Backends
// other than the local file storage read their settings from
// 'test_storage.conf', a JSON map of storage name -> option map.
func loadStorage(localStoragePath string, threads int) (Storage, error) {

	if testStorageName == "" || testStorageName == "file" {
		return CreateFileStorage(localStoragePath, threads)
	}

	config, err := ioutil.ReadFile("test_storage.conf")
	if err != nil {
		return nil, err
	}

	storages := make(map[string]map[string]string)

	err = json.Unmarshal(config, &storages)
	if err != nil {
		return nil, err
	}

	storage, found := storages[testStorageName]
	if !found {
		return nil, fmt.Errorf("No storage named '%s' found", testStorageName)
	}

	if testStorageName == "sftp" {
		port, _ := strconv.Atoi(storage["port"])
		return CreateSFTPStorageWithPassword(storage["server"], port, storage["username"], storage["directory"], storage["password"], threads)
	} else if testStorageName == "s3" {
		return CreateS3Storage(storage["region"], storage["endpoint"], storage["bucket"], storage["directory"], storage["access_key"], storage["secret_key"], threads)
	} else if testStorageName == "dropbox" {
		return CreateDropboxStorage(storage["token"], storage["directory"], threads)
	} else if testStorageName == "b2" {
		return CreateB2Storage(storage["account"], storage["key"], storage["bucket"], threads)
	} else if testStorageName == "gcs-s3" {
		return CreateS3Storage(storage["region"], storage["endpoint"], storage["bucket"], storage["directory"], storage["access_key"], storage["secret_key"], threads)
	} else if testStorageName == "gcs" {
		return CreateGCSStorage(storage["token_file"], storage["bucket"], storage["directory"], threads)
	} else if testStorageName == "gcs-sa" {
		return CreateGCSStorage(storage["token_file"], storage["bucket"], storage["directory"], threads)
	} else if testStorageName == "azure" {
		return CreateAzureStorage(storage["account"], storage["key"], storage["container"], threads)
	} else if testStorageName == "acd" {
		return CreateACDStorage(storage["token_file"], storage["storage_path"], threads)
	} else if testStorageName == "gcd" {
		return CreateGCDStorage(storage["token_file"], storage["storage_path"], threads)
	} else if testStorageName == "one" {
		return CreateOneDriveStorage(storage["token_file"], storage["storage_path"], threads)
	} else if testStorageName == "hubic" {
		return CreateHubicStorage(storage["token_file"], storage["storage_path"], threads)
	} else {
		return nil, fmt.Errorf("Invalid storage named: %s", testStorageName)
	}
}

// cleanStorage deletes all snapshots, all chunks, and the config file so a
// test starts from a blank storage.
func cleanStorage(storage Storage) {

	directories := make([]string, 0, 1024)
	snapshots := make([]string, 0, 1024)

	directories = append(directories, "snapshots/")

	LOG_INFO("STORAGE_LIST", "Listing snapshots in the storage")
	for len(directories) > 0 {

		// Depth-first traversal using 'directories' as a stack.
		dir := directories[len(directories) - 1]
		directories = directories[:len(directories) - 1]

		files, _, err := storage.ListFiles(0, dir)
		if err != nil {
			LOG_ERROR("STORAGE_LIST", "Failed to list the directory %s: %v", dir, err)
			return
		}

		for _, file := range files {
			// A trailing '/' marks a subdirectory.
			if len(file) > 0 && file[len(file) - 1] == '/' {
				directories = append(directories, dir + file)
			} else {
				snapshots = append(snapshots, dir + file)
			}
		}
	}

	LOG_INFO("STORAGE_DELETE", "Deleting %d snapshots in the storage", len(snapshots))
	for _, snapshot := range snapshots {
		storage.DeleteFile(0, snapshot)
	}

	for _, chunk := range listChunks(storage) {
		storage.DeleteFile(0, "chunks/" + chunk)
	}

	storage.DeleteFile(0, "config")

	return
}

// listChunks returns every file path under 'chunks/', recursively, with the
// 'chunks/' prefix stripped.
func listChunks(storage Storage) (chunks []string) {

	directories := make([]string, 0, 1024)

	directories = append(directories, "chunks/")

	for len(directories) > 0 {

		dir := directories[len(directories) - 1]
		directories = directories[:len(directories) - 1]

		files, _, err := storage.ListFiles(0, dir)
		if err != nil {
			LOG_ERROR("CHUNK_LIST", "Failed to list the directory %s: %v", dir, err)
			return nil
		}

		for _, file := range files {
			if len(file) > 0 && file[len(file) - 1] == '/' {
				directories = append(directories, dir + file)
			} else {
				chunk := dir + file
				chunk = chunk[len("chunks/"):]
				chunks = append(chunks, chunk)
			}
		}
	}

	return
}

// moveChunk renames a chunk into its fossil form (or back when 'isFossil' is
// true), waits 'delay' seconds for eventually-consistent backends, and then
// verifies that only the renamed form exists.
func moveChunk(t *testing.T, storage Storage, chunkID string, isFossil bool, delay int) {

	filePath, exist, _, err := storage.FindChunk(0, chunkID, isFossil)

	if err != nil {
		t.Errorf("Error find chunk %s: %v", chunkID, err)
		return
	}

	to := filePath + ".fsl"
	if isFossil {
		to = filePath[:len(filePath) - len(".fsl")]
	}

	err = storage.MoveFile(0, filePath, to)
	if err != nil {
		t.Errorf("Error renaming file %s to %s: %v", filePath, to, err)
	}

	time.Sleep(time.Duration(delay) * time.Second)

	// The original form must be gone...
	_, exist, _, err = storage.FindChunk(0, chunkID, isFossil)
	if err != nil {
		t.Errorf("Error get file info for chunk %s: %v", chunkID, err)
	}

	if exist {
		t.Errorf("File %s still exists after renaming", filePath)
	}

	// ...and the renamed form must be present.
	_, exist, _, err = storage.FindChunk(0, chunkID, !isFossil)
	if err != nil {
		t.Errorf("Error get file info for %s: %v", to, err)
	}

	if !exist {
		t.Errorf("File %s doesn't exist",
to) + } + +} + +func TestStorage(t *testing.T) { + + rand.Seed(time.Now().UnixNano()) + setTestingT(t) + SetLoggingLevel(INFO) + + defer func() { + if r := recover(); r != nil { + switch e := r.(type) { + case Exception: + t.Errorf("%s %s", e.LogID, e.Message) + debug.PrintStack() + default: + t.Errorf("%v", e) + debug.PrintStack() + } + } + } () + + testDir := path.Join(os.TempDir(), "duplicacy_test", "storage_test") + os.RemoveAll(testDir) + os.MkdirAll(testDir, 0700) + + LOG_INFO("STORAGE_TEST", "storage: %s", testStorageName) + + storage, err := loadStorage(testDir, 1) + if err != nil { + t.Errorf("Failed to create storage: %v", err) + return + } + storage.EnableTestMode() + storage.SetRateLimits(testRateLimit, testRateLimit) + + delay := 0 + if _, ok := storage.(*ACDStorage); ok { + delay = 5 + } + if _, ok := storage.(*HubicStorage); ok { + delay = 2 + } + + for _, dir := range []string { "chunks", "snapshots" } { + err = storage.CreateDirectory(0, dir) + if err != nil { + t.Errorf("Failed to create directory %s: %v", dir, err) + return + } + } + + storage.CreateDirectory(0, "snapshots/repository1") + storage.CreateDirectory(0, "snapshots/repository2") + time.Sleep(time.Duration(delay) * time.Second) + { + + // Upload fake snapshot files so that for storages having no concept of directories, + // ListFiles("snapshots") still returns correct snapshot IDs. + + // Create a random file not a text file to make ACD Storage happy. 
+ content := make([]byte, 100) + _, err = crypto_rand.Read(content) + if err != nil { + t.Errorf("Error generating random content: %v", err) + return + } + + err = storage.UploadFile(0, "snapshots/repository1/1", content) + if err != nil { + t.Errorf("Error to upload snapshots/repository1/1: %v", err) + } + + err = storage.UploadFile(0, "snapshots/repository2/1", content) + if err != nil { + t.Errorf("Error to upload snapshots/repository2/1: %v", err) + } + } + + time.Sleep(time.Duration(delay) * time.Second) + + snapshotDirs, _, err := storage.ListFiles(0, "snapshots/") + if err != nil { + t.Errorf("Failed to list snapshot ids: %v", err) + return + } + + snapshotIDs := []string {} + for _, snapshotDir := range snapshotDirs { + if len(snapshotDir) > 0 && snapshotDir[len(snapshotDir) - 1] == '/' { + snapshotIDs = append(snapshotIDs, snapshotDir[:len(snapshotDir) - 1]) + } + } + + if len(snapshotIDs) < 2 { + t.Errorf("Snapshot directories not created") + return + } + + for _, snapshotID := range snapshotIDs { + snapshots, _, err := storage.ListFiles(0, "snapshots/" + snapshotID) + if err != nil { + t.Errorf("Failed to list snapshots for %s: %v", snapshotID, err) + return + } + for _, snapshot := range snapshots { + storage.DeleteFile(0, "snapshots/" + snapshotID + "/" + snapshot) + } + } + + time.Sleep(time.Duration(delay) * time.Second) + + storage.DeleteFile(0, "config") + + for _, file := range []string { "snapshots/repository1/1", "snapshots/repository2/1"} { + exist, _, _, err := storage.GetFileInfo(0, file) + if err != nil { + t.Errorf("Failed to get file info for %s: %v", file, err) + return + } + if exist { + t.Errorf("File %s still exists after deletion", file) + return + } + } + + numberOfFiles := 20 + maxFileSize := 64 * 1024 + + if testQuickMode { + numberOfFiles = 2 + } + + chunks := []string{} + + for i := 0; i < numberOfFiles; i++ { + content := make([]byte, rand.Int() % maxFileSize + 1) + _, err = crypto_rand.Read(content) + if err != nil { + 
t.Errorf("Error generating random content: %v", err) + return + } + + hasher := sha256.New() + hasher.Write(content) + chunkID := hex.EncodeToString(hasher.Sum(nil)) + chunks = append(chunks, chunkID) + + filePath, exist, _, err := storage.FindChunk(0, chunkID, false) + if err != nil { + t.Errorf("Failed to list the chunk %s: %v", chunkID, err) + return + } + if exist { + t.Errorf("Chunk %s already exists", chunkID) + } + + err = storage.UploadFile(0, filePath, content) + if err != nil { + t.Errorf("Failed to upload the file %s: %v", filePath, err) + return + } + LOG_INFO("STORAGE_CHUNK", "Uploaded chunk: %s, size: %d", chunkID, len(content)) + } + + allChunks := [] string {} + for _, file := range listChunks(storage) { + file = strings.Replace(file, "/", "", -1) + if len(file) == 64 { + allChunks = append(allChunks, file) + } + } + + LOG_INFO("STORAGE_FOSSIL", "Making %s a fossil", chunks[0]) + moveChunk(t, storage, chunks[0], false, delay) + LOG_INFO("STORAGE_FOSSIL", "Making %s a chunk", chunks[0]) + moveChunk(t, storage, chunks[0], true, delay) + + config := CreateConfig() + config.MinimumChunkSize = 100 + config.chunkPool = make(chan *Chunk, numberOfFiles * 2) + + chunk := CreateChunk(config, true) + + + for _, chunkID := range chunks { + + chunk.Reset(false) + filePath, exist, _, err := storage.FindChunk(0, chunkID, false) + if err != nil { + t.Errorf("Error getting file info for chunk %s: %v", chunkID, err) + continue + } else if !exist { + t.Errorf("Chunk %s does not exist", chunkID) + continue + } else { + err = storage.DownloadFile(0, filePath, chunk) + if err != nil { + t.Errorf("Error downloading file %s: %v", filePath, err) + continue + } + LOG_INFO("STORAGE_CHUNK", "Downloaded chunk: %s, size: %d", chunkID, chunk.GetLength()) + } + + hasher := sha256.New() + hasher.Write(chunk.GetBytes()) + hash := hex.EncodeToString(hasher.Sum(nil)) + + if hash != chunkID { + t.Errorf("File %s, hash %s, size %d", chunkID, hash, chunk.GetBytes()) + } + } + + 
LOG_INFO("STORAGE_FOSSIL", "Making %s a fossil", chunks[1]) + moveChunk(t, storage, chunks[1], false, delay) + + filePath, exist, _, err := storage.FindChunk(0, chunks[1], true) + if err != nil { + t.Errorf("Error getting file info for fossil %s: %v", chunks[1], err) + } else if !exist { + t.Errorf("Fossil %s does not exist", chunks[1]) + } else { + err = storage.DeleteFile(0, filePath) + if err != nil { + t.Errorf("Failed to delete file %s: %v", filePath) + } else { + time.Sleep(time.Duration(delay) * time.Second) + filePath, exist, _, err = storage.FindChunk(0, chunks[1], true) + if err != nil { + t.Errorf("Error get file info for deleted fossil %s: %v", chunks[1], err) + } else if exist { + t.Errorf("Fossil %s still exists after deletion", chunks[1]) + } + } + } + + for _, file := range allChunks { + + err = storage.DeleteFile(0, "chunks/" + file) + if err != nil { + t.Errorf("Failed to delete the file %s: %v", file, err) + return + } + } + +} diff --git a/duplicacy_utils.go b/duplicacy_utils.go new file mode 100644 index 0000000..899adbd --- /dev/null +++ b/duplicacy_utils.go @@ -0,0 +1,391 @@ +// Copyright (c) Acrosync LLC. All rights reserved. 
+// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "fmt" + "os" + "bufio" + "io" + "io/ioutil" + "time" + "path" + "path/filepath" + "regexp" + "strings" + "strconv" + "runtime" + "crypto/sha256" + + "golang.org/x/crypto/pbkdf2" + "github.com/gilbertchen/gopass" +) + +var RunInBackground bool = false + +type RateLimitedReader struct { + Content []byte + Rate float64 + Next int + StartTime time.Time +} + +func CreateRateLimitedReader(content []byte, rate int) (*RateLimitedReader) { + return &RateLimitedReader { + Content: content, + Rate: float64(rate * 1024), + Next: 0, + } +} + +func (reader *RateLimitedReader) Length() (int64) { + return int64(len(reader.Content)) +} + +func (reader *RateLimitedReader) Reset() { + reader.Next = 0 +} + + +func (reader *RateLimitedReader) Read(p []byte) (n int, err error) { + + if reader.Next >= len(reader.Content) { + return 0, io.EOF + } + + if reader.Rate <= 0 { + n := copy(p, reader.Content[reader.Next:]) + reader.Next += n + if reader.Next >= len(reader.Content) { + return n, io.EOF + } + return n, nil + } + + if reader.StartTime.IsZero() { + reader.StartTime = time.Now() + } + + elapsed := time.Since(reader.StartTime).Seconds() + delay := float64(reader.Next) / reader.Rate - elapsed + end := reader.Next + int(reader.Rate / 5) + if delay > 0 { + time.Sleep(time.Duration(delay * float64(time.Second))) + } else { + end += - int(delay * reader.Rate) + } + + if end > len(reader.Content) { + end = len(reader.Content) + } + + n = copy(p, reader.Content[reader.Next : end]) + reader.Next += n + return n, nil +} + +func RateLimitedCopy(writer io.Writer, reader io.Reader, rate int) (written int64, err error) { + if rate <= 0 { + return io.Copy(writer, reader) + } + for range time.Tick(time.Second / 5) { + n, err := io.CopyN(writer, reader, int64(rate * 1024 / 5)) + written += n + if err != nil { + if err == io.EOF { + return written, nil + } else { + return 
written, err + } + } + } + return written, nil +} + +// GenerateKeyFromPassword generates a key from the password. +func GenerateKeyFromPassword(password string) []byte { + return pbkdf2.Key([]byte(password), DEFAULT_KEY, 16384, 32, sha256.New) +} + +// GetPassword attempts to get the password from KeyChain/KeyRing, environment variables, or keyboard input. +func GetPassword(preference Preference, passwordType string, prompt string, + showPassword bool, resetPassword bool) (string) { + + passwordID := passwordType + if preference.Name != "default" { + passwordID = preference.Name + "_" + passwordID + } + + { + name := strings.ToUpper("duplicacy_" + passwordID) + LOG_DEBUG("PASSWORD_ENV_VAR", "Reading the environment variable %s", name) + if password, found := os.LookupEnv(name); found && password != "" { + return password + } + } + + if len(preference.Keys) > 0 && len(preference.Keys[passwordID]) > 0 { + LOG_DEBUG("PASSWORD_KEYCHAIN", "Reading %s from preferences", passwordID) + return preference.Keys[passwordID] + } + + if resetPassword && !RunInBackground { + keyringSet(passwordID, "") + } else { + password := keyringGet(passwordID) + if password != "" { + return password + } + + if RunInBackground { + LOG_INFO("PASSWORD_MISSING", "%s is not found in Keychain/Keyring", passwordID) + return "" + } + + } + + password := "" + fmt.Printf("%s", prompt) + if showPassword { + scanner := bufio.NewScanner(os.Stdin) + scanner.Scan() + password = scanner.Text() + } else { + passwordInBytes, err := gopass.GetPasswdMasked() + if err != nil { + LOG_ERROR("PASSWORD_READ", "Failed to read the password: %v", err) + return "" + } + password = string(passwordInBytes) + } + + return password +} + +// SavePassword saves the specified password in the keyring/keychain. 
+func SavePassword(preference Preference, passwordType string, password string) {
+    if password == "" || RunInBackground {
+        return
+    }
+
+    if preference.DoNotSavePassword {
+        return
+    }
+    passwordID := passwordType
+    if preference.Name != "default" {
+        passwordID = preference.Name + "_" + passwordID
+    }
+    keyringSet(passwordID, password)
+}
+
+// RemoveEmptyDirectories removes all empty subdirectories under top, then
+// walks back up toward top removing parents that became empty as a result.
+func RemoveEmptyDirectories(top string) {
+
+    stack := make([]string, 0, 256)
+
+    stack = append(stack, top)
+
+    for len(stack) > 0 {
+
+        dir := stack[len(stack) - 1]
+        stack = stack[:len(stack) - 1]
+
+        files, err := ioutil.ReadDir(dir)
+
+        if err != nil {
+            // Unreadable directory: best-effort cleanup, skip it.
+            continue
+        }
+
+        for _, file := range files {
+            if file.IsDir() && file.Name()[0] != '.' {
+                stack = append(stack, path.Join(dir, file.Name()))
+            }
+        }
+
+        if len(files) == 0 {
+            if os.Remove(dir) != nil {
+                continue
+            }
+
+            // The directory was removed; its parent may now be empty too, so
+            // climb toward top removing newly-empty ancestors.
+            dir = path.Dir(dir)
+            for len(dir) > len(top) {
+                files, err := ioutil.ReadDir(dir)
+                if err != nil {
+                    break
+                }
+
+                if len(files) == 0 {
+                    if os.Remove(dir) != nil {
+                        break
+                    }
+                }
+                dir = path.Dir(dir)
+            }
+        }
+    }
+}
+
+
+// The following code was modified from the online article 'Matching Wildcards: An Algorithm', by Kirk J. Krauss,
+// Dr. Dobb's, August 26, 2008. However, the version in the article doesn't handle cases like matching 'abcccd'
+// against '*ccd', and the version here fixed that issue.
+//
+func matchPattern(text string, pattern string) bool {
+
+    textLength := len(text)
+    patternLength := len(pattern)
+    afterLastWildcard := 0
+    afterLastMatched := 0
+
+    t := 0
+    p := 0
+
+    for {
+        if t >= textLength {
+            if p >= patternLength {
+                return true // "x" matches "x"
+            } else if pattern[p] == '*' {
+                p++
+                continue // "x*" matches "x" or "xy"
+            }
+            return false // "x" doesn't match "xy"
+        }
+
+        w := byte(0)
+        if p < patternLength {
+            w = pattern[p]
+        }
+
+        if text[t] != w {
+            if w == '?' 
{ + t++ + p++ + continue + } else if w == '*' { + p++ + afterLastWildcard = p + if p >= patternLength { + return true + } + } else if afterLastWildcard > 0 { + p = afterLastWildcard + t = afterLastMatched + t++ + } else { + return false + } + + for t < textLength && text[t] != pattern[p] && pattern[p] != '?' { + t++ + } + + if t >= textLength { + return false + } + afterLastMatched = t + } + t++ + p++ + } + +} + +// MatchPath returns 'true' if the file 'filePath' is excluded by the specified 'patterns'. Each pattern starts with +// either '+' or '-', whereas '-' indicates exclusion and '+' indicates inclusion. Wildcards like '*' and '?' may +// appear in the patterns. In case no matching pattern is found, the file will be excluded if all patterns are +// include patterns, and included otherwise. +func MatchPath(filePath string, patterns [] string) (included bool) { + + allIncludes := true + for _, pattern := range patterns { + + if pattern[0] == '+' { + if matchPattern(filePath, pattern[1:]) { + return true + } + } else if pattern[0] == '-' { + allIncludes = false + if matchPattern(filePath, pattern[1:]) { + return false + } + } + } + + return !allIncludes +} + +func joinPath(components ...string) string { + + combinedPath := path.Join(components...) + if len(combinedPath) > 257 && runtime.GOOS == "windows" { + combinedPath = `\\?\` + filepath.Join(components...) 
+ } + return combinedPath +} + +func PrettyNumber(number int64) (string) { + + G := int64(1024 * 1024 * 1024) + M := int64(1024 * 1024) + K := int64(1024) + + if number > 1000 * G { + return fmt.Sprintf("%dG", number / G) + } else if number > G { + return fmt.Sprintf("%d,%03dM", number / (1000 * M), (number / M) % 1000) + } else if number > M { + return fmt.Sprintf("%d,%03dK", number / (1000 * K), (number / K) % 1000) + } else if number > K { + return fmt.Sprintf("%dK", number / K) + } else { + return fmt.Sprintf("%d", number) + } +} + +func PrettySize(size int64) (string) { + if size > 1024 * 1024 { + return fmt.Sprintf("%.2fM", float64(size) / (1024.0 * 1024.0)) + } else if size > 1024 { + return fmt.Sprintf("%.0fK", float64(size) / 1024.0) + } else { + return fmt.Sprintf("%d", size) + } +} + +func PrettyTime(seconds int64) (string) { + + day := int64(3600 * 24) + + if seconds > day * 2 { + return fmt.Sprintf("%d days %02d:%02d:%02d", + seconds / day, (seconds % day) / 3600, (seconds % 3600) / 60, seconds % 60) + } else if seconds > day { + return fmt.Sprintf("1 day %02d:%02d:%02d", (seconds % day) / 3600, (seconds % 3600) / 60, seconds % 60) + } else if seconds > 0 { + return fmt.Sprintf("%02d:%02d:%02d", seconds / 3600, (seconds % 3600) / 60, seconds % 60) + } else { + return "n/a" + } +} + +func AtoSize(sizeString string) (int) { + sizeString = strings.ToLower(sizeString) + + sizeRegex := regexp.MustCompile(`^([0-9]+)([mk])?$`) + matched := sizeRegex.FindStringSubmatch(sizeString) + if matched == nil { + return 0 + } + + size, _ := strconv.Atoi(matched[1]) + + if matched[2] == "m" { + size *= 1024 * 1024 + } else if matched[2] == "k" { + size *= 1024 + } + + return size +} diff --git a/duplicacy_utils_others.go b/duplicacy_utils_others.go new file mode 100644 index 0000000..172ba97 --- /dev/null +++ b/duplicacy_utils_others.go @@ -0,0 +1,85 @@ +// Copyright (c) Acrosync LLC. All rights reserved. 
+// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +// +build !windows + +package duplicacy + +import ( + "os" + "bytes" + "syscall" + "path/filepath" + + "github.com/gilbertchen/xattr" +) + +func Readlink(path string) (isRegular bool, s string, err error) { + s, err = os.Readlink(path) + return false, s, err +} + +func GetOwner(entry *Entry, fileInfo *os.FileInfo) { + stat, ok := (*fileInfo).Sys().(*syscall.Stat_t) + if ok && stat != nil { + entry.UID = int(stat.Uid) + entry.GID = int(stat.Gid) + } else { + entry.UID = -1 + entry.GID = -1 + } +} + +func SetOwner(fullPath string, entry *Entry, fileInfo *os.FileInfo) (bool) { + stat, ok := (*fileInfo).Sys().(*syscall.Stat_t) + if ok && stat != nil && (int(stat.Uid) != entry.UID || int(stat.Gid) != entry.GID) { + if entry.UID != -1 && entry.GID != -1 { + err := os.Chown(fullPath, entry.UID, entry.GID) + if err != nil { + LOG_ERROR("RESTORE_CHOWN", "Failed to change uid or gid: %v", err) + return false + } + } + } + + return true +} + +func (entry *Entry) ReadAttributes(top string) { + + fullPath := filepath.Join(top, entry.Path) + attributes, _ := xattr.Listxattr(fullPath) + if len(attributes) > 0 { + entry.Attributes = make(map[string][]byte) + for _, name := range attributes { + attribute, err := xattr.Getxattr(fullPath, name) + if err == nil { + entry.Attributes[name] = attribute + } + } + } +} + +func (entry *Entry) SetAttributesToFile(fullPath string) { + names, _ := xattr.Listxattr(fullPath) + + for _, name := range names { + + newAttribute, found := entry.Attributes[name] + if found { + oldAttribute, _ := xattr.Getxattr(fullPath, name) + if bytes.Equal(oldAttribute, newAttribute) { + xattr.Setxattr(fullPath, name, newAttribute) + } + delete(entry.Attributes, name) + } else { + xattr.Removexattr(fullPath, name) + } + } + + for name, attribute := range entry.Attributes { + xattr.Setxattr(fullPath, name, attribute) + } + +} diff --git a/duplicacy_utils_test.go 
b/duplicacy_utils_test.go new file mode 100644 index 0000000..83883c5 --- /dev/null +++ b/duplicacy_utils_test.go @@ -0,0 +1,139 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "io" + "io/ioutil" + "time" + "bytes" + + crypto_rand "crypto/rand" + + "testing" + +) + +func TestMatchPattern(t *testing.T) { + + // Test cases were copied from Matching Wildcards: An Empirical Way to Tame an Algorithm + // By Kirk J. Krauss, October 07, 2014 + + DATA := [] struct { + text string + pattern string + matched bool + } { + // Cases with repeating character sequences. + { "abcccd", "*ccd", true }, + { "mississipissippi", "*issip*ss*", true }, + { "xxxx*zzzzzzzzy*f", "xxxx*zzy*fffff", false }, + { "xxxx*zzzzzzzzy*f", "xxx*zzy*f", true }, + { "xxxxzzzzzzzzyf", "xxxx*zzy*fffff", false }, + { "xxxxzzzzzzzzyf", "xxxx*zzy*f", true }, + { "xyxyxyzyxyz", "xy*z*xyz", true }, + { "mississippi", "*sip*", true }, + { "xyxyxyxyz", "xy*xyz", true }, + { "mississippi", "mi*sip*", true }, + { "ababac", "*abac*", true }, + { "ababac", "*abac*", true }, + { "aaazz", "a*zz*", true }, + { "a12b12", "*12*23", false }, + { "a12b12", "a12b", false }, + { "a12b12", "*12*12*", true }, + + // More double wildcard scenarios. + { "XYXYXYZYXYz", "XY*Z*XYz", true }, + { "missisSIPpi", "*SIP*", true }, + { "mississipPI", "*issip*PI", true }, + { "xyxyxyxyz", "xy*xyz", true }, + { "miSsissippi", "mi*sip*", true }, + { "miSsissippi", "mi*Sip*", false }, + { "abAbac", "*Abac*", true }, + { "abAbac", "*Abac*", true }, + { "aAazz", "a*zz*", true }, + { "A12b12", "*12*23", false }, + { "a12B12", "*12*12*", true }, + { "oWn", "*oWn*", true }, + + // Completely tame (no wildcards) cases. + { "bLah", "bLah", true }, + { "bLah", "bLaH", false }, + + // Simple mixed wildcard tests suggested by IBMer Marlin Deckert. 
+        { "a", "*?", true },
+        { "ab", "*?", true },
+        { "abc", "*?", true },
+
+        // More mixed wildcard tests including coverage for false positives.
+        { "a", "??", false },
+        { "ab", "?*?", true },
+        { "ab", "*?*?*", true },
+        { "abc", "?*?*?", true },
+        { "abc", "?*?*&?", false },
+        { "abcd", "?b*??", true },
+        { "abcd", "?a*??", false },
+        { "abcd", "?*?c?", true },
+        { "abcd", "?*?d?", false },
+        { "abcde", "?*b*?*d*?", true },
+
+        // Single-character-match cases.
+        { "bLah", "bL?h", true },
+        { "bLaaa", "bLa?", false },
+        { "bLah", "bLa?", true },
+        { "bLaH", "?Lah", false },
+        { "bLaH", "?LaH", true },
+    }
+
+    for _, data := range DATA {
+        if matchPattern(data.text, data.pattern) != data.matched {
+            t.Errorf("text: %s, pattern %s, expected: %t", data.text, data.pattern, data.matched)
+        }
+    }
+
+}
+
+// TestRateLimit verifies that RateLimitedReader and RateLimitedCopy deliver
+// all bytes and logs the achieved throughput against the configured limit.
+func TestRateLimit(t *testing.T) {
+    content := make([]byte, 100 * 1024)
+    _, err := crypto_rand.Read(content)
+    if err != nil {
+        t.Errorf("Error generating random content: %v", err)
+        return
+    }
+
+    expectedRate := 10
+    rateLimiter := CreateRateLimitedReader(content, expectedRate)
+
+    startTime := time.Now()
+    n, err := io.Copy(ioutil.Discard, rateLimiter)
+    if err != nil {
+        t.Errorf("Error reading from the rate limited reader: %v", err)
+        return
+    }
+    if int(n) != len(content) {
+        // %d, not %s: n and len(content) are integers (go vet flags the
+        // original "%s" verbs).
+        t.Errorf("Wrote %d bytes instead of %d", n, len(content))
+        return
+    }
+
+    elapsed := time.Since(startTime)
+    actualRate := float64(len(content)) / elapsed.Seconds() / 1024
+    t.Logf("Elapsed time: %s, actual rate: %.3f kB/s, expected rate: %d kB/s", elapsed, actualRate, expectedRate)
+
+    startTime = time.Now()
+    n, err = RateLimitedCopy(ioutil.Discard, bytes.NewBuffer(content), expectedRate)
+    if err != nil {
+        t.Errorf("Error writing with rate limit: %v", err)
+        return
+    }
+    if int(n) != len(content) {
+        // %d, not %s: both arguments are integers.
+        t.Errorf("Copied %d bytes instead of %d", n, len(content))
+        return
+    }
+
+    elapsed = time.Since(startTime)
+    actualRate = float64(len(content)) / elapsed.Seconds() / 1024
+
t.Logf("Elapsed time: %s, actual rate: %.3f kB/s, expected rate: %d kB/s", elapsed, actualRate, expectedRate) + +} diff --git a/duplicacy_utils_windows.go b/duplicacy_utils_windows.go new file mode 100644 index 0000000..1fcb88d --- /dev/null +++ b/duplicacy_utils_windows.go @@ -0,0 +1,115 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package duplicacy + +import ( + "os" + "fmt" + "syscall" + "unsafe" +) + +type symbolicLinkReparseBuffer struct { + SubstituteNameOffset uint16 + SubstituteNameLength uint16 + PrintNameOffset uint16 + PrintNameLength uint16 + Flags uint32 + PathBuffer [1]uint16 +} + +type mountPointReparseBuffer struct { + SubstituteNameOffset uint16 + SubstituteNameLength uint16 + PrintNameOffset uint16 + PrintNameLength uint16 + PathBuffer [1]uint16 +} + +type reparseDataBuffer struct { + ReparseTag uint32 + ReparseDataLength uint16 + Reserved uint16 + + // GenericReparseBuffer + reparseBuffer byte +} +const ( + FSCTL_GET_REPARSE_POINT = 0x900A8 + MAXIMUM_REPARSE_DATA_BUFFER_SIZE = 16 * 1024 + IO_REPARSE_TAG_MOUNT_POINT = 0xA0000003 + IO_REPARSE_TAG_SYMLINK = 0xA000000C + IO_REPARSE_TAG_DEDUP = 0x80000013 + SYMBOLIC_LINK_FLAG_DIRECTORY = 0x1 + + FILE_READ_ATTRIBUTES = 0x0080 +) + +// We copied golang source code for Readlink but made a simple modification here: use FILE_READ_ATTRIBUTES instead of +// GENERIC_READ to read the symlink, because the latter would cause a Access Denied error on links such as +// C:\Documents and Settings + +// Readlink returns the destination of the named symbolic link. 
+func Readlink(path string) (isRegular bool, s string, err error) { + fd, err := syscall.CreateFile(syscall.StringToUTF16Ptr(path), FILE_READ_ATTRIBUTES, + syscall.FILE_SHARE_READ, nil, syscall.OPEN_EXISTING, + syscall.FILE_FLAG_OPEN_REPARSE_POINT|syscall.FILE_FLAG_BACKUP_SEMANTICS, 0) + if err != nil { + return false, "", err + } + defer syscall.CloseHandle(fd) + + rdbbuf := make([]byte, syscall.MAXIMUM_REPARSE_DATA_BUFFER_SIZE) + var bytesReturned uint32 + err = syscall.DeviceIoControl(fd, syscall.FSCTL_GET_REPARSE_POINT, nil, 0, &rdbbuf[0], + uint32(len(rdbbuf)), &bytesReturned, nil) + if err != nil { + return false, "", err + } + + rdb := (*reparseDataBuffer)(unsafe.Pointer(&rdbbuf[0])) + switch rdb.ReparseTag { + case IO_REPARSE_TAG_SYMLINK: + data := (*symbolicLinkReparseBuffer)(unsafe.Pointer(&rdb.reparseBuffer)) + p := (*[0xffff]uint16)(unsafe.Pointer(&data.PathBuffer[0])) + if data.PrintNameLength > 0 { + s = syscall.UTF16ToString(p[data.PrintNameOffset/2 : (data.PrintNameLength + data.PrintNameOffset)/2]) + } else { + s = syscall.UTF16ToString(p[data.SubstituteNameOffset/2 : (data.SubstituteNameLength + data.SubstituteNameOffset)/2]) + } + case IO_REPARSE_TAG_MOUNT_POINT: + data := (*mountPointReparseBuffer)(unsafe.Pointer(&rdb.reparseBuffer)) + p := (*[0xffff]uint16)(unsafe.Pointer(&data.PathBuffer[0])) + if data.PrintNameLength > 0 { + s = syscall.UTF16ToString(p[data.PrintNameOffset/2 : (data.PrintNameLength + data.PrintNameOffset)/2]) + } else { + s = syscall.UTF16ToString(p[data.SubstituteNameOffset/2 : (data.SubstituteNameLength + data.SubstituteNameOffset)/2]) + } + case IO_REPARSE_TAG_DEDUP: + return true, "", nil + default: + // the path is not a symlink or junction but another type of reparse + // point + return false, "", fmt.Errorf("Unhandled reparse point type %x", rdb.ReparseTag) + } + + return false, s, nil +} + +func GetOwner(entry *Entry, fileInfo *os.FileInfo) { + entry.UID = -1 + entry.GID = -1 +} + +func SetOwner(fullPath string, entry 
*Entry, fileInfo *os.FileInfo) (bool) { + return true +} + +func (entry *Entry) ReadAttributes(top string) { +} + +func (entry *Entry) SetAttributesToFile(fullPath string) { + +} diff --git a/main/duplicacy_main.go b/main/duplicacy_main.go new file mode 100644 index 0000000..cc87143 --- /dev/null +++ b/main/duplicacy_main.go @@ -0,0 +1,1667 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Licensed under the Fair Source License 0.9 (https://fair.io/) +// User Limitation: 5 users + +package main + +import ( + "os" + "fmt" + "path" + "path/filepath" + "regexp" + "strings" + "strconv" + "os/exec" + "encoding/json" + + "github.com/gilbertchen/cli" + + "github.com/gilbertchen/duplicacy" +) + +const ( + ArgumentExitCode = 3 +) + +var ScriptEnabled bool + +func getRepositoryPreference(context *cli.Context, storageName string) (repository string, + preference *duplicacy.Preference) { + + repository, err := os.Getwd() + if err != nil { + duplicacy.LOG_ERROR("REPOSITORY_PATH", "Failed to retrieve the current working directory: %v", err) + return "", nil + } + + for { + stat, err := os.Stat(path.Join(repository, duplicacy.DUPLICACY_DIRECTORY)) + if err != nil && !os.IsNotExist(err) { + duplicacy.LOG_ERROR("REPOSITORY_PATH", "Failed to retrieve the information about the directory %s: %v", + repository, err) + return "", nil + } + + if stat != nil && stat.IsDir() { + break + } + + parent := path.Dir(repository) + if parent == repository || parent == "" { + duplicacy.LOG_ERROR("REPOSITORY_PATH", "Repository has not been initialized") + return "", nil + } + repository = parent + } + + duplicacy.LoadPreferences(repository) + + duplicacy.SetKeyringFile(path.Join(repository, duplicacy.DUPLICACY_DIRECTORY, "keyring")) + + if storageName == "" { + storageName = context.String("storage") + } + + if storageName == "" { + return repository, &duplicacy.Preferences[0] + } + + preference = duplicacy.FindPreference(storageName) + + if preference == nil { + 
duplicacy.LOG_ERROR("STORAGE_NONE", "No storage named '%s' is found", storageName) + return "", nil + } + return repository, preference +} + +func getRevisions(context *cli.Context) (revisions[] int) { + + flags := context.StringSlice("r") + + rangeRegex := regexp.MustCompile(`^([0-9]+)-([0-9]+)$`) + numberRegex := regexp.MustCompile(`^([0-9]+)$`) + + for _, flag := range flags { + matched := rangeRegex.FindStringSubmatch(flag) + if matched != nil { + start, _ := strconv.Atoi(matched[1]) + end, _ := strconv.Atoi(matched[2]) + if end > start { + for r := start; r <= end; r++ { + revisions = append(revisions, r) + } + continue + } + } + + matched = numberRegex.FindStringSubmatch(flag) + if matched != nil { + r, _ := strconv.Atoi(matched[1]) + revisions = append(revisions, r) + continue + } + + fmt.Fprintf(context.App.Writer, "Invalid revision: %s.\n\n", flag) + cli.ShowCommandHelp(context, context.Command.Name) + os.Exit(ArgumentExitCode) + } + + return revisions + +} + +func setGlobalOptions(context *cli.Context) { + if context.GlobalBool("log") { + duplicacy.EnableLogHeader() + } + + if context.GlobalBool("stack") { + duplicacy.EnableStackTrace() + } + + if context.GlobalBool("verbose") { + duplicacy.SetLoggingLevel(duplicacy.TRACE) + } + + if context.GlobalBool("debug") { + duplicacy.SetLoggingLevel(duplicacy.DEBUG) + } + + ScriptEnabled = true + if context.GlobalBool("no-script") { + ScriptEnabled = false + } + + duplicacy.RunInBackground = context.GlobalBool("background") +} + +func runScript(context *cli.Context, repository string, storageName string, phase string) bool { + + if !ScriptEnabled { + return false + } + + scriptDir, _ := filepath.Abs(path.Join(repository, duplicacy.DUPLICACY_DIRECTORY, "scripts")) + scriptName := phase + "-" + context.Command.Name + + script := path.Join(scriptDir, scriptName) + if _, err := os.Stat(script); err != nil { + scriptName = storageName + "-" + scriptName + script = path.Join(scriptDir, scriptName) + if _, err = 
os.Stat(script); err != nil { + return false + } + } + + duplicacy.LOG_INFO("SCRIPT_RUN", "Running %s script", scriptName) + + output, err := exec.Command(script, os.Args...).CombinedOutput() + for _, line := range strings.Split(string(output), "\n") { + line := strings.TrimSpace(line) + if line != "" { + duplicacy.LOG_INFO("SCRIPT_OUTPUT", line) + } + } + + if err != nil { + duplicacy.LOG_WARN("SCRIPT_ERROR", "Failed to run script: %v", err) + return false + } + + return true +} + +func initRepository(context *cli.Context) { + configRespository(context, true) +} + +func addStorage(context *cli.Context) { + configRespository(context, false) +} + +func configRespository(context *cli.Context, init bool) { + + setGlobalOptions(context) + defer duplicacy.CatchLogException() + + numberOfArgs := 3 + if init { + numberOfArgs = 2 + } + if len(context.Args()) != numberOfArgs { + fmt.Fprintf(context.App.Writer, "The %s command requires %d arguments.\n\n", + context.Command.Name, numberOfArgs) + cli.ShowCommandHelp(context, context.Command.Name) + os.Exit(ArgumentExitCode) + } + + var storageName string + var snapshotID string + var storageURL string + + if init { + storageName = "default" + snapshotID = context.Args()[0] + storageURL = context.Args()[1] + } else { + storageName = context.Args()[0] + snapshotID = context.Args()[1] + storageURL = context.Args()[2] + } + + var repository string + var err error + + if init { + repository, err = os.Getwd() + if err != nil { + duplicacy.LOG_ERROR("REPOSITORY_PATH", "Failed to retrieve the current working directory: %v", err) + return + } + + duplicacyDirectory := path.Join(repository, duplicacy.DUPLICACY_DIRECTORY) + if stat, _ := os.Stat(path.Join(duplicacyDirectory, "preferences")); stat != nil { + duplicacy.LOG_ERROR("REPOSITORY_INIT", "The repository %s has already been initialized", repository) + return + } + + err = os.Mkdir(duplicacyDirectory, 0744) + if err != nil && !os.IsExist(err) { + 
duplicacy.LOG_ERROR("REPOSITORY_INIT", "Failed to create the directory %s: %v", + duplicacy.DUPLICACY_DIRECTORY, err) + return + } + + duplicacy.SetKeyringFile(path.Join(duplicacyDirectory, "keyring")) + + } else { + repository, _ = getRepositoryPreference(context, "") + if duplicacy.FindPreference(storageName) != nil { + duplicacy.LOG_ERROR("STORAGE_DUPLICATE", "There is already a storage named '%s'", storageName) + return + } + } + + preference := duplicacy.Preference { + Name: storageName, + SnapshotID : snapshotID, + StorageURL : storageURL, + Encrypted: context.Bool("encrypt"), + } + + storage := duplicacy.CreateStorage(repository, preference, true, 1) + storagePassword := "" + if preference.Encrypted { + prompt := fmt.Sprintf("Enter storage password for %s:", preference.StorageURL) + storagePassword = duplicacy.GetPassword(preference, "password", prompt, false, true) + } + + existingConfig, _, err := duplicacy.DownloadConfig(storage, storagePassword) + if err != nil { + duplicacy.LOG_ERROR("STORAGE_CONFIG", "Failed to download the configuration file from the storage: %v", err) + return + } + + if existingConfig != nil { + duplicacy.LOG_INFO("STORAGE_CONFIGURED", + "The storage '%s' has already been initialized", preference.StorageURL) + if existingConfig.CompressionLevel >= -1 && existingConfig.CompressionLevel <= 9 { + duplicacy.LOG_INFO("STORAGE_FORMAT", "This storage is configured to use the pre-1.2.0 format") + } else if existingConfig.CompressionLevel != 100 { + duplicacy.LOG_ERROR("STORAGE_COMPRESSION", "This storage is configured with an invalid compression level %d", existingConfig.CompressionLevel) + return + } + + // Don't print config in the background mode + if !duplicacy.RunInBackground { + existingConfig.Print() + } + } else { + compressionLevel := 100 + + averageChunkSize := duplicacy.AtoSize(context.String("chunk-size")) + if averageChunkSize == 0 { + fmt.Fprintf(context.App.Writer, "Invalid average chunk size: %s.\n\n", 
context.String("chunk-size")) + cli.ShowCommandHelp(context, context.Command.Name) + os.Exit(ArgumentExitCode) + } + + size := 1 + for size * 2 <= averageChunkSize { + size *= 2 + } + + if size != averageChunkSize { + fmt.Fprintf(context.App.Writer, "Invalid average chunk size: %d is not a power of 2.\n\n", + averageChunkSize) + cli.ShowCommandHelp(context, context.Command.Name) + os.Exit(ArgumentExitCode) + } + + maximumChunkSize := 4 * averageChunkSize + minimumChunkSize := averageChunkSize / 4 + + if context.String("max-chunk-size") != "" { + maximumChunkSize = duplicacy.AtoSize(context.String("max-chunk-size")) + if maximumChunkSize < averageChunkSize { + fmt.Fprintf(context.App.Writer, "Invalid maximum chunk size: %s.\n\n", + context.String("max-chunk-size")) + cli.ShowCommandHelp(context, context.Command.Name) + os.Exit(ArgumentExitCode) + } + } + + if context.String("min-chunk-size") != "" { + minimumChunkSize = duplicacy.AtoSize(context.String("min-chunk-size")) + if minimumChunkSize > averageChunkSize || minimumChunkSize == 0 { + fmt.Fprintf(context.App.Writer, "Invalid minimum chunk size: %s.\n\n", + context.String("min-chunk-size")) + cli.ShowCommandHelp(context, context.Command.Name) + os.Exit(ArgumentExitCode) + } + } + + if preference.Encrypted { + repeatedPassword := duplicacy.GetPassword(preference, "password", "Re-enter storage password:", + false, true) + if repeatedPassword != storagePassword { + duplicacy.LOG_ERROR("STORAGE_PASSWORD", "Storage passwords do not match") + return + } + } + + var otherConfig *duplicacy.Config + if context.String("copy") != "" { + + otherPreference := duplicacy.FindPreference(context.String("copy")) + + if otherPreference == nil { + + } + + otherStorage := duplicacy.CreateStorage(repository, *otherPreference, false, 1) + + otherPassword := "" + if otherPreference.Encrypted { + prompt := fmt.Sprintf("Enter storage password for %s:", otherPreference.StorageURL) + otherPassword = duplicacy.GetPassword(*otherPreference, 
"password", prompt, false, false) + } + + otherConfig, _, err = duplicacy.DownloadConfig(otherStorage, otherPassword) + if err != nil { + duplicacy.LOG_ERROR("STORAGE_COPY", "Failed to download the configuration file from the storage: %v", + err) + return + } + + if otherConfig == nil { + duplicacy.LOG_ERROR("STORAGE_NOT_CONFIGURED", + "The storage to copy the configuration from has not been initialized") + } + } + + duplicacy.ConfigStorage(storage, compressionLevel, averageChunkSize, maximumChunkSize, + minimumChunkSize, storagePassword, otherConfig) + } + + duplicacy.Preferences = append(duplicacy.Preferences, preference) + + duplicacy.SavePreferences(repository) + + duplicacy.LOG_INFO("REPOSITORY_INIT", "%s will be backed up to %s with id %s", + repository, preference.StorageURL, preference.SnapshotID) +} + +type TriBool struct { + Value int +} + +func (triBool *TriBool) Set(value string) error { + value = strings.ToLower(value) + if value == "yes" || value == "true" || value == "1" { + triBool.Value = 2 + return nil + } else if value == "no" || value == "false" || value == "0" { + triBool.Value = 1 + return nil + } else if value == "" { + // Only set to true if it hasn't been set before. 
This is necessary because for 'encrypt, e' this may + // be called twice, the second time with a value of "" + if triBool.Value == 0 { + triBool.Value = 2 + } + return nil + } else { + return fmt.Errorf("Invalid boolean value '%s'", value) + } +} + +// IsBoolFlag implements the private interface flag.boolFlag to indicate that this is a bool flag so the argument +// is optional +func (triBool *TriBool) IsBoolFlag() bool { return true } + +func (triBool *TriBool) String() string { + return "" +} + +func (triBool *TriBool) IsSet() bool { + return triBool.Value != 0 +} + +func (triBool *TriBool) IsTrue() bool { + return triBool.Value == 2 +} + +func setPreference(context *cli.Context) { + + setGlobalOptions(context) + defer duplicacy.CatchLogException() + + if len(context.Args()) > 0 { + fmt.Fprintf(context.App.Writer, "The %s command takes no arguments.\n\n", context.Command.Name) + cli.ShowCommandHelp(context, context.Command.Name) + os.Exit(ArgumentExitCode) + } + + storageName := context.String("storage") + + repository, oldPreference := getRepositoryPreference(context, storageName) + + if oldPreference == nil { + duplicacy.LOG_ERROR("STORAGE_SET", "The storage '%s' has not been added to the repository %s", + storageName, repository) + return + } + + newPreference := *oldPreference + + triBool := context.Generic("e").(*TriBool) + if triBool.IsSet() { + newPreference.Encrypted = triBool.IsTrue() + } + + triBool = context.Generic("no-backup").(*TriBool) + if triBool.IsSet() { + newPreference.BackupProhibited = triBool.IsTrue() + } + + triBool = context.Generic("no-restore").(*TriBool) + if triBool.IsSet() { + newPreference.RestoreProhibited = triBool.IsTrue() + } + + triBool = context.Generic("no-save-password").(*TriBool) + if triBool.IsSet() { + newPreference.DoNotSavePassword = triBool.IsTrue() + } + + key := context.String("key") + value := context.String("value") + + if len(key) > 0 { + + // Make a deep copy of the keys otherwise we would be change both 
preferences at once.
+        newKeys := make(map[string]string)
+        for k, v := range newPreference.Keys {
+            newKeys[k] = v
+        }
+        newPreference.Keys = newKeys
+
+        if len(value) == 0 {
+            delete(newPreference.Keys, key)
+        } else {
+            if len(newPreference.Keys) == 0 {
+                newPreference.Keys = make(map[string]string)
+            }
+            newPreference.Keys[key] = value
+        }
+    }
+
+    if duplicacy.IsTracing() {
+        description, _ := json.MarshalIndent(newPreference, "", " ")
+        fmt.Printf("%s\n", description)
+    }
+
+    if newPreference.Equal(oldPreference) {
+        duplicacy.LOG_INFO("STORAGE_SET", "The options for storage %s have not been modified",
+            oldPreference.StorageURL)
+    } else {
+        *oldPreference = newPreference
+        duplicacy.SavePreferences(repository)
+        duplicacy.LOG_INFO("STORAGE_SET", "New options for storage %s have been saved", oldPreference.StorageURL)
+    }
+}
+
+// changePassword re-encrypts the storage configuration under a new password
+// entered by the user.
+func changePassword(context *cli.Context) {
+
+    setGlobalOptions(context)
+    defer duplicacy.CatchLogException()
+
+    if len(context.Args()) != 0 {
+        fmt.Fprintf(context.App.Writer, "The %s command requires no arguments.\n\n",
+            context.Command.Name)
+        cli.ShowCommandHelp(context, context.Command.Name)
+        os.Exit(ArgumentExitCode)
+    }
+
+    repository, preference := getRepositoryPreference(context, "")
+
+    storage := duplicacy.CreateStorage(repository, *preference, false, 1)
+    if storage == nil {
+        return
+    }
+
+    password := ""
+    if preference.Encrypted {
+        // Format the storage URL into the prompt first: GetPassword prints
+        // the prompt verbatim, so the original passed a literal "%s" through
+        // to the user.  (Matches the fmt.Sprintf pattern used for the same
+        // prompt in configRespository.)
+        prompt := fmt.Sprintf("Enter old password for storage %s:", preference.StorageURL)
+        password = duplicacy.GetPassword(*preference, "password", prompt, false, true)
+    }
+
+    config, _, err := duplicacy.DownloadConfig(storage, password)
+    if err != nil {
+        duplicacy.LOG_ERROR("STORAGE_CONFIG", "Failed to download the configuration file from the storage: %v", err)
+        return
+    }
+
+    if config == nil {
+        duplicacy.LOG_ERROR("STORAGE_NOT_CONFIGURED", "The storage has not been initialized")
+        return
+    }
+
+    newPassword := duplicacy.GetPassword(*preference, "password", "Enter new storage password:", false, true)
+    repeatedPassword := 
duplicacy.GetPassword(*preference, "password", "Re-enter new storage password:", false, true)
	if repeatedPassword != newPassword {
		duplicacy.LOG_ERROR("PASSWORD_CHANGE", "The new passwords do not match")
		return
	}
	if newPassword == password {
		duplicacy.LOG_ERROR("PASSWORD_CHANGE", "The new password is the same as the old one")
		return
	}

	duplicacy.UploadConfig(storage, config, newPassword)

	duplicacy.SavePassword(*preference, "password", newPassword)

	duplicacy.LOG_INFO("STORAGE_SET", "The password for storage %s has been changed", preference.StorageURL)
}

// backupRepository implements the 'backup' command: it saves a snapshot of the
// current repository to the configured (or --storage specified) storage.
func backupRepository(context *cli.Context) {
	setGlobalOptions(context)
	defer duplicacy.CatchLogException()

	// The backup command takes no positional arguments.
	if len(context.Args()) != 0 {
		fmt.Fprintf(context.App.Writer, "The %s command requires no arguments.\n\n", context.Command.Name)
		cli.ShowCommandHelp(context, context.Command.Name)
		os.Exit(ArgumentExitCode)
	}

	repository, preference := getRepositoryPreference(context, "")

	if preference.BackupProhibited {
		duplicacy.LOG_ERROR("BACKUP_DISABLED", "Backup from this repository to %s was disabled by the preference",
			preference.StorageURL)
		return
	}

	runScript(context, repository, preference.Name, "pre")

	threads := context.Int("threads")
	if threads < 1 {
		threads = 1
	}

	duplicacy.LOG_INFO("STORAGE_SET", "Storage set to %s", preference.StorageURL)
	storage := duplicacy.CreateStorage(repository, *preference, false, threads)
	if storage == nil {
		return
	}

	password := ""
	if preference.Encrypted {
		password = duplicacy.GetPassword(*preference, "password", "Enter storage password:", false, false)
	}

	// Quick mode (size/timestamp comparison) is the default; --hash forces
	// full re-hashing of every file.
	quickMode := true
	if context.Bool("hash") {
		quickMode = false
	}

	showStatistics := context.Bool("stats")

	enableVSS := context.Bool("vss")

	uploadRateLimit := context.Int("limit-rate")
	storage.SetRateLimits(0, uploadRateLimit)
	backupManager := duplicacy.CreateBackupManager(preference.SnapshotID, storage, repository, password)
	duplicacy.SavePassword(*preference, "password", password)

	backupManager.SetupSnapshotCache(repository, preference.Name)
	backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS)

	runScript(context, repository, preference.Name, "post")
}

// restoreRepository implements the 'restore' command: it restores the
// repository to the snapshot identified by the -r revision, optionally
// limited to files matching the given include/exclude patterns.
func restoreRepository(context *cli.Context) {
	setGlobalOptions(context)
	defer duplicacy.CatchLogException()

	revision := context.Int("r")
	if revision <= 0 {
		fmt.Fprintf(context.App.Writer, "The revision flag is not specified or invalid\n\n")
		cli.ShowCommandHelp(context, context.Command.Name)
		os.Exit(ArgumentExitCode)
	}

	repository, preference := getRepositoryPreference(context, "")

	if preference.RestoreProhibited {
		duplicacy.LOG_ERROR("RESTORE_DISABLED", "Restore from %s to this repository was disabled by the preference",
			preference.StorageURL)
		return
	}

	runScript(context, repository, preference.Name, "pre")

	threads := context.Int("threads")
	if threads < 1 {
		threads = 1
	}

	duplicacy.LOG_INFO("STORAGE_SET", "Storage set to %s", preference.StorageURL)
	storage := duplicacy.CreateStorage(repository, *preference, false, threads)
	if storage == nil {
		return
	}

	password := ""
	if preference.Encrypted {
		password = duplicacy.GetPassword(*preference, "password", "Enter storage password:", false, false)
	}

	quickMode := !context.Bool("hash")
	overwrite := context.Bool("overwrite")
	deleteMode := context.Bool("delete")
	showStatistics := context.Bool("stats")

	// Normalize the positional arguments into include (+) / exclude (-)
	// patterns.  Leading '--'/'++' runs are collapsed one character at a
	// time; a pattern without a sign is treated as an include.
	var patterns []string
	for _, pattern := range context.Args() {

		for strings.HasPrefix(pattern, "--") {
			pattern = pattern[1:]
		}

		for strings.HasPrefix(pattern, "++") {
			pattern = pattern[1:]
		}

		// Fix: an empty argument would panic on pattern[0] below.
		if pattern == "" {
			continue
		}

		if pattern[0] != '+' && pattern[0] != '-' {
			pattern = "+" + pattern
		}

		if pattern == "+" || pattern == "-" {
			continue
		}

		patterns = append(patterns, pattern)

	}

	storage.SetRateLimits(context.Int("limit-rate"), 0)
	backupManager := duplicacy.CreateBackupManager(preference.SnapshotID, storage, repository, password)
	duplicacy.SavePassword(*preference, "password", password)

	backupManager.SetupSnapshotCache(repository, preference.Name)
	backupManager.Restore(repository, revision, true, quickMode, threads, overwrite, deleteMode, showStatistics, patterns)

	runScript(context, repository, preference.Name, "post")
}

// listSnapshots implements the 'list' command: it prints the snapshots stored
// on the selected storage, optionally with their file lists and chunks.
func listSnapshots(context *cli.Context) {
	setGlobalOptions(context)
	defer duplicacy.CatchLogException()

	if len(context.Args()) != 0 {
		fmt.Fprintf(context.App.Writer, "The %s command requires no arguments.\n\n", context.Command.Name)
		cli.ShowCommandHelp(context, context.Command.Name)
		os.Exit(ArgumentExitCode)
	}

	repository, preference := getRepositoryPreference(context, "")

	duplicacy.LOG_INFO("STORAGE_SET", "Storage set to %s", preference.StorageURL)

	runScript(context, repository, preference.Name, "pre")

	resetPassword := context.Bool("reset-passwords")
	storage := duplicacy.CreateStorage(repository, *preference, resetPassword, 1)
	if storage == nil {
		return
	}

	password := ""
	if preference.Encrypted {
		password = duplicacy.GetPassword(*preference, "password", "Enter storage password:",
			false, resetPassword)
	}

	tag := context.String("t")
	revisions := getRevisions(context)

	backupManager := duplicacy.CreateBackupManager(preference.SnapshotID, storage, repository, password)
	duplicacy.SavePassword(*preference, "password", password)

	// --all lists every snapshot id; --id overrides the default id.
	id := preference.SnapshotID
	if context.Bool("all") {
		id = ""
	} else if context.String("id") != "" {
		id = context.String("id")
	}

	showFiles := context.Bool("files")
	showChunks := context.Bool("chunks")

	backupManager.SetupSnapshotCache(repository, preference.Name)
	backupManager.SnapshotManager.ListSnapshots(id, revisions, tag, showFiles, showChunks)

	runScript(context, repository, preference.Name, "post")
}

// checkSnapshots implements the 'check' command: it verifies that all chunks
// referenced by the selected snapshots exist on the storage.
func checkSnapshots(context *cli.Context) {
+ setGlobalOptions(context) + defer duplicacy.CatchLogException() + + if len(context.Args()) != 0 { + fmt.Fprintf(context.App.Writer, "The %s command requires no arguments.\n\n", context.Command.Name) + cli.ShowCommandHelp(context, context.Command.Name) + os.Exit(ArgumentExitCode) + } + + repository, preference := getRepositoryPreference(context, "") + + duplicacy.LOG_INFO("STORAGE_SET", "Storage set to %s", preference.StorageURL) + + runScript(context, repository, preference.Name, "pre") + + storage := duplicacy.CreateStorage(repository, *preference, false, 1) + if storage == nil { + return + } + + password := "" + if preference.Encrypted { + password = duplicacy.GetPassword(*preference, "password", "Enter storage password:", false, false) + } + + tag := context.String("t") + revisions := getRevisions(context) + + backupManager := duplicacy.CreateBackupManager(preference.SnapshotID, storage, repository, password) + duplicacy.SavePassword(*preference, "password", password) + + id := preference.SnapshotID + if context.Bool("all") { + id = "" + } else if context.String("id") != "" { + id = context.String("id") + } + + showStatistics := context.Bool("stats") + checkFiles := context.Bool("files") + searchFossils := context.Bool("fossils") + resurrect := context.Bool("resurrect") + + backupManager.SetupSnapshotCache(repository, preference.Name) + backupManager.SnapshotManager.CheckSnapshots(id, revisions, tag, showStatistics, checkFiles, searchFossils, resurrect) + + runScript(context, repository, preference.Name, "post") +} + +func printFile(context *cli.Context) { + setGlobalOptions(context) + defer duplicacy.CatchLogException() + + if len(context.Args()) > 1 { + fmt.Fprintf(context.App.Writer, "The %s command requires at most 1 argument.\n\n", context.Command.Name) + cli.ShowCommandHelp(context, context.Command.Name) + os.Exit(ArgumentExitCode) + } + + repository, preference := getRepositoryPreference(context, "") + + runScript(context, repository, preference.Name, 
"pre") + + // Do not print out storage for this command + //duplicacy.LOG_INFO("STORAGE_SET", "Storage set to %s", preference.StorageURL) + storage := duplicacy.CreateStorage(repository, *preference, false, 1) + if storage == nil { + return + } + + password := "" + if preference.Encrypted { + password = duplicacy.GetPassword(*preference, "password", "Enter storage password:", false, false) + } + + revision := context.Int("r") + + snapshotID := preference.SnapshotID + if context.String("id") != "" { + snapshotID = context.String("id") + } + + backupManager := duplicacy.CreateBackupManager(preference.SnapshotID, storage, repository, password) + duplicacy.SavePassword(*preference, "password", password) + + backupManager.SetupSnapshotCache(repository, preference.Name) + + file := "" + if len(context.Args()) > 0 { + file = context.Args()[0] + } + backupManager.SnapshotManager.PrintFile(snapshotID, revision, file) + + runScript(context, repository, preference.Name, "post") +} + +func diff(context *cli.Context) { + setGlobalOptions(context) + defer duplicacy.CatchLogException() + + if len(context.Args()) > 1 { + fmt.Fprintf(context.App.Writer, "The %s command requires 0 or 1 argument.\n\n", context.Command.Name) + cli.ShowCommandHelp(context, context.Command.Name) + os.Exit(ArgumentExitCode) + } + + repository, preference := getRepositoryPreference(context, "") + + runScript(context, repository, preference.Name, "pre") + + duplicacy.LOG_INFO("STORAGE_SET", "Storage set to %s", preference.StorageURL) + storage := duplicacy.CreateStorage(repository, *preference, false, 1) + if storage == nil { + return + } + + + password := "" + if preference.Encrypted { + password = duplicacy.GetPassword(*preference, "password", "Enter storage password:", false, false) + } + + revisions := context.IntSlice("r") + if len(revisions) > 2 { + fmt.Fprintf(context.App.Writer, "The %s command requires at most 2 revisions.\n", context.Command.Name) + os.Exit(ArgumentExitCode) + } + + snapshotID := 
preference.SnapshotID + if context.String("id") != "" { + snapshotID = context.String("id") + } + + path := "" + if len(context.Args()) > 0 { + path = context.Args()[0] + } + + compareByHash := context.Bool("hash") + backupManager := duplicacy.CreateBackupManager(preference.SnapshotID, storage, repository, password) + duplicacy.SavePassword(*preference, "password", password) + + backupManager.SetupSnapshotCache(repository, preference.Name) + backupManager.SnapshotManager.Diff(repository, snapshotID, revisions, path, compareByHash) + + runScript(context, repository, preference.Name, "post") +} + +func showHistory(context *cli.Context) { + setGlobalOptions(context) + defer duplicacy.CatchLogException() + + if len(context.Args()) != 1 { + fmt.Fprintf(context.App.Writer, "The %s command requires 1 argument.\n\n", context.Command.Name) + cli.ShowCommandHelp(context, context.Command.Name) + os.Exit(ArgumentExitCode) + } + + repository, preference := getRepositoryPreference(context, "") + + runScript(context, repository, preference.Name, "pre") + + duplicacy.LOG_INFO("STORAGE_SET", "Storage set to %s", preference.StorageURL) + storage := duplicacy.CreateStorage(repository, *preference, false, 1) + if storage == nil { + return + } + + password := "" + if preference.Encrypted { + password = duplicacy.GetPassword(*preference, "password", "Enter storage password:", false, false) + } + + snapshotID := preference.SnapshotID + if context.String("id") != "" { + snapshotID = context.String("id") + } + + path := context.Args()[0] + + revisions := getRevisions(context) + showLocalHash := context.Bool("hash") + backupManager := duplicacy.CreateBackupManager(preference.SnapshotID, storage, repository, password) + duplicacy.SavePassword(*preference, "password", password) + + backupManager.SetupSnapshotCache(repository, preference.Name) + backupManager.SnapshotManager.ShowHistory(repository, snapshotID, revisions, path, showLocalHash) + + runScript(context, repository, preference.Name, 
"post") +} + +func pruneSnapshots(context *cli.Context) { + setGlobalOptions(context) + defer duplicacy.CatchLogException() + + if len(context.Args()) != 0 { + fmt.Fprintf(context.App.Writer, "The %s command requires no arguments.\n\n", context.Command.Name) + cli.ShowCommandHelp(context, context.Command.Name) + os.Exit(ArgumentExitCode) + } + + repository, preference := getRepositoryPreference(context, "") + + runScript(context, repository, preference.Name, "pre") + + duplicacy.LOG_INFO("STORAGE_SET", "Storage set to %s", preference.StorageURL) + storage := duplicacy.CreateStorage(repository, *preference, false, 1) + if storage == nil { + return + } + + password := "" + if preference.Encrypted { + password = duplicacy.GetPassword(*preference, "password", "Enter storage password:", false, false) + } + + revisions := getRevisions(context) + tags := context.StringSlice("t") + retentions := context.StringSlice("keep") + selfID := preference.SnapshotID + snapshotID := preference.SnapshotID + if context.Bool("all") { + snapshotID = "" + } else if context.String("id") != "" { + snapshotID = context.String("id") + } + + ignoredIDs := context.StringSlice("ignore") + exhaustive := context.Bool("exhaustive") + exclusive := context.Bool("exclusive") + dryRun := context.Bool("dry-run") + deleteOnly := context.Bool("delete-only") + collectOnly := context.Bool("collect-only") + + if !storage.IsMoveFileImplemented() && !exclusive { + fmt.Fprintf(context.App.Writer, "The --exclusive option must be enabled for storage %s\n", + preference.StorageURL) + os.Exit(ArgumentExitCode) + } + + backupManager := duplicacy.CreateBackupManager(preference.SnapshotID, storage, repository, password) + duplicacy.SavePassword(*preference, "password", password) + + backupManager.SetupSnapshotCache(repository, preference.Name) + backupManager.SnapshotManager.PruneSnapshots(repository, selfID, snapshotID, revisions, tags, retentions, + exhaustive, exclusive, ignoredIDs, dryRun, deleteOnly, collectOnly) 
+ + runScript(context, repository, preference.Name, "post") +} + +func copySnapshots(context *cli.Context) { + setGlobalOptions(context) + defer duplicacy.CatchLogException() + + if len(context.Args()) != 0 { + fmt.Fprintf(context.App.Writer, "The %s command requires no arguments.\n\n", context.Command.Name) + cli.ShowCommandHelp(context, context.Command.Name) + os.Exit(ArgumentExitCode) + } + + repository, source := getRepositoryPreference(context, context.String("from")) + + runScript(context, repository, source.Name, "pre") + + duplicacy.LOG_INFO("STORAGE_SET", "Source storage set to %s", source.StorageURL) + sourceStorage := duplicacy.CreateStorage(repository, *source, false, 1) + if sourceStorage == nil { + return + } + + sourcePassword := "" + if source.Encrypted { + sourcePassword = duplicacy.GetPassword(*source, "password", "Enter source storage password:", false, false) + } + + sourceManager := duplicacy.CreateBackupManager(source.SnapshotID, sourceStorage, repository, sourcePassword) + sourceManager.SetupSnapshotCache(repository, source.Name) + duplicacy.SavePassword(*source, "password", sourcePassword) + + + _, destination := getRepositoryPreference(context, context.String("to")) + + if destination.Name == source.Name { + duplicacy.LOG_ERROR("COPY_IDENTICAL", "The source storage and the destination storage are the same") + return + } + + if destination.BackupProhibited { + duplicacy.LOG_ERROR("COPY_DISABLED", "Copying snapshots to %s was disabled by the preference", + destination.StorageURL) + return + } + + + duplicacy.LOG_INFO("STORAGE_SET", "Destination storage set to %s", destination.StorageURL) + destinationStorage := duplicacy.CreateStorage(repository, *destination, false, 1) + if destinationStorage == nil { + return + } + + destinationPassword := "" + if destination.Encrypted { + destinationPassword = duplicacy.GetPassword(*destination, "password", + "Enter destination storage password:",false, false) + } + + 
sourceStorage.SetRateLimits(context.Int("download-rate-limit"), 0) + destinationStorage.SetRateLimits(0, context.Int("upload-rate-limit")) + + destinationManager := duplicacy.CreateBackupManager(destination.SnapshotID, destinationStorage, repository, + destinationPassword) + duplicacy.SavePassword(*destination, "password", destinationPassword) + destinationManager.SetupSnapshotCache(repository, destination.Name) + + revisions := getRevisions(context) + snapshotID := "" + if context.String("id") != "" { + snapshotID = context.String("id") + } + + threads := context.Int("threads") + if threads < 1 { + threads = 1 + } + + sourceManager.CopySnapshots(destinationManager, snapshotID, revisions, threads) + runScript(context, repository, source.Name, "post") +} + +func infoStorage(context *cli.Context) { + setGlobalOptions(context) + defer duplicacy.CatchLogException() + + if len(context.Args()) != 1 { + fmt.Fprintf(context.App.Writer, "The %s command requires a storage URL argument.\n\n", context.Command.Name) + cli.ShowCommandHelp(context, context.Command.Name) + os.Exit(ArgumentExitCode) + } + + repository := context.String("repository") + if repository != "" { + duplicacy.SetKeyringFile(path.Join(repository, duplicacy.DUPLICACY_DIRECTORY, "keyring")) + } + + isEncrypted := context.Bool("e") + preference := duplicacy.Preference { + Name: "default", + SnapshotID: "default", + StorageURL: context.Args()[0], + Encrypted: isEncrypted, + DoNotSavePassword: true, + } + + password := "" + if isEncrypted { + password = duplicacy.GetPassword(preference, "password", "Enter the storage password:", false, false) + } + + storage := duplicacy.CreateStorage("", preference, context.Bool("reset-passwords"), 1) + config, isStorageEncrypted, err := duplicacy.DownloadConfig(storage, password) + + if isStorageEncrypted { + duplicacy.LOG_INFO("STORAGE_ENCRYPTED", "The storage is encrypted with a password") + } else if err != nil { + duplicacy.LOG_ERROR("STORAGE_ERROR", "%v", err) + } else if 
config == nil { + duplicacy.LOG_INFO("STORAGE_NOT_INITIALIZED", "The storage has not been initialized") + } else { + config.Print() + } +} + +func main() { + + duplicacy.SetLoggingLevel(duplicacy.INFO) + + app := cli.NewApp() + + app.Commands = []cli.Command { + { + Name: "init", + Flags: []cli.Flag { + cli.BoolFlag { + Name: "encrypt, e", + Usage: "encrypt the storage with a password", + }, + cli.StringFlag { + Name: "chunk-size, c", + Value: "4M", + Usage: "the average size of chunks", + Argument: "4M", + }, + cli.StringFlag { + Name: "max-chunk-size, max", + Usage: "the maximum size of chunks (defaults to chunk-size * 4)", + Argument: "16M", + }, + cli.StringFlag { + Name: "min-chunk-size, min", + Usage: "the minimum size of chunks (defaults to chunk-size / 4)", + Argument: "1M", + }, + }, + Usage: "Initialize the storage if necessary and the current directory as the repository", + ArgsUsage: " ", + Action: initRepository, + }, + { + Name: "backup", + Flags: []cli.Flag { + cli.BoolFlag { + Name: "hash", + Usage: "detect file differences by hash (rather than size and timestamp)", + }, + cli.StringFlag { + Name: "t", + Usage: "assign a tag to the backup", + Argument: "", + }, + cli.BoolFlag { + Name: "stats", + Usage: "show statistics during and after backup", + }, + cli.IntFlag { + Name: "threads", + Value: 1, + Usage: "number of uploading threads", + Argument: "", + }, + cli.IntFlag { + Name: "limit-rate", + Value: 0, + Usage: "the maximum upload rate (in kilobytes/sec)", + Argument: "", + }, + cli.BoolFlag { + Name: "vss", + Usage: "enable the Volume Shadow Copy service (Windows only)", + }, + cli.StringFlag { + Name: "storage", + Usage: "backup to the specified storage instead of the default one", + Argument: "", + }, + }, + Usage: "Save a snapshot of the repository to the storage", + ArgsUsage: " ", + Action: backupRepository, + }, + + { + Name: "restore", + Flags: []cli.Flag { + cli.IntFlag { + Name: "r", + Usage: "the revision number of the snapshot 
(required)", + Argument: "", + }, + cli.BoolFlag { + Name: "hash", + Usage: "detect file differences by hash (rather than size and timestamp)", + }, + cli.BoolFlag { + Name: "overwrite", + Usage: "overwrite existing files in the repository", + }, + cli.BoolFlag { + Name: "delete", + Usage: "delete files not in the snapshot", + }, + cli.BoolFlag { + Name: "stats", + Usage: "show statistics during and after restore", + }, + cli.IntFlag { + Name: "threads", + Value: 1, + Usage: "number of downloading threads", + Argument: "", + }, + cli.IntFlag { + Name: "limit-rate", + Value: 0, + Usage: "the maximum download rate (in kilobytes/sec)", + Argument: "", + }, + cli.StringFlag { + Name: "storage", + Usage: "restore from the specified storage instead of the default one", + Argument: "", + }, + }, + Usage: "Restore the repository to a previously saved snapshot", + ArgsUsage: "[--] [pattern] ...", + Action: restoreRepository, + }, + + { + Name: "list", + Flags: []cli.Flag { + cli.BoolFlag { + Name: "all, a", + Usage: "list snapshots with any id", + }, + cli.StringFlag { + Name: "id", + Usage: "list snapshots with the specified id rather than the default one", + Argument: "", + }, + cli.StringSliceFlag { + Name: "r", + Usage: "the revision number of the snapshot", + Argument: "", + }, + cli.StringFlag { + Name: "t", + Usage: "list snapshots with the specified tag", + Argument: "", + }, + cli.BoolFlag { + Name: "files", + Usage: "print the file list in each snapshot", + }, + cli.BoolFlag { + Name: "chunks", + Usage: "print chunks in each snapshot or all chunks if no snapshot specified", + }, + cli.BoolFlag { + Name: "reset-passwords", + Usage: "take passwords from input rather than keychain/keyring", + }, + cli.StringFlag { + Name: "storage", + Usage: "retrieve snapshots from the specified storage", + Argument: "", + }, + }, + Usage: "List snapshots", + ArgsUsage: " ", + Action: listSnapshots, + }, + { + Name: "check", + Flags: []cli.Flag { + cli.BoolFlag { + Name: "all, a", + 
Usage: "check snapshots with any id", + }, + cli.StringFlag { + Name: "id", + Usage: "check snapshots with the specified id rather than the default one", + Argument: "", + }, + cli.StringSliceFlag { + Name: "r", + Usage: "the revision number of the snapshot", + Argument: "", + }, + cli.StringFlag { + Name: "t", + Usage: "check snapshots with the specified tag", + Argument: "", + }, + cli.BoolFlag { + Name: "fossils", + Usage: "search fossils if a chunk can't be found", + }, + cli.BoolFlag { + Name: "resurrect", + Usage: "turn referenced fossils back into chunks", + }, + cli.BoolFlag { + Name: "files", + Usage: "verify the integrity of every file", + }, + cli.BoolFlag { + Name: "stats", + Usage: "show deduplication statistics (imply -all and all revisions)", + }, + cli.StringFlag { + Name: "storage", + Usage: "retrieve snapshots from the specified storage", + Argument: "", + }, + }, + Usage: "Check the integrity of snapshots", + ArgsUsage: " ", + Action: checkSnapshots, + }, + { + Name: "cat", + Flags: []cli.Flag { + cli.StringFlag { + Name: "id", + Usage: "retrieve from the snapshot with the specified id", + Argument: "", + }, + cli.IntFlag { + Name: "r", + Usage: "the revision number of the snapshot", + Argument: "", + }, + cli.StringFlag { + Name: "storage", + Usage: "retrieve the file from the specified storage", + Argument: "", + }, + }, + Usage: "Print to stdout the specified file, or the snapshot content if no file is specified", + ArgsUsage: "[]", + Action: printFile, + }, + + { + Name: "diff", + Flags: []cli.Flag { + cli.StringFlag { + Name: "id", + Usage: "diff snapshots with the specified id", + Argument: "", + }, + cli.IntSliceFlag { + Name: "r", + Usage: "the revision number of the snapshot", + Argument: "", + }, + cli.BoolFlag { + Name: "hash", + Usage: "compute the hashes of on-disk files", + }, + cli.StringFlag { + Name: "storage", + Usage: "retrieve files from the specified storage", + Argument: "", + }, + }, + Usage: "Compare two snapshots or two 
revisions of a file", + ArgsUsage: "[]", + Action: diff, + }, + + { + Name: "history", + Flags: []cli.Flag { + cli.StringFlag { + Name: "id", + Usage: "find the file in the snapshot with the specified id", + Argument: "", + }, + cli.StringSliceFlag { + Name: "r", + Usage: "show history of the specified revisions", + Argument: "", + }, + cli.BoolFlag { + Name: "hash", + Usage: "show the hash of the on-disk file", + }, + cli.StringFlag { + Name: "storage", + Usage: "retrieve files from the specified storage", + Argument: "", + }, + }, + Usage: "Show the history of a file", + ArgsUsage: "", + Action: showHistory, + }, + + { + Name: "prune", + Flags: []cli.Flag { + cli.StringFlag { + Name: "id", + Usage: "delete snapshots with the specified id instead of the default one", + Argument: "", + }, + cli.BoolFlag { + Name: "all, a", + Usage: "match against all snapshot IDs", + }, + cli.StringSliceFlag { + Name: "r", + Usage: "delete snapshots with the specified revisions", + Argument: "", + }, + cli.StringSliceFlag { + Name: "t", + Usage: "delete snapshots with the specifed tags", + Argument: "", + }, + cli.StringSliceFlag { + Name: "keep", + Usage: "keep 1 snapshot every n days for snapshots older than m days", + Argument: "", + }, + cli.BoolFlag { + Name: "exhaustive", + Usage: "remove all unreferenced chunks (not just those referenced by deleted snapshots)", + }, + cli.BoolFlag { + Name: "exclusive", + Usage: "assume exclusive acess to the storage (disable two-step fossil collection)", + }, + cli.BoolFlag { + Name: "dry-run, d", + Usage: "show what would have been deleted", + }, + cli.BoolFlag { + Name: "delete-only", + Usage: "delete fossils previsouly collected (if deletable) and don't collect fossils", + }, + cli.BoolFlag { + Name: "collect-only", + Usage: "identify and collect fossils, but don't delete fossils previously collected", + }, + cli.StringSliceFlag { + Name: "ignore", + Usage: "ignore snapshots with the specified id when deciding if fossils can be deleted", 
+ Argument: "", + }, + cli.StringFlag { + Name: "storage", + Usage: "prune snapshots from the specified storage", + Argument: "", + }, + }, + Usage: "Prune snapshots by revision, tag, or retention policy", + ArgsUsage: " ", + Action: pruneSnapshots, + }, + + + { + Name: "password", + Flags: []cli.Flag { + cli.StringFlag { + Name: "storage", + Usage: "change the password used to access the specified storage", + Argument: "", + }, + }, + Usage: "Change the storage password", + ArgsUsage: " ", + Action: changePassword, + }, + + { + Name: "add", + Flags: []cli.Flag { + cli.BoolFlag { + Name: "encrypt, e", + Usage: "Encrypt the storage with a password", + }, + cli.StringFlag { + Name: "chunk-size, c", + Value: "4M", + Usage: "the average size of chunks", + Argument: "4M", + }, + cli.StringFlag { + Name: "max-chunk-size, max", + Usage: "the maximum size of chunks (defaults to chunk-size * 4)", + Argument: "16M", + }, + cli.StringFlag { + Name: "min-chunk-size, min", + Usage: "the minimum size of chunks (defaults to chunk-size / 4)", + Argument: "1M", + }, + cli.StringFlag { + Name: "copy", + Usage: "make the new storage compatible with an existing one to allow for copy operations", + Argument: "", + }, + }, + Usage: "Add an additional storage to be used for the existing repository", + ArgsUsage: " ", + Action: addStorage, + }, + + { + Name: "set", + Flags: []cli.Flag { + cli.GenericFlag { + Name: "encrypt, e", + Usage: "encrypt the storage with a password", + Value: &TriBool{}, + Arg: "true", + }, + cli.GenericFlag { + Name: "no-backup", + Usage: "backup to this storage is prohibited", + Value: &TriBool{}, + Arg: "true", + }, + cli.GenericFlag { + Name: "no-restore", + Usage: "restore from this storage is prohibited", + Value: &TriBool{}, + Arg: "true", + }, + cli.GenericFlag { + Name: "no-save-password", + Usage: "don't save password or access keys to keychain/keyring", + Value: &TriBool{}, + Arg: "true", + }, + cli.StringFlag { + Name: "key", + Usage: "add a 
key/password whose value is supplied by the -value option", + }, + cli.StringFlag { + Name: "value", + Usage: "the value of the key/password", + }, + cli.StringFlag { + Name: "storage", + Usage: "use the specified storage instead of the default one", + Argument: "", + }, + }, + Usage: "Change the options for the default or specified storage", + ArgsUsage: " ", + Action: setPreference, + }, + { + Name: "copy", + Flags: []cli.Flag { + cli.StringFlag { + Name: "id", + Usage: "copy snapshots with the specified id instead of all snapshot ids", + Argument: "", + }, + cli.StringSliceFlag { + Name: "r", + Usage: "copy snapshots with the specified revisions", + Argument: "", + }, + cli.StringFlag { + Name: "from", + Usage: "copy snapshots from the specified storage", + Argument: "", + }, + cli.StringFlag { + Name: "to", + Usage: "copy snapshots to the specified storage", + Argument: "", + }, + cli.IntFlag { + Name: "download-limit-rate", + Value: 0, + Usage: "the maximum download rate (in kilobytes/sec)", + Argument: "", + }, + cli.IntFlag { + Name: "upload-limit-rate", + Value: 0, + Usage: "the maximum upload rate (in kilobytes/sec)", + Argument: "", + }, + cli.IntFlag { + Name: "threads", + Value: 1, + Usage: "number of downloading threads", + Argument: "", + }, + }, + Usage: "Copy snapshots between compatible storages", + ArgsUsage: " ", + Action: copySnapshots, + }, + + { + Name: "info", + Flags: []cli.Flag { + cli.BoolFlag { + Name: "encrypt, e", + Usage: "The storage is encrypted with a password", + }, + cli.StringFlag { + Name: "repository", + Usage: "retrieve saved passwords from the specified repository", + Argument: "", + }, + cli.BoolFlag { + Name: "reset-passwords", + Usage: "take passwords from input rather than keychain/keyring", + }, + }, + Usage: "Show the information about the specified storage", + ArgsUsage: "", + Action: infoStorage, + }, + + } + + app.Flags = []cli.Flag { + cli.BoolFlag { + Name: "verbose, v", + Usage: "show more detailed information", + 
}, + cli.BoolFlag { + Name: "debug, d", + Usage: "show even more detailed information, useful for debugging", + }, + cli.BoolFlag { + Name: "log", + Usage: "enable log-style output", + }, + cli.BoolFlag { + Name: "stack", + Usage: "print the stack trace when an error occurs", + }, + cli.BoolFlag { + Name: "no-script", + Usage: "do not run script before or after command execution", + }, + cli.BoolFlag { + Name: "background", + Usage: "read passwords, tokens, or keys only from keychain/keyring or env", + }, + } + + app.HideVersion = true + app.Name = "duplicacy" + app.HelpName = "duplicacy" + app.Usage = "A new generation cloud backup tool based on lock-free deduplication" + app.Version = "2.0.0" + err := app.Run(os.Args) + if err != nil { + os.Exit(2) + } + +}