Mirror of https://github.com/rclone/rclone.git
Synced 2025-12-06 00:03:32 +00:00
b2: Implement server side copy for files > 5GB - fixes #3991
This factors copy out of SetModTime and Copy so it can be called from both places. This also reworks all the multipart uploading to use sync.Errgroup and memory pooling like the other backends. This makes it more memory efficient and handles errors better.

See: https://forum.rclone.org/t/copying-files-within-a-b2-bucket/16680/10
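As a rough illustration of the errgroup-plus-buffer-pool pattern the message refers to — not the rclone code: sync.Pool stands in for rclone's lib/pool, all names are illustrative, and real code would also cap the number of in-flight parts with a token dispenser:

package main

import (
	"context"
	"fmt"
	"io"
	"strings"
	"sync"

	"golang.org/x/sync/errgroup"
)

const chunkSize = 8 // tiny, for demonstration only

// uploadPart stands in for posting one part to the remote.
func uploadPart(ctx context.Context, part int, data []byte) error {
	fmt.Printf("part %d: %q\n", part, data)
	return nil
}

func main() {
	in := strings.NewReader("some data to upload in chunks")
	pool := sync.Pool{New: func() interface{} { return make([]byte, chunkSize) }}

	g, gCtx := errgroup.WithContext(context.Background())
	for part := 0; ; part++ {
		buf := pool.Get().([]byte)
		n, err := io.ReadFull(in, buf)
		if n == 0 {
			pool.Put(buf)
			break
		}
		part, data := part, buf[:n] // per-iteration copies for the closure
		g.Go(func() error {
			defer pool.Put(buf) // return the buffer for reuse
			return uploadPart(gCtx, part, data)
		})
		if err != nil { // io.EOF / io.ErrUnexpectedEOF: stream exhausted
			break
		}
	}
	// Wait collects the first error and cancels gCtx for the rest.
	if err := g.Wait(); err != nil {
		fmt.Println("upload failed:", err)
	}
}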
backend/b2/b2.go | 204
@@ -33,6 +33,7 @@ import (
 	"github.com/rclone/rclone/lib/bucket"
 	"github.com/rclone/rclone/lib/encoder"
 	"github.com/rclone/rclone/lib/pacer"
+	"github.com/rclone/rclone/lib/pool"
 	"github.com/rclone/rclone/lib/rest"
 )
 
@@ -54,6 +55,9 @@ const (
 	minChunkSize        = 5 * fs.MebiByte
 	defaultChunkSize    = 96 * fs.MebiByte
 	defaultUploadCutoff = 200 * fs.MebiByte
+	largeFileCopyCutoff = 4 * fs.GibiByte            // 5E9 is the max
+	memoryPoolFlushTime = fs.Duration(time.Minute)   // flush the cached buffers after this long
+	memoryPoolUseMmap   = false
 )
 
 // Globals
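For scale on the "5E9 is the max" comment: 4 GiB = 4 × 2^30 = 4,294,967,296 bytes, which sits safely under B2's 5 × 10^9-byte limit on a single copy request — which is why the default cutoff is 4 GiB rather than a full 5 GB.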
@@ -113,6 +117,16 @@ Files above this size will be uploaded in chunks of "--b2-chunk-size".
 This value should be set no larger than 4.657GiB (== 5GB).`,
 			Default:  defaultUploadCutoff,
 			Advanced: true,
+		}, {
+			Name: "copy_cutoff",
+			Help: `Cutoff for switching to multipart copy
+
+Any files larger than this that need to be server side copied will be
+copied in chunks of this size.
+
+The minimum is 0 and the maximum is 4.6GB.`,
+			Default:  largeFileCopyCutoff,
+			Advanced: true,
 		}, {
 			Name: "chunk_size",
 			Help: `Upload chunk size. Must fit in memory.
@@ -150,6 +164,18 @@ The duration before the download authorization token will expire.
 The minimum value is 1 second. The maximum value is one week.`,
 			Default:  fs.Duration(7 * 24 * time.Hour),
 			Advanced: true,
+		}, {
+			Name:     "memory_pool_flush_time",
+			Default:  memoryPoolFlushTime,
+			Advanced: true,
+			Help: `How often internal memory buffer pools will be flushed.
+Uploads which requires additional buffers (f.e multipart) will use memory pool for allocations.
+This option controls how often unused buffers will be removed from the pool.`,
+		}, {
+			Name:     "memory_pool_use_mmap",
+			Default:  memoryPoolUseMmap,
+			Advanced: true,
+			Help:     `Whether to use mmap buffers in internal memory pool.`,
 		}, {
 			Name:     config.ConfigEncoding,
 			Help:     config.ConfigEncodingHelp,
@@ -173,10 +199,13 @@ type Options struct {
 	Versions                      bool                 `config:"versions"`
 	HardDelete                    bool                 `config:"hard_delete"`
 	UploadCutoff                  fs.SizeSuffix        `config:"upload_cutoff"`
+	CopyCutoff                    fs.SizeSuffix        `config:"copy_cutoff"`
 	ChunkSize                     fs.SizeSuffix        `config:"chunk_size"`
 	DisableCheckSum               bool                 `config:"disable_checksum"`
 	DownloadURL                   string               `config:"download_url"`
 	DownloadAuthorizationDuration fs.Duration          `config:"download_auth_duration"`
+	MemoryPoolFlushTime           fs.Duration          `config:"memory_pool_flush_time"`
+	MemoryPoolUseMmap             bool                 `config:"memory_pool_use_mmap"`
 	Enc                           encoder.MultiEncoder `config:"encoding"`
 }
 
@@ -199,7 +228,8 @@ type Fs struct {
 	uploads      map[string][]*api.GetUploadURLResponse // Upload URLs by buckedID
 	authMu       sync.Mutex                             // lock for authorizing the account
 	pacer        *fs.Pacer                              // To pace and retry the API calls
-	bufferTokens chan []byte                            // control concurrency of multipart uploads
+	uploadToken  *pacer.TokenDispenser                  // control concurrency
+	pool         *pool.Pool                             // memory pool
 }
 
 // Object describes a b2 object
@@ -335,7 +365,6 @@ func (f *Fs) setUploadChunkSize(cs fs.SizeSuffix) (old fs.SizeSuffix, err error)
 	err = checkUploadChunkSize(cs)
 	if err == nil {
 		old, f.opt.ChunkSize = f.opt.ChunkSize, cs
-		f.fillBufferTokens() // reset the buffer tokens
 	}
 	return
 }
@@ -396,6 +425,13 @@ func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) {
 		_bucketType: make(map[string]string, 1),
 		uploads:     make(map[string][]*api.GetUploadURLResponse),
 		pacer:       fs.NewPacer(pacer.NewDefault(pacer.MinSleep(minSleep), pacer.MaxSleep(maxSleep), pacer.DecayConstant(decayConstant))),
+		uploadToken: pacer.NewTokenDispenser(fs.Config.Transfers),
+		pool: pool.New(
+			time.Duration(opt.MemoryPoolFlushTime),
+			int(opt.ChunkSize),
+			fs.Config.Transfers,
+			opt.MemoryPoolUseMmap,
+		),
 	}
 	f.setRoot(root)
 	f.features = (&fs.Features{
@@ -410,7 +446,6 @@ func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) {
 		f.srv.SetHeader(testModeHeader, testMode)
 		fs.Debugf(f, "Setting test header \"%s: %s\"", testModeHeader, testMode)
 	}
-	f.fillBufferTokens()
 	err = f.authorizeAccount(ctx)
 	if err != nil {
 		return nil, errors.Wrap(err, "failed to authorize account")
@@ -533,32 +568,25 @@ func (f *Fs) clearUploadURL(bucketID string) {
 	f.uploadMu.Unlock()
 }
 
-// Fill up (or reset) the buffer tokens
-func (f *Fs) fillBufferTokens() {
-	f.bufferTokens = make(chan []byte, fs.Config.Transfers)
-	for i := 0; i < fs.Config.Transfers; i++ {
-		f.bufferTokens <- nil
+// getBuf gets a buffer of f.opt.ChunkSize and an upload token
+//
+// If noBuf is set then it just gets an upload token
+func (f *Fs) getBuf(noBuf bool) (buf []byte) {
+	f.uploadToken.Get()
+	if !noBuf {
+		buf = f.pool.Get()
 	}
+	return buf
 }
 
-// getUploadBlock gets a block from the pool of size chunkSize
-func (f *Fs) getUploadBlock() []byte {
-	buf := <-f.bufferTokens
-	if buf == nil {
-		buf = make([]byte, f.opt.ChunkSize)
-	}
-	// fs.Debugf(f, "Getting upload block %p", buf)
-	return buf
-}
-
-// putUploadBlock returns a block to the pool of size chunkSize
-func (f *Fs) putUploadBlock(buf []byte) {
-	buf = buf[:cap(buf)]
-	if len(buf) != int(f.opt.ChunkSize) {
-		panic("bad blocksize returned to pool")
+// putBuf returns a buffer to the memory pool and an upload token
+//
+// If noBuf is set then it just returns the upload token
+func (f *Fs) putBuf(buf []byte, noBuf bool) {
+	if !noBuf {
+		f.pool.Put(buf)
 	}
-	// fs.Debugf(f, "Returning upload block %p", buf)
-	f.bufferTokens <- buf
+	f.uploadToken.Put()
 }
 
 // Return an Object from a path
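The new helpers split two concerns the old bufferTokens channel had fused: getBuf always takes an upload token (bounding concurrency to fs.Config.Transfers) but only takes a pool buffer when the caller actually needs memory — a server-side copy part wants a concurrency slot with no buffer, which is what noBuf is for. A self-contained sketch of the same discipline, with illustrative names and plain channels standing in for rclone's pacer.TokenDispenser and pool.Pool:

package main

import "fmt"

// transferGate bounds concurrent transfers and recycles chunk buffers.
type transferGate struct {
	tokens  chan struct{}
	buffers chan []byte
}

func newTransferGate(transfers, chunkSize int) *transferGate {
	g := &transferGate{
		tokens:  make(chan struct{}, transfers),
		buffers: make(chan []byte, transfers),
	}
	for i := 0; i < transfers; i++ {
		g.tokens <- struct{}{}
		g.buffers <- make([]byte, chunkSize)
	}
	return g
}

// getBuf always takes a token; it takes a buffer only if noBuf is false.
func (g *transferGate) getBuf(noBuf bool) (buf []byte) {
	<-g.tokens
	if !noBuf {
		buf = <-g.buffers
	}
	return buf
}

// putBuf returns the buffer (if any) and then the token.
func (g *transferGate) putBuf(buf []byte, noBuf bool) {
	if !noBuf {
		g.buffers <- buf
	}
	g.tokens <- struct{}{}
}

func main() {
	g := newTransferGate(4, 1024)
	buf := g.getBuf(false) // upload part: token + buffer
	defer g.putBuf(buf, false)
	tok := g.getBuf(true) // copy part: token only, no memory needed
	defer g.putBuf(tok, true)
	fmt.Println("buffer size:", len(buf))
}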
@@ -1205,6 +1233,63 @@ func (f *Fs) CleanUp(ctx context.Context) error {
 	return f.purge(ctx, f.rootBucket, f.rootDirectory, true)
 }
 
+// copy does a server side copy from dstObj <- srcObj
+//
+// If newInfo is nil then the metadata will be copied otherwise it
+// will be replaced with newInfo
+func (f *Fs) copy(ctx context.Context, dstObj *Object, srcObj *Object, newInfo *api.File) (err error) {
+	if srcObj.size >= int64(f.opt.CopyCutoff) {
+		if newInfo == nil {
+			newInfo, err = srcObj.getMetaData(ctx)
+			if err != nil {
+				return err
+			}
+		}
+		up, err := f.newLargeUpload(ctx, dstObj, nil, srcObj, f.opt.CopyCutoff, true, newInfo)
+		if err != nil {
+			return err
+		}
+		return up.Upload(ctx)
+	}
+
+	dstBucket, dstPath := dstObj.split()
+	err = f.makeBucket(ctx, dstBucket)
+	if err != nil {
+		return err
+	}
+
+	destBucketID, err := f.getBucketID(ctx, dstBucket)
+	if err != nil {
+		return err
+	}
+
+	opts := rest.Opts{
+		Method: "POST",
+		Path:   "/b2_copy_file",
+	}
+	var request = api.CopyFileRequest{
+		SourceID:     srcObj.id,
+		Name:         f.opt.Enc.FromStandardPath(dstPath),
+		DestBucketID: destBucketID,
+	}
+	if newInfo == nil {
+		request.MetadataDirective = "COPY"
+	} else {
+		request.MetadataDirective = "REPLACE"
+		request.ContentType = newInfo.ContentType
+		request.Info = newInfo.Info
+	}
+	var response api.FileInfo
+	err = f.pacer.Call(func() (bool, error) {
+		resp, err := f.srv.CallJSON(ctx, &opts, &request, &response)
+		return f.shouldRetry(ctx, resp, err)
+	})
+	if err != nil {
+		return err
+	}
+	return dstObj.decodeMetaDataFileInfo(&response)
+}
+
 // Copy src to this remote using server side copy operations.
 //
 // This is stored with the remote path given
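The new helper dispatches on size: below the cutoff a single b2_copy_file call suffices, while at or above it newLargeUpload drives a chunked server-side copy in parts of at most CopyCutoff bytes. A hedged sketch of how such a copy splits into part ranges (partRanges is a hypothetical helper, not rclone's):

package main

import "fmt"

// partRanges splits size bytes into [start, end) ranges of at most cutoff
// bytes each — the shape of work a chunked server-side copy submits per part.
func partRanges(size, cutoff int64) [][2]int64 {
	var ranges [][2]int64
	for off := int64(0); off < size; off += cutoff {
		end := off + cutoff
		if end > size {
			end = size
		}
		ranges = append(ranges, [2]int64{off, end})
	}
	return ranges
}

func main() {
	// A 10 GiB file with a 4 GiB cutoff copies as 4 + 4 + 2 GiB parts.
	const gib = int64(1) << 30
	for _, r := range partRanges(10*gib, 4*gib) {
		fmt.Printf("copy bytes [%d, %d)\n", r[0], r[1])
	}
}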
@@ -1215,47 +1300,21 @@ func (f *Fs) CleanUp(ctx context.Context) error {
 //
 // If it isn't possible then return fs.ErrorCantCopy
 func (f *Fs) Copy(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
-	dstBucket, dstPath := f.split(remote)
-	err := f.makeBucket(ctx, dstBucket)
-	if err != nil {
-		return nil, err
-	}
 	srcObj, ok := src.(*Object)
 	if !ok {
 		fs.Debugf(src, "Can't copy - not same remote type")
 		return nil, fs.ErrorCantCopy
 	}
-	destBucketID, err := f.getBucketID(ctx, dstBucket)
-	if err != nil {
-		return nil, err
-	}
-	opts := rest.Opts{
-		Method: "POST",
-		Path:   "/b2_copy_file",
-	}
-	var request = api.CopyFileRequest{
-		SourceID:          srcObj.id,
-		Name:              f.opt.Enc.FromStandardPath(dstPath),
-		MetadataDirective: "COPY",
-		DestBucketID:      destBucketID,
-	}
-	var response api.FileInfo
-	err = f.pacer.Call(func() (bool, error) {
-		resp, err := f.srv.CallJSON(ctx, &opts, &request, &response)
-		return f.shouldRetry(ctx, resp, err)
-	})
-	if err != nil {
-		return nil, err
-	}
-	o := &Object{
+	// Temporary Object under construction
+	dstObj := &Object{
 		fs:     f,
 		remote: remote,
 	}
-	err = o.decodeMetaDataFileInfo(&response)
+	err := f.copy(ctx, dstObj, srcObj, nil)
 	if err != nil {
 		return nil, err
 	}
-	return o, nil
+	return dstObj, nil
 }
 
 // Hashes returns the supported hash sets.
@@ -1526,28 +1585,10 @@ func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error {
 	if err != nil {
 		return err
 	}
-	_, bucketPath := o.split()
 	info.Info[timeKey] = timeString(modTime)
-	opts := rest.Opts{
-		Method: "POST",
-		Path:   "/b2_copy_file",
-	}
-	var request = api.CopyFileRequest{
-		SourceID:          o.id,
-		Name:              o.fs.opt.Enc.FromStandardPath(bucketPath), // copy to same name
-		MetadataDirective: "REPLACE",
-		ContentType:       info.ContentType,
-		Info:              info.Info,
-	}
-	var response api.FileInfo
-	err = o.fs.pacer.Call(func() (bool, error) {
-		resp, err := o.fs.srv.CallJSON(ctx, &opts, &request, &response)
-		return o.fs.shouldRetry(ctx, resp, err)
-	})
-	if err != nil {
-		return err
-	}
-	return o.decodeMetaDataFileInfo(&response)
+
+	// Copy to the same name, overwriting the metadata only
+	return o.fs.copy(ctx, o, o, info)
 }
 
 // Storable returns if this object is storable
@@ -1723,7 +1764,8 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
 	}
 	if size == -1 {
 		// Check if the file is large enough for a chunked upload (needs to be at least two chunks)
-		buf := o.fs.getUploadBlock()
+		buf := o.fs.getBuf(false)
+
 		n, err := io.ReadFull(in, buf)
 		if err == nil {
 			bufReader := bufio.NewReader(in)
@@ -1733,22 +1775,24 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
 
 			if err == nil {
 				fs.Debugf(o, "File is big enough for chunked streaming")
-				up, err := o.fs.newLargeUpload(ctx, o, in, src)
+				up, err := o.fs.newLargeUpload(ctx, o, in, src, o.fs.opt.ChunkSize, false, nil)
 				if err != nil {
-					o.fs.putUploadBlock(buf)
+					o.fs.putBuf(buf, false)
 					return err
 				}
+				// NB Stream returns the buffer and token
 				return up.Stream(ctx, buf)
 			} else if err == io.EOF || err == io.ErrUnexpectedEOF {
 				fs.Debugf(o, "File has %d bytes, which makes only one chunk. Using direct upload.", n)
-				defer o.fs.putUploadBlock(buf)
+				defer o.fs.putBuf(buf, false)
 				size = int64(n)
 				in = bytes.NewReader(buf[:n])
 			} else {
+				o.fs.putBuf(buf, false)
 				return err
 			}
 		} else if size > int64(o.fs.opt.UploadCutoff) {
-			up, err := o.fs.newLargeUpload(ctx, o, in, src)
+			up, err := o.fs.newLargeUpload(ctx, o, in, src, o.fs.opt.ChunkSize, false, nil)
 			if err != nil {
 				return err
 			}
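The streaming branch above turns on a single probe: io.ReadFull on one chunk. A short read (io.EOF or io.ErrUnexpectedEOF) means the whole stream fits in one chunk and can go as a direct upload; a full read means chunked streaming. A standalone sketch of that decision (probeFirstChunk is an illustrative name, not rclone's):

package main

import (
	"fmt"
	"io"
	"strings"
)

// probeFirstChunk reads up to chunkSize bytes. A short read means the whole
// stream fits in one chunk (direct upload); a full read means more data may
// follow (chunked streaming upload).
func probeFirstChunk(in io.Reader, chunkSize int) (first []byte, multiChunk bool, err error) {
	buf := make([]byte, chunkSize)
	n, err := io.ReadFull(in, buf)
	switch err {
	case nil:
		return buf[:n], true, nil // full chunk read; more may follow
	case io.EOF, io.ErrUnexpectedEOF:
		return buf[:n], false, nil // whole file fits in one chunk
	default:
		return nil, false, err
	}
}

func main() {
	first, multi, err := probeFirstChunk(strings.NewReader("tiny"), 16)
	if err != nil {
		panic(err)
	}
	fmt.Printf("read %d bytes, needs chunked streaming: %v\n", len(first), multi)
}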