
oracleobjectstorage: Use rclone's rate limiter in multipart transfers

Manoj Ghosh
2023-07-30 13:38:51 -07:00
committed by Nick Craig-Wood
parent de185de215
commit 27f5297e8d
10 changed files with 1152 additions and 159 deletions
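In outline, the commit drops the OCI SDK's transfer.UploadManager and uploads parts directly, wrapping the ObjectStorage API calls in rclone's pacer so rate limiting and retries are applied consistently with the rest of the backend. A minimal sketch of that pattern, assuming it sits in the same backend package as the code below (the helper name is illustrative):

// uploadPartWithPacer shows the pacer pattern used throughout the new
// multipart code: the API call is rate limited by rclone's pacer and
// retried according to shouldRetry.
func uploadPartWithPacer(ctx context.Context, f *Fs, req objectstorage.UploadPartRequest) (resp objectstorage.UploadPartResponse, err error) {
	err = f.pacer.Call(func() (bool, error) {
		resp, err = f.srv.UploadPart(ctx, req)
		// shouldRetry inspects the HTTP response and error to decide
		// whether the pacer should back off and try again.
		return shouldRetry(ctx, resp.HTTPResponse(), err)
	})
	return resp, err
}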

View File

@@ -13,7 +13,6 @@ import (
"github.com/oracle/oci-go-sdk/v65/common"
"github.com/oracle/oci-go-sdk/v65/objectstorage"
"github.com/oracle/oci-go-sdk/v65/objectstorage/transfer"
)
const (
@@ -128,18 +127,3 @@ func useBYOKCopyObject(fs *Fs, request *objectstorage.CopyObjectRequest) {
request.OpcSseCustomerKeySha256 = common.String(fs.opt.SSECustomerKeySha256)
}
}
func useBYOKUpload(fs *Fs, request *transfer.UploadRequest) {
if fs.opt.SSEKMSKeyID != "" {
request.OpcSseKmsKeyId = common.String(fs.opt.SSEKMSKeyID)
}
if fs.opt.SSECustomerAlgorithm != "" {
request.OpcSseCustomerAlgorithm = common.String(fs.opt.SSECustomerAlgorithm)
}
if fs.opt.SSECustomerKey != "" {
request.OpcSseCustomerKey = common.String(fs.opt.SSECustomerKey)
}
if fs.opt.SSECustomerKeySha256 != "" {
request.OpcSseCustomerKeySha256 = common.String(fs.opt.SSECustomerKeySha256)
}
}

View File

@@ -6,6 +6,7 @@ package oracleobjectstorage
import (
"context"
"fmt"
"sort"
"strings"
"time"
@@ -196,6 +197,32 @@ func (f *Fs) listMultipartUploadsAll(ctx context.Context) (uploadsMap map[string
// for "dir" and it returns "dirKey"
func (f *Fs) listMultipartUploads(ctx context.Context, bucketName, directory string) (
uploads []*objectstorage.MultipartUpload, err error) {
return f.listMultipartUploadsObject(ctx, bucketName, directory, false)
}
// findLatestMultipartUpload finds the most recent outstanding multipart upload for (bucket, key)
//
// Unlike listMultipartUploads above, the key is matched exactly rather than
// treated as a prefix, and only the newest matching upload is returned.
func (f *Fs) findLatestMultipartUpload(ctx context.Context, bucketName, directory string) (
uploads []*objectstorage.MultipartUpload, err error) {
pastUploads, err := f.listMultipartUploadsObject(ctx, bucketName, directory, true)
if err != nil {
return nil, err
}
if len(pastUploads) > 0 {
sort.Slice(pastUploads, func(i, j int) bool {
return pastUploads[i].TimeCreated.After(pastUploads[j].TimeCreated.Time)
})
return pastUploads[:1], nil
}
return nil, err
}
func (f *Fs) listMultipartUploadsObject(ctx context.Context, bucketName, directory string, exact bool) (
uploads []*objectstorage.MultipartUpload, err error) {
uploads = []*objectstorage.MultipartUpload{}
req := objectstorage.ListMultipartUploadsRequest{
@@ -217,7 +244,13 @@ func (f *Fs) listMultipartUploads(ctx context.Context, bucketName, directory str
if directory != "" && item.Object != nil && !strings.HasPrefix(*item.Object, directory) {
continue
}
uploads = append(uploads, &response.Items[index])
if exact {
if *item.Object == directory {
uploads = append(uploads, &response.Items[index])
}
} else {
uploads = append(uploads, &response.Items[index])
}
}
if response.OpcNextPage == nil {
break
@@ -226,3 +259,34 @@ func (f *Fs) listMultipartUploads(ctx context.Context, bucketName, directory str
}
return uploads, nil
}
func (f *Fs) listMultipartUploadParts(ctx context.Context, bucketName, bucketPath string, uploadID string) (
uploadedParts map[int]objectstorage.MultipartUploadPartSummary, err error) {
uploadedParts = make(map[int]objectstorage.MultipartUploadPartSummary)
req := objectstorage.ListMultipartUploadPartsRequest{
NamespaceName: common.String(f.opt.Namespace),
BucketName: common.String(bucketName),
ObjectName: common.String(bucketPath),
UploadId: common.String(uploadID),
Limit: common.Int(1000),
}
var response objectstorage.ListMultipartUploadPartsResponse
for {
err = f.pacer.Call(func() (bool, error) {
response, err = f.srv.ListMultipartUploadParts(ctx, req)
return shouldRetry(ctx, response.HTTPResponse(), err)
})
if err != nil {
return uploadedParts, err
}
for _, item := range response.Items {
uploadedParts[*item.PartNumber] = item
}
if response.OpcNextPage == nil {
break
}
req.Page = response.OpcNextPage
}
return uploadedParts, nil
}
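Together these helpers give the resume path what it needs: the newest outstanding upload for an exact object name, plus a map of its already uploaded parts keyed by part number. A hedged sketch of how a caller might combine them (it mirrors createMultipartUpload in the new upload code further down; the function name is illustrative):

// resumableUpload is a hypothetical caller in the same package as the helpers above.
func resumableUpload(ctx context.Context, f *Fs, bucketName, bucketPath string) (uploadID string, parts map[int]objectstorage.MultipartUploadPartSummary, ok bool) {
	uploads, err := f.findLatestMultipartUpload(ctx, bucketName, bucketPath)
	if err != nil || len(uploads) == 0 {
		return "", nil, false // nothing outstanding to resume
	}
	uploadID = *uploads[0].UploadId
	parts, err = f.listMultipartUploadParts(ctx, bucketName, bucketPath, uploadID)
	if err != nil {
		return "", nil, false
	}
	return uploadID, parts, true
}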

View File

@@ -0,0 +1,334 @@
//go:build !plan9 && !solaris && !js
// +build !plan9,!solaris,!js
package oracleobjectstorage
import (
"bytes"
"context"
"crypto/md5"
"encoding/base64"
"fmt"
"io"
"sort"
"strconv"
"sync"
"github.com/oracle/oci-go-sdk/v65/common"
"github.com/oracle/oci-go-sdk/v65/objectstorage"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/chunksize"
"github.com/rclone/rclone/lib/atexit"
"github.com/rclone/rclone/lib/pacer"
"github.com/rclone/rclone/lib/readers"
"golang.org/x/sync/errgroup"
)
var warnStreamUpload sync.Once
func (o *Object) uploadMultipart(
ctx context.Context,
putReq *objectstorage.PutObjectRequest,
in io.Reader,
src fs.ObjectInfo) (err error) {
uploadID, uploadedParts, err := o.createMultipartUpload(ctx, putReq)
if err != nil {
fs.Errorf(o, "failed to create multipart upload-id err: %v", err)
return err
}
return o.uploadParts(ctx, putReq, in, src, uploadID, uploadedParts)
}
func (o *Object) createMultipartUpload(ctx context.Context, putReq *objectstorage.PutObjectRequest) (
uploadID string, uploadedParts map[int]objectstorage.MultipartUploadPartSummary, err error) {
bucketName, bucketPath := o.split()
f := o.fs
if f.opt.AttemptResumeUpload {
fs.Debugf(o, "attempting to resume upload for %v (if any)", o.remote)
resumeUploads, err := o.fs.findLatestMultipartUpload(ctx, bucketName, bucketPath)
if err == nil && len(resumeUploads) > 0 {
uploadID = *resumeUploads[0].UploadId
uploadedParts, err = f.listMultipartUploadParts(ctx, bucketName, bucketPath, uploadID)
if err == nil {
fs.Debugf(o, "resuming with existing upload id: %v", uploadID)
return uploadID, uploadedParts, err
}
}
}
req := objectstorage.CreateMultipartUploadRequest{
NamespaceName: common.String(o.fs.opt.Namespace),
BucketName: common.String(bucketName),
}
req.Object = common.String(bucketPath)
if o.fs.opt.StorageTier != "" {
storageTier, ok := objectstorage.GetMappingStorageTierEnum(o.fs.opt.StorageTier)
if !ok {
return "", nil, fmt.Errorf("not a valid storage tier: %v", o.fs.opt.StorageTier)
}
req.StorageTier = storageTier
}
o.applyMultipartUploadOptions(putReq, &req)
var resp objectstorage.CreateMultipartUploadResponse
err = o.fs.pacer.Call(func() (bool, error) {
resp, err = o.fs.srv.CreateMultipartUpload(ctx, req)
return shouldRetry(ctx, resp.HTTPResponse(), err)
})
if err != nil {
return "", nil, err
}
uploadID = *resp.UploadId
fs.Debugf(o, "created new upload id: %v", uploadID)
return uploadID, nil, err
}
func (o *Object) uploadParts(
ctx context.Context,
putReq *objectstorage.PutObjectRequest,
in io.Reader,
src fs.ObjectInfo,
uploadID string,
uploadedParts map[int]objectstorage.MultipartUploadPartSummary) (err error) {
bucketName, bucketPath := o.split()
f := o.fs
// make concurrency machinery
concurrency := f.opt.UploadConcurrency
if concurrency < 1 {
concurrency = 1
}
uploadParts := f.opt.MaxUploadParts
if uploadParts < 1 {
uploadParts = 1
} else if uploadParts > maxUploadParts {
uploadParts = maxUploadParts
}
// calculate size of parts
partSize := f.opt.ChunkSize
fileSize := src.Size()
// size can be -1 here meaning we don't know the size of the incoming file. We use ChunkSize
// buffers here (default 5 MiB). With a maximum number of parts (10,000) this will be a file of
// 48 GiB which seems like a not too unreasonable limit.
if fileSize == -1 {
warnStreamUpload.Do(func() {
fs.Logf(f, "Streaming uploads using chunk size %v will have maximum file size of %v",
f.opt.ChunkSize, fs.SizeSuffix(int64(partSize)*int64(uploadParts)))
})
} else {
partSize = chunksize.Calculator(o, fileSize, uploadParts, f.opt.ChunkSize)
}
uploadCtx, cancel := context.WithCancel(ctx)
defer atexit.OnError(&err, func() {
cancel()
if o.fs.opt.LeavePartsOnError {
return
}
fs.Debugf(o, "Cancelling multipart upload")
errCancel := o.fs.abortMultiPartUpload(
context.Background(),
bucketName,
bucketPath,
uploadID)
if errCancel != nil {
fs.Debugf(o, "Failed to cancel multipart upload: %v", errCancel)
} else {
fs.Debugf(o, "canceled and aborted multipart upload: %v", uploadID)
}
})()
var (
g, gCtx = errgroup.WithContext(uploadCtx)
finished = false
partsMu sync.Mutex // to protect parts
parts []*objectstorage.CommitMultipartUploadPartDetails
off int64
md5sMu sync.Mutex
md5s []byte
tokens = pacer.NewTokenDispenser(concurrency)
memPool = o.fs.getMemoryPool(int64(partSize))
)
addMd5 := func(md5binary *[md5.Size]byte, partNum int64) {
md5sMu.Lock()
defer md5sMu.Unlock()
start := partNum * md5.Size
end := start + md5.Size
if extend := end - int64(len(md5s)); extend > 0 {
md5s = append(md5s, make([]byte, extend)...)
}
copy(md5s[start:end], (*md5binary)[:])
}
for partNum := int64(1); !finished; partNum++ {
// Get a block of memory from the pool and token which limits concurrency.
tokens.Get()
buf := memPool.Get()
free := func() {
// return the memory and token
memPool.Put(buf)
tokens.Put()
}
// Fail fast: if an errgroup managed function has already returned an error,
// gCtx is cancelled and there is no point in uploading the remaining parts.
if gCtx.Err() != nil {
free()
break
}
// Read the chunk
var n int
n, err = readers.ReadFill(in, buf) // this can never return 0, nil
if err == io.EOF {
if n == 0 && partNum != 1 { // end if no data and if not first chunk
free()
break
}
finished = true
} else if err != nil {
free()
return fmt.Errorf("multipart upload failed to read source: %w", err)
}
buf = buf[:n]
partNum := partNum
fs.Debugf(o, "multipart upload starting chunk %d size %v offset %v/%v", partNum, fs.SizeSuffix(n), fs.SizeSuffix(off), fs.SizeSuffix(fileSize))
off += int64(n)
g.Go(func() (err error) {
defer free()
partLength := int64(len(buf))
// create checksum of buffer for integrity checking
md5sumBinary := md5.Sum(buf)
addMd5(&md5sumBinary, partNum-1)
md5sum := base64.StdEncoding.EncodeToString(md5sumBinary[:])
if uploadedPart, ok := uploadedParts[int(partNum)]; ok {
if md5sum == *uploadedPart.Md5 {
fs.Debugf(o, "matched uploaded part found, part num %d, skipping part, md5=%v", partNum, md5sum)
partsMu.Lock()
parts = append(parts, &objectstorage.CommitMultipartUploadPartDetails{
PartNum: uploadedPart.PartNumber,
Etag: uploadedPart.Etag,
})
partsMu.Unlock()
return nil
}
}
req := objectstorage.UploadPartRequest{
NamespaceName: common.String(o.fs.opt.Namespace),
BucketName: common.String(bucketName),
ObjectName: common.String(bucketPath),
UploadId: common.String(uploadID),
UploadPartNum: common.Int(int(partNum)),
ContentLength: common.Int64(partLength),
ContentMD5: common.String(md5sum),
}
o.applyPartUploadOptions(putReq, &req)
var resp objectstorage.UploadPartResponse
err = f.pacer.Call(func() (bool, error) {
req.UploadPartBody = io.NopCloser(bytes.NewReader(buf))
resp, err = f.srv.UploadPart(gCtx, req)
if err != nil {
if partNum <= int64(concurrency) {
return shouldRetry(gCtx, resp.HTTPResponse(), err)
}
// retry all chunks once we have done the first batch
return true, err
}
partsMu.Lock()
parts = append(parts, &objectstorage.CommitMultipartUploadPartDetails{
PartNum: common.Int(int(partNum)),
Etag: resp.ETag,
})
partsMu.Unlock()
return false, nil
})
if err != nil {
fs.Errorf(o, "multipart upload failed to upload part:%d err: %v", partNum, err)
return fmt.Errorf("multipart upload failed to upload part: %w", err)
}
return nil
})
}
err = g.Wait()
if err != nil {
return err
}
// sort the completed parts by part number
sort.Slice(parts, func(i, j int) bool {
return *parts[i].PartNum < *parts[j].PartNum
})
var resp objectstorage.CommitMultipartUploadResponse
resp, err = o.commitMultiPart(ctx, uploadID, parts)
if err != nil {
return err
}
fs.Debugf(o, "multipart upload %v committed.", uploadID)
hashOfHashes := md5.Sum(md5s)
wantMultipartMd5 := base64.StdEncoding.EncodeToString(hashOfHashes[:]) + "-" + strconv.Itoa(len(parts))
gotMultipartMd5 := *resp.OpcMultipartMd5
if wantMultipartMd5 != gotMultipartMd5 {
fs.Errorf(o, "multipart upload corrupted: multipart md5 differ: expecting %s but got %s", wantMultipartMd5, gotMultipartMd5)
return fmt.Errorf("multipart upload corrupted: md5 differ: expecting %s but got %s", wantMultipartMd5, gotMultipartMd5)
}
fs.Debugf(o, "multipart upload %v md5 matched: expecting %s and got %s", uploadID, wantMultipartMd5, gotMultipartMd5)
return nil
}
// commits the multipart upload
func (o *Object) commitMultiPart(ctx context.Context, uploadID string, parts []*objectstorage.CommitMultipartUploadPartDetails) (resp objectstorage.CommitMultipartUploadResponse, err error) {
bucketName, bucketPath := o.split()
req := objectstorage.CommitMultipartUploadRequest{
NamespaceName: common.String(o.fs.opt.Namespace),
BucketName: common.String(bucketName),
ObjectName: common.String(bucketPath),
UploadId: common.String(uploadID),
}
var partsToCommit []objectstorage.CommitMultipartUploadPartDetails
for _, part := range parts {
partsToCommit = append(partsToCommit, *part)
}
req.PartsToCommit = partsToCommit
err = o.fs.pacer.Call(func() (bool, error) {
resp, err = o.fs.srv.CommitMultipartUpload(ctx, req)
// if multipart is corrupted, we will abort the uploadId
if o.isMultiPartUploadCorrupted(err) {
fs.Debugf(o, "multipart uploadId %v is corrupted, aborting...", uploadID)
errCancel := o.fs.abortMultiPartUpload(
context.Background(),
bucketName,
bucketPath,
uploadID)
if errCancel != nil {
fs.Debugf(o, "Failed to abort multipart upload: %v, ignoring.", errCancel)
} else {
fs.Debugf(o, "aborted multipart upload: %v", uploadID)
}
return false, err
}
return shouldRetry(ctx, resp.HTTPResponse(), err)
})
return resp, err
}
func (o *Object) isMultiPartUploadCorrupted(err error) bool {
if err == nil {
return false
}
// Check if this is an OCI service error and whether it indicates a corrupted multipart commit
if ociError, ok := err.(common.ServiceError); ok {
// InvalidUploadPart means the parts being committed do not match what the service has stored
if ociError.GetCode() == "InvalidUploadPart" {
return true
}
}
return false
}
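uploadParts verifies the commit by recomputing the value it expects in OpcMultipartMd5: an MD5 over the concatenation of each part's 16-byte MD5, base64-encoded, with "-<number of parts>" appended. A small standalone sketch of that calculation on made-up part data:

package main

import (
	"crypto/md5"
	"encoding/base64"
	"fmt"
	"strconv"
)

func main() {
	// Hypothetical part contents; in the backend these are the uploaded chunks.
	parts := [][]byte{[]byte("part one"), []byte("part two")}

	// Concatenate the 16-byte MD5 of each part in part order.
	var md5s []byte
	for _, p := range parts {
		sum := md5.Sum(p)
		md5s = append(md5s, sum[:]...)
	}

	// Hash the concatenation and append "-<part count>", as uploadParts does.
	hashOfHashes := md5.Sum(md5s)
	fmt.Println(base64.StdEncoding.EncodeToString(hashOfHashes[:]) + "-" + strconv.Itoa(len(parts)))
}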

View File

@@ -4,24 +4,26 @@
package oracleobjectstorage
import (
"bytes"
"context"
"encoding/base64"
"encoding/hex"
"fmt"
"io"
"net/http"
"os"
"regexp"
"strconv"
"strings"
"time"
"golang.org/x/net/http/httpguts"
"github.com/ncw/swift/v2"
"github.com/oracle/oci-go-sdk/v65/common"
"github.com/oracle/oci-go-sdk/v65/objectstorage"
"github.com/oracle/oci-go-sdk/v65/objectstorage/transfer"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/hash"
"github.com/rclone/rclone/lib/atexit"
)
// ------------------------------------------------------------
@@ -367,6 +369,25 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (io.ReadClo
return resp.HTTPResponse().Body, nil
}
func isZeroLength(streamReader io.Reader) bool {
switch v := streamReader.(type) {
case *bytes.Buffer:
return v.Len() == 0
case *bytes.Reader:
return v.Len() == 0
case *strings.Reader:
return v.Len() == 0
case *os.File:
fi, err := v.Stat()
if err != nil {
return false
}
return fi.Size() == 0
default:
return false
}
}
// Update an object if it has changed
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (err error) {
bucketName, bucketPath := o.split()
@@ -379,11 +400,59 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
size := src.Size()
multipart := size >= int64(o.fs.opt.UploadCutoff)
if isZeroLength(in) {
multipart = false
}
req := objectstorage.PutObjectRequest{
NamespaceName: common.String(o.fs.opt.Namespace),
BucketName: common.String(bucketName),
ObjectName: common.String(bucketPath),
}
// Set the mtime in the metadata
modTime := src.ModTime(ctx)
metadata := map[string]string{
metaMtime: swift.TimeToFloatString(modTime),
// Fetch metadata if --metadata is in use
meta, err := fs.GetMetadataOptions(ctx, src, options)
if err != nil {
return fmt.Errorf("failed to read metadata from source object: %w", err)
}
req.OpcMeta = make(map[string]string, len(meta)+2)
// merge metadata into request and user metadata
for k, v := range meta {
pv := common.String(v)
k = strings.ToLower(k)
switch k {
case "cache-control":
req.CacheControl = pv
case "content-disposition":
req.ContentDisposition = pv
case "content-encoding":
req.ContentEncoding = pv
case "content-language":
req.ContentLanguage = pv
case "content-type":
req.ContentType = pv
case "tier":
// ignore
case "mtime":
// mtime in meta overrides source ModTime
metaModTime, err := time.Parse(time.RFC3339Nano, v)
if err != nil {
fs.Debugf(o, "failed to parse metadata %s: %q: %v", k, v, err)
} else {
modTime = metaModTime
}
case "btime":
// write as metadata since we can't set it
req.OpcMeta[k] = v
default:
req.OpcMeta[k] = v
}
}
// Set the mtime in the metadata
req.OpcMeta[metaMtime] = swift.TimeToFloatString(modTime)
// read the md5sum if available
// - for non-multipart
@@ -404,114 +473,53 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
// - a multipart upload
// - the ETag is not an MD5, e.g. when using SSE/SSE-C
// provided checksums aren't disabled
metadata[metaMD5Hash] = md5sumBase64
req.OpcMeta[metaMD5Hash] = md5sumBase64
}
}
}
}
// Guess the content type
mimeType := fs.MimeType(ctx, src)
// Set the content type if it isn't set already
if req.ContentType == nil {
req.ContentType = common.String(fs.MimeType(ctx, src))
}
if size >= 0 {
req.ContentLength = common.Int64(size)
}
if md5sumBase64 != "" {
req.ContentMD5 = &md5sumBase64
}
o.applyPutOptions(&req, options...)
useBYOKPutObject(o.fs, &req)
if o.fs.opt.StorageTier != "" {
storageTier, ok := objectstorage.GetMappingPutObjectStorageTierEnum(o.fs.opt.StorageTier)
if !ok {
return fmt.Errorf("not a valid storage tier: %v", o.fs.opt.StorageTier)
}
req.StorageTier = storageTier
}
// Check metadata keys and values are valid
for key, value := range req.OpcMeta {
if !httpguts.ValidHeaderFieldName(key) {
fs.Errorf(o, "Dropping invalid metadata key %q", key)
delete(req.OpcMeta, key)
} else if value == "" {
fs.Errorf(o, "Dropping nil metadata value for key %q", key)
delete(req.OpcMeta, key)
} else if !httpguts.ValidHeaderFieldValue(value) {
fs.Errorf(o, "Dropping invalid metadata value %q for key %q", value, key)
delete(req.OpcMeta, key)
}
}
if multipart {
chunkSize := int64(o.fs.opt.ChunkSize)
uploadRequest := transfer.UploadRequest{
NamespaceName: common.String(o.fs.opt.Namespace),
BucketName: common.String(bucketName),
ObjectName: common.String(bucketPath),
ContentType: common.String(mimeType),
PartSize: common.Int64(chunkSize),
AllowMultipartUploads: common.Bool(true),
AllowParrallelUploads: common.Bool(true),
ObjectStorageClient: o.fs.srv,
EnableMultipartChecksumVerification: common.Bool(!o.fs.opt.DisableChecksum),
NumberOfGoroutines: common.Int(o.fs.opt.UploadConcurrency),
Metadata: metadataWithOpcPrefix(metadata),
}
if o.fs.opt.StorageTier != "" {
storageTier, ok := objectstorage.GetMappingPutObjectStorageTierEnum(o.fs.opt.StorageTier)
if !ok {
return fmt.Errorf("not a valid storage tier: %v", o.fs.opt.StorageTier)
}
uploadRequest.StorageTier = storageTier
}
o.applyMultiPutOptions(&uploadRequest, options...)
useBYOKUpload(o.fs, &uploadRequest)
uploadStreamRequest := transfer.UploadStreamRequest{
UploadRequest: uploadRequest,
StreamReader: in,
}
uploadMgr := transfer.NewUploadManager()
var uploadID = ""
defer atexit.OnError(&err, func() {
if uploadID == "" {
return
}
if o.fs.opt.LeavePartsOnError {
return
}
fs.Debugf(o, "Cancelling multipart upload")
errCancel := o.fs.abortMultiPartUpload(
context.Background(),
bucketName,
bucketPath,
uploadID)
if errCancel != nil {
fs.Debugf(o, "Failed to cancel multipart upload: %v", errCancel)
}
})()
err = o.fs.pacer.Call(func() (bool, error) {
uploadResponse, err := uploadMgr.UploadStream(ctx, uploadStreamRequest)
var httpResponse *http.Response
if err == nil {
if uploadResponse.Type == transfer.MultipartUpload {
if uploadResponse.MultipartUploadResponse != nil {
httpResponse = uploadResponse.MultipartUploadResponse.HTTPResponse()
}
} else {
if uploadResponse.SinglepartUploadResponse != nil {
httpResponse = uploadResponse.SinglepartUploadResponse.HTTPResponse()
}
}
}
if err != nil {
uploadID := ""
if uploadResponse.MultipartUploadResponse != nil && uploadResponse.MultipartUploadResponse.UploadID != nil {
uploadID = *uploadResponse.MultipartUploadResponse.UploadID
fs.Debugf(o, "multipart streaming upload failed, aborting uploadID: %v, may retry", uploadID)
_ = o.fs.abortMultiPartUpload(ctx, bucketName, bucketPath, uploadID)
}
}
return shouldRetry(ctx, httpResponse, err)
})
err = o.uploadMultipart(ctx, &req, in, src)
if err != nil {
fs.Errorf(o, "multipart streaming upload failed %v", err)
return err
}
} else {
req := objectstorage.PutObjectRequest{
NamespaceName: common.String(o.fs.opt.Namespace),
BucketName: common.String(bucketName),
ObjectName: common.String(bucketPath),
ContentType: common.String(mimeType),
PutObjectBody: io.NopCloser(in),
OpcMeta: metadata,
}
if size >= 0 {
req.ContentLength = common.Int64(size)
}
if o.fs.opt.StorageTier != "" {
storageTier, ok := objectstorage.GetMappingPutObjectStorageTierEnum(o.fs.opt.StorageTier)
if !ok {
return fmt.Errorf("not a valid storage tier: %v", o.fs.opt.StorageTier)
}
req.StorageTier = storageTier
}
o.applyPutOptions(&req, options...)
useBYOKPutObject(o.fs, &req)
var resp objectstorage.PutObjectResponse
err = o.fs.pacer.Call(func() (bool, error) {
err = o.fs.pacer.CallNoRetry(func() (bool, error) {
req.PutObjectBody = io.NopCloser(in)
resp, err = o.fs.srv.PutObject(ctx, req)
return shouldRetry(ctx, resp.HTTPResponse(), err)
})
@@ -591,28 +599,24 @@ func (o *Object) applyGetObjectOptions(req *objectstorage.GetObjectRequest, opti
}
}
func (o *Object) applyMultiPutOptions(req *transfer.UploadRequest, options ...fs.OpenOption) {
// Apply upload options
for _, option := range options {
key, value := option.Header()
lowerKey := strings.ToLower(key)
switch lowerKey {
case "":
// ignore
case "content-encoding":
req.ContentEncoding = common.String(value)
case "content-language":
req.ContentLanguage = common.String(value)
case "content-type":
req.ContentType = common.String(value)
default:
if strings.HasPrefix(lowerKey, ociMetaPrefix) {
req.Metadata[lowerKey] = value
} else {
fs.Errorf(o, "Don't know how to set key %q on upload", key)
}
}
}
func (o *Object) applyMultipartUploadOptions(putReq *objectstorage.PutObjectRequest, req *objectstorage.CreateMultipartUploadRequest) {
req.ContentType = putReq.ContentType
req.ContentLanguage = putReq.ContentLanguage
req.ContentEncoding = putReq.ContentEncoding
req.ContentDisposition = putReq.ContentDisposition
req.CacheControl = putReq.CacheControl
req.Metadata = metadataWithOpcPrefix(putReq.OpcMeta)
req.OpcSseCustomerAlgorithm = putReq.OpcSseCustomerAlgorithm
req.OpcSseCustomerKey = putReq.OpcSseCustomerKey
req.OpcSseCustomerKeySha256 = putReq.OpcSseCustomerKeySha256
req.OpcSseKmsKeyId = putReq.OpcSseKmsKeyId
}
func (o *Object) applyPartUploadOptions(putReq *objectstorage.PutObjectRequest, req *objectstorage.UploadPartRequest) {
req.OpcSseCustomerAlgorithm = putReq.OpcSseCustomerAlgorithm
req.OpcSseCustomerKey = putReq.OpcSseCustomerKey
req.OpcSseCustomerKeySha256 = putReq.OpcSseCustomerKeySha256
req.OpcSseKmsKeyId = putReq.OpcSseKmsKeyId
}
func metadataWithOpcPrefix(src map[string]string) map[string]string {

View File

@@ -13,12 +13,15 @@ import (
const (
maxSizeForCopy = 4768 * 1024 * 1024
minChunkSize = fs.SizeSuffix(1024 * 1024 * 5)
defaultUploadCutoff = fs.SizeSuffix(200 * 1024 * 1024)
maxUploadParts = 10000
defaultUploadConcurrency = 10
minChunkSize = fs.SizeSuffix(5 * 1024 * 1024)
defaultUploadCutoff = fs.SizeSuffix(200 * 1024 * 1024)
maxUploadCutoff = fs.SizeSuffix(5 * 1024 * 1024 * 1024)
minSleep = 10 * time.Millisecond
defaultCopyTimeoutDuration = fs.Duration(time.Minute)
memoryPoolFlushTime = fs.Duration(time.Minute) // flush the cached buffers after this long
memoryPoolUseMmap = false
)
const (
@@ -55,12 +58,16 @@ type Options struct {
ConfigProfile string `config:"config_profile"`
UploadCutoff fs.SizeSuffix `config:"upload_cutoff"`
ChunkSize fs.SizeSuffix `config:"chunk_size"`
MaxUploadParts int `config:"max_upload_parts"`
UploadConcurrency int `config:"upload_concurrency"`
DisableChecksum bool `config:"disable_checksum"`
MemoryPoolFlushTime fs.Duration `config:"memory_pool_flush_time"`
MemoryPoolUseMmap bool `config:"memory_pool_use_mmap"`
CopyCutoff fs.SizeSuffix `config:"copy_cutoff"`
CopyTimeout fs.Duration `config:"copy_timeout"`
StorageTier string `config:"storage_tier"`
LeavePartsOnError bool `config:"leave_parts_on_error"`
AttemptResumeUpload bool `config:"attempt_resume_upload"`
NoCheckBucket bool `config:"no_check_bucket"`
SSEKMSKeyID string `config:"sse_kms_key_id"`
SSECustomerAlgorithm string `config:"sse_customer_algorithm"`
@@ -157,9 +164,8 @@ The minimum is 0 and the maximum is 5 GiB.`,
Help: `Chunk size to use for uploading.
When uploading files larger than upload_cutoff or files with unknown
size (e.g. from "rclone rcat" or uploaded with "rclone mount" or google
photos or google docs) they will be uploaded as multipart uploads
using this chunk size.
size (e.g. from "rclone rcat" or uploaded with "rclone mount") they will be uploaded
as multipart uploads using this chunk size.
Note that "upload_concurrency" chunks of this size are buffered
in memory per transfer.
@@ -181,6 +187,20 @@ statistics displayed with "-P" flag.
`,
Default: minChunkSize,
Advanced: true,
}, {
Name: "max_upload_parts",
Help: `Maximum number of parts in a multipart upload.
This option defines the maximum number of multipart chunks to use
when doing a multipart upload.
OCI has a limit of 10,000 parts per multipart upload.
Rclone will automatically increase the chunk size when uploading a
large file of a known size to stay below this limit on the number of chunks.
`,
Default: maxUploadParts,
Advanced: true,
}, {
Name: "upload_concurrency",
Help: `Concurrency for multipart uploads.
@@ -203,6 +223,19 @@ copied in chunks of this size.
The minimum is 0 and the maximum is 5 GiB.`,
Default: fs.SizeSuffix(maxSizeForCopy),
Advanced: true,
}, {
Name: "memory_pool_flush_time",
Default: memoryPoolFlushTime,
Advanced: true,
Help: `How often internal memory buffer pools will be flushed.
Uploads which require additional buffers (e.g. multipart) will use the memory pool for allocations.
This option controls how often unused buffers will be removed from the pool.`,
}, {
Name: "memory_pool_use_mmap",
Default: memoryPoolUseMmap,
Advanced: true,
Help: `Whether to use mmap buffers in internal memory pool.`,
}, {
Name: "copy_timeout",
Help: `Timeout for copy.
@@ -238,12 +271,24 @@ to start uploading.`,
encoder.EncodeDot,
}, {
Name: "leave_parts_on_error",
Help: `If true avoid calling abort upload on a failure, leaving all successfully uploaded parts on S3 for manual recovery.
Help: `If true, avoid calling abort upload on a failure, leaving all successfully uploaded parts for manual recovery.
It should be set to true for resuming uploads across different sessions.
WARNING: Storing parts of an incomplete multipart upload counts towards space usage on object storage and will add
additional costs if not cleaned up.
`,
Default: false,
Advanced: true,
}, {
Name: "attempt_resume_upload",
Help: `If true, attempt to resume a previously started multipart upload for the object.
This can speed up multipart transfers by resuming uploads from a past session.
WARNING: If the chunk size differs from that of the past incomplete session, the resumed multipart upload is
aborted and a new multipart upload is started with the new chunk size.
The flag leave_parts_on_error must be true to resume, and to skip parts that were already uploaded successfully.
`,
Default: false,
Advanced: true,
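A hedged example of wiring the resume options together in an rclone config (remote name and the omitted connection settings are illustrative); note the chunk size of the resumed run must match the interrupted one, or the old upload is aborted as described above:

[myoos]
type = oracleobjectstorage
# provider/namespace/compartment/region settings omitted
leave_parts_on_error = true
attempt_resume_upload = true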

View File

@@ -23,6 +23,7 @@ import (
"github.com/rclone/rclone/fs/walk"
"github.com/rclone/rclone/lib/bucket"
"github.com/rclone/rclone/lib/pacer"
"github.com/rclone/rclone/lib/pool"
)
// Register with Fs
@@ -49,6 +50,7 @@ type Fs struct {
rootDirectory string // directory part of root (if any)
cache *bucket.Cache // cache for bucket creation status
pacer *fs.Pacer // To pace the API calls
pool *pool.Pool // memory pool
}
// NewFs Initialize backend
@@ -80,6 +82,12 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e
srv: objectStorageClient,
cache: bucket.NewCache(),
pacer: pc,
pool: pool.New(
time.Duration(opt.MemoryPoolFlushTime),
int(opt.ChunkSize),
opt.UploadConcurrency*ci.Transfers,
opt.MemoryPoolUseMmap,
),
}
f.setRoot(root)
f.features = (&fs.Features{
@@ -179,6 +187,19 @@ func (f *Fs) Hashes() hash.Set {
return hash.Set(hash.MD5)
}
func (f *Fs) getMemoryPool(size int64) *pool.Pool {
if size == int64(f.opt.ChunkSize) {
return f.pool
}
return pool.New(
time.Duration(f.opt.MemoryPoolFlushTime),
int(size),
f.opt.UploadConcurrency*f.ci.Transfers,
f.opt.MemoryPoolUseMmap,
)
}
// setRoot changes the root of the Fs
func (f *Fs) setRoot(root string) {
f.root = parsePath(root)
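For scale, with the defaults added in this commit (5 MiB chunk_size, upload_concurrency 10) and rclone's default of 4 transfers, the shared pool created in NewFs can cache up to 10 × 4 = 40 chunk buffers, roughly 200 MiB; getMemoryPool only builds a separate, same-capacity pool when chunksize.Calculator has raised the part size for a large file. This sizing note is an inference from the constructor arguments above, assuming the default --transfers value.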