diff --git a/backend/azureblob/azureblob.go b/backend/azureblob/azureblob.go index 28df22206..6b72fc11b 100644 --- a/backend/azureblob/azureblob.go +++ b/backend/azureblob/azureblob.go @@ -86,12 +86,56 @@ var ( metadataMu sync.Mutex ) +// system metadata keys which this backend owns +var systemMetadataInfo = map[string]fs.MetadataHelp{ + "cache-control": { + Help: "Cache-Control header", + Type: "string", + Example: "no-cache", + }, + "content-disposition": { + Help: "Content-Disposition header", + Type: "string", + Example: "inline", + }, + "content-encoding": { + Help: "Content-Encoding header", + Type: "string", + Example: "gzip", + }, + "content-language": { + Help: "Content-Language header", + Type: "string", + Example: "en-US", + }, + "content-type": { + Help: "Content-Type header", + Type: "string", + Example: "text/plain", + }, + "tier": { + Help: "Tier of the object", + Type: "string", + Example: "Hot", + ReadOnly: true, + }, + "mtime": { + Help: "Time of last modification, read from rclone metadata", + Type: "RFC 3339", + Example: "2006-01-02T15:04:05.999999999Z07:00", + }, +} + // Register with Fs func init() { fs.Register(&fs.RegInfo{ Name: "azureblob", Description: "Microsoft Azure Blob Storage", NewFs: NewFs, + MetadataInfo: &fs.MetadataInfo{ + System: systemMetadataInfo, + Help: `User metadata is stored as x-ms-meta- keys. Azure metadata keys are case insensitive and are always returned in lower case.`, + }, Options: []fs.Option{{ Name: "account", Help: `Azure Storage Account Name. 
@@ -810,6 +854,9 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e f.features = (&fs.Features{ ReadMimeType: true, WriteMimeType: true, + ReadMetadata: true, + WriteMetadata: true, + UserMetadata: true, BucketBased: true, BucketBasedRootOK: true, SetTier: true, @@ -1157,6 +1204,289 @@ func (o *Object) updateMetadataWithModTime(modTime time.Time) { o.meta[modTimeKey] = modTime.Format(timeFormatOut) } +// parseXMsTags parses the value of the x-ms-tags header into a map. +// It expects comma-separated key=value pairs. Whitespace around keys and +// values is trimmed. Empty pairs are skipped; a pair with no '=' or an empty key is an error. +func parseXMsTags(s string) (map[string]string, error) { + if strings.TrimSpace(s) == "" { + return map[string]string{}, nil + } + out := make(map[string]string) + parts := strings.Split(s, ",") + for _, p := range parts { + p = strings.TrimSpace(p) + if p == "" { + continue + } + kv := strings.SplitN(p, "=", 2) + if len(kv) != 2 { + return nil, fmt.Errorf("invalid tag %q", p) + } + k := strings.TrimSpace(kv[0]) + v := strings.TrimSpace(kv[1]) + if k == "" { + return nil, fmt.Errorf("invalid tag key in %q", p) + } + out[k] = v + } + return out, nil +} + +// mapMetadataToAzure maps a generic metadata map to Azure HTTP headers, +// user metadata, tags and optional modTime override. +// Reserved x-ms-* keys (except x-ms-tags) are ignored for user metadata. +// +// Pass a logger to surface non-fatal parsing issues (e.g. bad mtime). 
+func mapMetadataToAzure(meta map[string]string, logf func(string, ...any)) (headers blob.HTTPHeaders, userMeta map[string]*string, tags map[string]string, modTime *time.Time, err error) { + if meta == nil { + return headers, nil, nil, nil, nil + } + tmp := make(map[string]string) + for k, v := range meta { + lowerKey := strings.ToLower(k) + switch lowerKey { + case "cache-control": + headers.BlobCacheControl = pString(v) + case "content-disposition": + headers.BlobContentDisposition = pString(v) + case "content-encoding": + headers.BlobContentEncoding = pString(v) + case "content-language": + headers.BlobContentLanguage = pString(v) + case "content-type": + headers.BlobContentType = pString(v) + case "x-ms-tags": + parsed, perr := parseXMsTags(v) + if perr != nil { + return headers, nil, nil, nil, perr + } + // allocate only if there are tags + if len(parsed) > 0 { + tags = parsed + } + case "mtime": + // Accept multiple layouts for tolerance + var parsed time.Time + var pErr error + for _, layout := range []string{time.RFC3339Nano, time.RFC3339, timeFormatOut} { + parsed, pErr = time.Parse(layout, v) + if pErr == nil { + modTime = &parsed + break + } + } + // Log and ignore if unparseable + if modTime == nil && logf != nil { + logf("metadata: couldn't parse mtime %q: %v", v, pErr) + } + case "tier": + // ignore - handled elsewhere + default: + // Filter out other reserved headers so they don't end up as user metadata + if strings.HasPrefix(lowerKey, "x-ms-") { + continue + } + tmp[lowerKey] = v + } + } + userMeta = toAzureMetaPtr(tmp) + return headers, userMeta, tags, modTime, nil +} + +// toAzureMetaPtr converts a map[string]string to map[string]*string as used by Azure SDK +func toAzureMetaPtr(in map[string]string) map[string]*string { + if len(in) == 0 { + return nil + } + out := make(map[string]*string, len(in)) + for k, v := range in { + vv := v + out[k] = &vv + } + return out +} + +// assembleCopyParams prepares headers, metadata and tags for copy 
operations. +// +// It starts from the source properties, optionally overlays mapped metadata +// from rclone's metadata options, sets mtime when --metadata is enabled (a mapped mtime wins, otherwise a missing key is filled from the source ModTime), +// and returns whether mapping was actually requested (hadMapping). +// +// includeBaseMeta selects the baseline for the destination's user metadata. +// If includeBaseMeta is true, start user metadata from the source's metadata +// and overlay mapped values. This matches multipart copy commit behavior. +// If false, only include mapped user metadata (no source baseline) which +// matches previous singlepart StartCopyFromURL semantics. +func assembleCopyParams(ctx context.Context, f *Fs, src fs.Object, srcProps *blob.GetPropertiesResponse, includeBaseMeta bool) (headers blob.HTTPHeaders, meta map[string]*string, tags map[string]string, hadMapping bool, err error) { + // Start from source properties + headers = blob.HTTPHeaders{ + BlobCacheControl: srcProps.CacheControl, + BlobContentDisposition: srcProps.ContentDisposition, + BlobContentEncoding: srcProps.ContentEncoding, + BlobContentLanguage: srcProps.ContentLanguage, + BlobContentMD5: srcProps.ContentMD5, + BlobContentType: srcProps.ContentType, + } + // Optionally deep copy user metadata pointers from source. Normalise keys to + // lower-case to avoid duplicate x-ms-meta headers when we later inject/overlay + // metadata (Azure treats keys case-insensitively but Go's http.Header will + // join duplicate keys into a comma separated list, which breaks shared-key + // signing). 
+ if includeBaseMeta && len(srcProps.Metadata) > 0 { + meta = make(map[string]*string, len(srcProps.Metadata)) + for k, v := range srcProps.Metadata { + if v != nil { + vv := *v + meta[strings.ToLower(k)] = &vv + } + } + } + + // Only consider mapping if metadata pipeline is enabled + if fs.GetConfig(ctx).Metadata { + mapped, mapErr := fs.GetMetadataOptions(ctx, f, src, fs.MetadataAsOpenOptions(ctx)) + if mapErr != nil { + return headers, meta, nil, false, fmt.Errorf("failed to map metadata: %w", mapErr) + } + if mapped != nil { + // Map rclone metadata to Azure shapes + mappedHeaders, userMeta, mappedTags, mappedModTime, herr := mapMetadataToAzure(mapped, func(format string, args ...any) { fs.Debugf(f, format, args...) }) + if herr != nil { + return headers, meta, nil, false, fmt.Errorf("metadata mapping: %w", herr) + } + hadMapping = true + // Overlay headers (only non-nil) + if mappedHeaders.BlobCacheControl != nil { + headers.BlobCacheControl = mappedHeaders.BlobCacheControl + } + if mappedHeaders.BlobContentDisposition != nil { + headers.BlobContentDisposition = mappedHeaders.BlobContentDisposition + } + if mappedHeaders.BlobContentEncoding != nil { + headers.BlobContentEncoding = mappedHeaders.BlobContentEncoding + } + if mappedHeaders.BlobContentLanguage != nil { + headers.BlobContentLanguage = mappedHeaders.BlobContentLanguage + } + if mappedHeaders.BlobContentType != nil { + headers.BlobContentType = mappedHeaders.BlobContentType + } + // Overlay user metadata + if len(userMeta) > 0 { + if meta == nil { + meta = make(map[string]*string, len(userMeta)) + } + for k, v := range userMeta { + meta[k] = v + } + } + // Apply tags if any + if len(mappedTags) > 0 { + tags = mappedTags + } + // Set mtime: a mapped mtime always wins (even over base metadata copied from the source); otherwise fill a missing key from the source ModTime + if _, ok := meta[modTimeKey]; !ok || mappedModTime != nil { + when := src.ModTime(ctx) + if mappedModTime != nil { + when = *mappedModTime + } + val := when.Format(time.RFC3339Nano) + if meta == nil { + meta = make(map[string]*string, 1) + } + 
meta[modTimeKey] = &val + } + // Ensure content-type fallback to source if not set by mapper + if headers.BlobContentType == nil { + headers.BlobContentType = srcProps.ContentType + } + } else { + // Mapping enabled but not provided: ensure mtime present based on source ModTime + if _, ok := meta[modTimeKey]; !ok { + when := src.ModTime(ctx) + val := when.Format(time.RFC3339Nano) + if meta == nil { + meta = make(map[string]*string, 1) + } + meta[modTimeKey] = &val + } + } + } + + return headers, meta, tags, hadMapping, nil +} + +// applyMappedMetadata applies mapped metadata and headers to the object state for uploads. +// +// It reads `--metadata`, `--metadata-set`, and `--metadata-mapper` outputs via fs.GetMetadataOptions +// and updates o.meta, o.tags and ui.httpHeaders accordingly. +func (o *Object) applyMappedMetadata(ctx context.Context, src fs.ObjectInfo, ui *uploadInfo, options []fs.OpenOption) (modTime time.Time, err error) { + // Start from the source modtime; may be overridden by metadata + modTime = src.ModTime(ctx) + + // Fetch mapped metadata if --metadata is enabled + meta, err := fs.GetMetadataOptions(ctx, o.fs, src, options) + if err != nil { + return modTime, err + } + if meta == nil { + // No metadata processing requested + return modTime, nil + } + + // Map metadata using common helper + headers, userMeta, tags, mappedModTime, err := mapMetadataToAzure(meta, func(format string, args ...any) { fs.Debugf(o, format, args...) 
}) + if err != nil { + return modTime, err + } + // Merge headers into ui + if headers.BlobCacheControl != nil { + ui.httpHeaders.BlobCacheControl = headers.BlobCacheControl + } + if headers.BlobContentDisposition != nil { + ui.httpHeaders.BlobContentDisposition = headers.BlobContentDisposition + } + if headers.BlobContentEncoding != nil { + ui.httpHeaders.BlobContentEncoding = headers.BlobContentEncoding + } + if headers.BlobContentLanguage != nil { + ui.httpHeaders.BlobContentLanguage = headers.BlobContentLanguage + } + if headers.BlobContentType != nil { + ui.httpHeaders.BlobContentType = headers.BlobContentType + } + + // Apply user metadata to o.meta with a single critical section + if len(userMeta) > 0 { + metadataMu.Lock() + if o.meta == nil { + o.meta = make(map[string]string, len(userMeta)) + } + for k, v := range userMeta { + if v != nil { + o.meta[k] = *v + } + } + metadataMu.Unlock() + } + + // Apply tags + if len(tags) > 0 { + if o.tags == nil { + o.tags = make(map[string]string, len(tags)) + } + for k, v := range tags { + o.tags[k] = v + } + } + + if mappedModTime != nil { + modTime = *mappedModTime + } + + return modTime, nil +} + // Returns whether file is a directory marker or not func isDirectoryMarker(size int64, metadata map[string]*string, remote string) bool { // Directory markers are 0 length @@ -1951,18 +2281,19 @@ func (f *Fs) copyMultipart(ctx context.Context, remote, dstContainer, dstPath st return nil, err } - // Convert metadata from source object + // Prepare metadata/headers/tags for destination + // For multipart commit, include base metadata from source then overlay mapped + commitHeaders, commitMeta, commitTags, _, err := assembleCopyParams(ctx, f, src, srcProperties, true) + if err != nil { + return nil, fmt.Errorf("multipart copy: %w", err) + } + + // Convert metadata from source or mapper options := blockblob.CommitBlockListOptions{ - Metadata: srcProperties.Metadata, - Tier: parseTier(f.opt.AccessTier), - HTTPHeaders: 
&blob.HTTPHeaders{ - BlobCacheControl: srcProperties.CacheControl, - BlobContentDisposition: srcProperties.ContentDisposition, - BlobContentEncoding: srcProperties.ContentEncoding, - BlobContentLanguage: srcProperties.ContentLanguage, - BlobContentMD5: srcProperties.ContentMD5, - BlobContentType: srcProperties.ContentType, - }, + Metadata: commitMeta, + Tags: commitTags, + Tier: parseTier(f.opt.AccessTier), + HTTPHeaders: &commitHeaders, } // Finalise the upload session @@ -1993,10 +2324,36 @@ func (f *Fs) copySinglepart(ctx context.Context, remote, dstContainer, dstPath s return nil, fmt.Errorf("single part copy: source auth: %w", err) } - // Start the copy + // Prepare mapped metadata/tags/headers if requested options := blob.StartCopyFromURLOptions{ Tier: parseTier(f.opt.AccessTier), } + var postHeaders *blob.HTTPHeaders + // Read source properties and assemble params; this also handles the case when mapping is disabled + srcProps, err := src.readMetaDataAlways(ctx) + if err != nil { + return nil, fmt.Errorf("single part copy: read source properties: %w", err) + } + // For singlepart copy, do not include base metadata from source in StartCopyFromURL + headers, meta, tags, hadMapping, aerr := assembleCopyParams(ctx, f, src, srcProps, false) + if aerr != nil { + return nil, fmt.Errorf("single part copy: %w", aerr) + } + // Apply tags and post-copy headers only when mapping requested changes + if len(tags) > 0 { + options.BlobTags = make(map[string]string, len(tags)) + for k, v := range tags { + options.BlobTags[k] = v + } + } + if hadMapping { + // Only set metadata explicitly when mapping was requested; otherwise + // let the service copy source metadata (including mtime) automatically. 
+ if len(meta) > 0 { + options.Metadata = meta + } + postHeaders = &headers + } var startCopy blob.StartCopyFromURLResponse err = f.pacer.Call(func() (bool, error) { startCopy, err = dstBlobSVC.StartCopyFromURL(ctx, srcURL, &options) @@ -2026,6 +2383,16 @@ func (f *Fs) copySinglepart(ctx context.Context, remote, dstContainer, dstPath s pollTime = min(2*pollTime, time.Second) } + // If mapper requested header changes, set them post-copy + if postHeaders != nil { + blb := f.getBlobSVC(dstContainer, dstPath) + _, setErr := blb.SetHTTPHeaders(ctx, *postHeaders, nil) + if setErr != nil { + return nil, fmt.Errorf("single part copy: failed to set headers: %w", setErr) + } + } + // Metadata (when requested) is set via StartCopyFromURL options.Metadata + return f.NewObject(ctx, remote) } @@ -2157,6 +2524,35 @@ func (o *Object) getMetadata() (metadata map[string]*string) { return metadata } +// Metadata returns metadata for an object +// +// It returns a combined view of system and user metadata. 
+func (o *Object) Metadata(ctx context.Context) (fs.Metadata, error) { + // Ensure metadata is loaded + if err := o.readMetaData(ctx); err != nil { + return nil, err + } + + m := fs.Metadata{} + + // System metadata we expose + if !o.modTime.IsZero() { + m["mtime"] = o.modTime.Format(time.RFC3339Nano) + } + if o.accessTier != "" { + m["tier"] = string(o.accessTier) + } + + // Merge user metadata (already lower-cased keys) + metadataMu.Lock() + for k, v := range o.meta { + m[k] = v + } + metadataMu.Unlock() + + return m, nil +} + // decodeMetaDataFromPropertiesResponse sets the metadata from the data passed in // // Sets @@ -2995,17 +3391,19 @@ func (o *Object) prepareUpload(ctx context.Context, src fs.ObjectInfo, options [ // containerPath = containerPath[:len(containerPath)-1] // } - // Update Mod time - o.updateMetadataWithModTime(src.ModTime(ctx)) - if err != nil { - return ui, err - } - - // Create the HTTP headers for the upload + // Start with default content-type based on source ui.httpHeaders = blob.HTTPHeaders{ BlobContentType: pString(fs.MimeType(ctx, src)), } + // Apply mapped metadata/headers/tags if requested + modTime, err := o.applyMappedMetadata(ctx, src, &ui, options) + if err != nil { + return ui, err + } + // Ensure mtime is set in metadata based on possibly overridden modTime + o.updateMetadataWithModTime(modTime) + // Compute the Content-MD5 of the file. 
As we stream all uploads it // will be set in PutBlockList API call using the 'x-ms-blob-content-md5' header if !o.fs.opt.DisableCheckSum { diff --git a/backend/azureblob/azureblob_internal_test.go b/backend/azureblob/azureblob_internal_test.go index 67669775d..b2fc0f7d5 100644 --- a/backend/azureblob/azureblob_internal_test.go +++ b/backend/azureblob/azureblob_internal_test.go @@ -5,11 +5,16 @@ package azureblob import ( "context" "encoding/base64" + "fmt" + "net/http" "strings" "testing" + "time" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob" "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/object" "github.com/rclone/rclone/fstest" "github.com/rclone/rclone/fstest/fstests" "github.com/rclone/rclone/lib/random" @@ -148,4 +153,417 @@ func (f *Fs) testWriteUncommittedBlocks(t *testing.T) { func (f *Fs) InternalTest(t *testing.T) { t.Run("Features", f.testFeatures) t.Run("WriteUncommittedBlocks", f.testWriteUncommittedBlocks) + t.Run("Metadata", f.testMetadataPaths) +} + +// helper to read blob properties for an object +func getProps(ctx context.Context, t *testing.T, o fs.Object) *blob.GetPropertiesResponse { + ao := o.(*Object) + props, err := ao.readMetaDataAlways(ctx) + require.NoError(t, err) + return props +} + +// helper to assert select headers and user metadata +func assertHeadersAndMetadata(t *testing.T, props *blob.GetPropertiesResponse, want map[string]string, wantUserMeta map[string]string) { + // Headers + get := func(p *string) string { + if p == nil { + return "" + } + return *p + } + if v, ok := want["content-type"]; ok { + assert.Equal(t, v, get(props.ContentType), "content-type") + } + if v, ok := want["cache-control"]; ok { + assert.Equal(t, v, get(props.CacheControl), "cache-control") + } + if v, ok := want["content-disposition"]; ok { + assert.Equal(t, v, get(props.ContentDisposition), "content-disposition") + } + if v, ok := want["content-encoding"]; ok 
{ + assert.Equal(t, v, get(props.ContentEncoding), "content-encoding") + } + if v, ok := want["content-language"]; ok { + assert.Equal(t, v, get(props.ContentLanguage), "content-language") + } + // User metadata (case-insensitive keys from service) + norm := make(map[string]*string, len(props.Metadata)) + for kk, vv := range props.Metadata { + norm[strings.ToLower(kk)] = vv + } + for k, v := range wantUserMeta { + pv, ok := norm[strings.ToLower(k)] + if assert.True(t, ok, fmt.Sprintf("missing user metadata key %q", k)) { + if pv == nil { + assert.Equal(t, v, "", k) + } else { + assert.Equal(t, v, *pv, k) + } + } else { + // Log available keys for diagnostics + keys := make([]string, 0, len(props.Metadata)) + for kk := range props.Metadata { + keys = append(keys, kk) + } + t.Logf("available user metadata keys: %v", keys) + } + } +} + +// helper to read blob tags for an object +func getTagsMap(ctx context.Context, t *testing.T, o fs.Object) map[string]string { + ao := o.(*Object) + blb := ao.getBlobSVC() + resp, err := blb.GetTags(ctx, nil) + require.NoError(t, err) + out := make(map[string]string) + for _, tag := range resp.BlobTagSet { + if tag.Key != nil { + k := *tag.Key + v := "" + if tag.Value != nil { + v = *tag.Value + } + out[k] = v + } + } + return out +} + +// Test metadata across different write paths +func (f *Fs) testMetadataPaths(t *testing.T) { + ctx := context.Background() + if testing.Short() { + t.Skip("skipping in short mode") + } + + // Common expected metadata and headers + baseMeta := fs.Metadata{ + "cache-control": "no-cache", + "content-disposition": "inline", + "content-language": "en-US", + // Note: Don't set content-encoding here to avoid download decoding differences + // We will set a custom user metadata key + "potato": "royal", + // and modtime + "mtime": fstest.Time("2009-05-06T04:05:06.499999999Z").Format(time.RFC3339Nano), + } + + // Singlepart upload + t.Run("PutSinglepart", func(t *testing.T) { + // size less than chunk size + 
contents := random.String(int(f.opt.ChunkSize / 2)) + item := fstest.NewItem("meta-single.txt", contents, fstest.Time("2001-05-06T04:05:06.499999999Z")) + // override content-type via metadata mapping + meta := fs.Metadata{} + meta.Merge(baseMeta) + meta["content-type"] = "text/plain" + obj := fstests.PutTestContentsMetadata(ctx, t, f, &item, true, contents, true, "text/html", meta) + defer func() { _ = obj.Remove(ctx) }() + + props := getProps(ctx, t, obj) + assertHeadersAndMetadata(t, props, map[string]string{ + "content-type": "text/plain", + "cache-control": "no-cache", + "content-disposition": "inline", + "content-language": "en-US", + }, map[string]string{ + "potato": "royal", + }) + _ = http.StatusOK // keep import for parity but don't inspect RawResponse + }) + + // Multipart upload + t.Run("PutMultipart", func(t *testing.T) { + // size greater than chunk size to force multipart + contents := random.String(int(f.opt.ChunkSize + 1024)) + item := fstest.NewItem("meta-multipart.txt", contents, fstest.Time("2001-05-06T04:05:06.499999999Z")) + meta := fs.Metadata{} + meta.Merge(baseMeta) + meta["content-type"] = "application/json" + obj := fstests.PutTestContentsMetadata(ctx, t, f, &item, true, contents, true, "text/html", meta) + defer func() { _ = obj.Remove(ctx) }() + + props := getProps(ctx, t, obj) + assertHeadersAndMetadata(t, props, map[string]string{ + "content-type": "application/json", + "cache-control": "no-cache", + "content-disposition": "inline", + "content-language": "en-US", + }, map[string]string{ + "potato": "royal", + }) + + // Tags: Singlepart upload + t.Run("PutSinglepartTags", func(t *testing.T) { + contents := random.String(int(f.opt.ChunkSize / 2)) + item := fstest.NewItem("tags-single.txt", contents, fstest.Time("2001-05-06T04:05:06.499999999Z")) + meta := fs.Metadata{ + "x-ms-tags": "env=dev,team=sync", + } + obj := fstests.PutTestContentsMetadata(ctx, t, f, &item, true, contents, true, "text/plain", meta) + defer func() { _ = 
obj.Remove(ctx) }() + + tags := getTagsMap(ctx, t, obj) + assert.Equal(t, "dev", tags["env"]) + assert.Equal(t, "sync", tags["team"]) + }) + + // Tags: Multipart upload + t.Run("PutMultipartTags", func(t *testing.T) { + contents := random.String(int(f.opt.ChunkSize + 2048)) + item := fstest.NewItem("tags-multipart.txt", contents, fstest.Time("2001-05-06T04:05:06.499999999Z")) + meta := fs.Metadata{ + "x-ms-tags": "project=alpha,release=2025-08", + } + obj := fstests.PutTestContentsMetadata(ctx, t, f, &item, true, contents, true, "application/octet-stream", meta) + defer func() { _ = obj.Remove(ctx) }() + + tags := getTagsMap(ctx, t, obj) + assert.Equal(t, "alpha", tags["project"]) + assert.Equal(t, "2025-08", tags["release"]) + }) + }) + + // Singlepart copy with metadata-set mapping; omit content-type to exercise fallback + t.Run("CopySinglepart", func(t *testing.T) { + // create small source + contents := random.String(int(f.opt.ChunkSize / 2)) + srcItem := fstest.NewItem("meta-copy-single-src.txt", contents, fstest.Time("2001-05-06T04:05:06.499999999Z")) + srcObj := fstests.PutTestContentsMetadata(ctx, t, f, &srcItem, true, contents, true, "text/plain", nil) + defer func() { _ = srcObj.Remove(ctx) }() + + // set mapping via MetadataSet + ctx2, ci := fs.AddConfig(ctx) + ci.Metadata = true + ci.MetadataSet = fs.Metadata{ + "cache-control": "private, max-age=60", + "content-disposition": "attachment; filename=foo.txt", + "content-language": "fr", + // no content-type: should fallback to source + "potato": "maris", + } + + // do copy + dstName := "meta-copy-single-dst.txt" + dst, err := f.Copy(ctx2, srcObj, dstName) + require.NoError(t, err) + defer func() { _ = dst.Remove(ctx2) }() + + props := getProps(ctx2, t, dst) + // content-type should fallback to source (text/plain) + assertHeadersAndMetadata(t, props, map[string]string{ + "content-type": "text/plain", + "cache-control": "private, max-age=60", + "content-disposition": "attachment; filename=foo.txt", + 
"content-language": "fr", + }, map[string]string{ + "potato": "maris", + }) + // mtime should be populated on copy when --metadata is used + // and should equal the source ModTime (RFC3339Nano) + // Read user metadata (case-insensitive) + m := props.Metadata + var gotMtime string + for k, v := range m { + if strings.EqualFold(k, "mtime") && v != nil { + gotMtime = *v + break + } + } + if assert.NotEmpty(t, gotMtime, "mtime not set on destination metadata") { + // parse and compare times ignoring formatting differences + parsed, err := time.Parse(time.RFC3339Nano, gotMtime) + require.NoError(t, err) + assert.True(t, srcObj.ModTime(ctx2).Equal(parsed), "dst mtime should equal src ModTime") + } + }) + + // CopySinglepart with only --metadata (no MetadataSet) must inject mtime and preserve src content-type + t.Run("CopySinglepart_MetadataOnly", func(t *testing.T) { + contents := random.String(int(f.opt.ChunkSize / 2)) + srcItem := fstest.NewItem("meta-copy-single-only-src.txt", contents, fstest.Time("2001-05-06T04:05:06.499999999Z")) + srcObj := fstests.PutTestContentsMetadata(ctx, t, f, &srcItem, true, contents, true, "text/plain", nil) + defer func() { _ = srcObj.Remove(ctx) }() + + ctx2, ci := fs.AddConfig(ctx) + ci.Metadata = true + + dstName := "meta-copy-single-only-dst.txt" + dst, err := f.Copy(ctx2, srcObj, dstName) + require.NoError(t, err) + defer func() { _ = dst.Remove(ctx2) }() + + props := getProps(ctx2, t, dst) + assertHeadersAndMetadata(t, props, map[string]string{ + "content-type": "text/plain", + }, map[string]string{}) + // Assert mtime injected + m := props.Metadata + var gotMtime string + for k, v := range m { + if strings.EqualFold(k, "mtime") && v != nil { + gotMtime = *v + break + } + } + if assert.NotEmpty(t, gotMtime, "mtime not set on destination metadata") { + parsed, err := time.Parse(time.RFC3339Nano, gotMtime) + require.NoError(t, err) + assert.True(t, srcObj.ModTime(ctx2).Equal(parsed), "dst mtime should equal src ModTime") + } + }) + + 
// Multipart copy with metadata-set mapping; omit content-type to exercise fallback + t.Run("CopyMultipart", func(t *testing.T) { + // create large source to force multipart + contents := random.String(int(f.opt.CopyCutoff + 1024)) + srcItem := fstest.NewItem("meta-copy-multi-src.txt", contents, fstest.Time("2001-05-06T04:05:06.499999999Z")) + srcObj := fstests.PutTestContentsMetadata(ctx, t, f, &srcItem, true, contents, true, "application/octet-stream", nil) + defer func() { _ = srcObj.Remove(ctx) }() + + // set mapping via MetadataSet + ctx2, ci := fs.AddConfig(ctx) + ci.Metadata = true + ci.MetadataSet = fs.Metadata{ + "cache-control": "max-age=0, no-cache", + // omit content-type to trigger fallback + "content-language": "de", + "potato": "desiree", + } + + dstName := "meta-copy-multi-dst.txt" + dst, err := f.Copy(ctx2, srcObj, dstName) + require.NoError(t, err) + defer func() { _ = dst.Remove(ctx2) }() + + props := getProps(ctx2, t, dst) + // content-type should fallback to source (application/octet-stream) + assertHeadersAndMetadata(t, props, map[string]string{ + "content-type": "application/octet-stream", + "cache-control": "max-age=0, no-cache", + "content-language": "de", + }, map[string]string{ + "potato": "desiree", + }) + // mtime should be populated on copy when --metadata is used + m := props.Metadata + var gotMtime string + for k, v := range m { + if strings.EqualFold(k, "mtime") && v != nil { + gotMtime = *v + break + } + } + if assert.NotEmpty(t, gotMtime, "mtime not set on destination metadata") { + parsed, err := time.Parse(time.RFC3339Nano, gotMtime) + require.NoError(t, err) + assert.True(t, srcObj.ModTime(ctx2).Equal(parsed), "dst mtime should equal src ModTime") + } + }) + + // CopyMultipart with only --metadata must inject mtime and preserve src content-type + t.Run("CopyMultipart_MetadataOnly", func(t *testing.T) { + contents := random.String(int(f.opt.CopyCutoff + 2048)) + srcItem := fstest.NewItem("meta-copy-multi-only-src.txt", contents, 
fstest.Time("2001-05-06T04:05:06.499999999Z")) + srcObj := fstests.PutTestContentsMetadata(ctx, t, f, &srcItem, true, contents, true, "application/octet-stream", nil) + defer func() { _ = srcObj.Remove(ctx) }() + + ctx2, ci := fs.AddConfig(ctx) + ci.Metadata = true + + dstName := "meta-copy-multi-only-dst.txt" + dst, err := f.Copy(ctx2, srcObj, dstName) + require.NoError(t, err) + defer func() { _ = dst.Remove(ctx2) }() + + props := getProps(ctx2, t, dst) + assertHeadersAndMetadata(t, props, map[string]string{ + "content-type": "application/octet-stream", + }, map[string]string{}) + m := props.Metadata + var gotMtime string + for k, v := range m { + if strings.EqualFold(k, "mtime") && v != nil { + gotMtime = *v + break + } + } + if assert.NotEmpty(t, gotMtime, "mtime not set on destination metadata") { + parsed, err := time.Parse(time.RFC3339Nano, gotMtime) + require.NoError(t, err) + assert.True(t, srcObj.ModTime(ctx2).Equal(parsed), "dst mtime should equal src ModTime") + } + }) + + // Tags: Singlepart copy + t.Run("CopySinglepartTags", func(t *testing.T) { + // create small source + contents := random.String(int(f.opt.ChunkSize / 2)) + srcItem := fstest.NewItem("tags-copy-single-src.txt", contents, fstest.Time("2001-05-06T04:05:06.499999999Z")) + srcObj := fstests.PutTestContentsMetadata(ctx, t, f, &srcItem, true, contents, true, "text/plain", nil) + defer func() { _ = srcObj.Remove(ctx) }() + + // set mapping via MetadataSet including tags + ctx2, ci := fs.AddConfig(ctx) + ci.Metadata = true + ci.MetadataSet = fs.Metadata{ + "x-ms-tags": "copy=single,mode=test", + } + + dstName := "tags-copy-single-dst.txt" + dst, err := f.Copy(ctx2, srcObj, dstName) + require.NoError(t, err) + defer func() { _ = dst.Remove(ctx2) }() + + tags := getTagsMap(ctx2, t, dst) + assert.Equal(t, "single", tags["copy"]) + assert.Equal(t, "test", tags["mode"]) + }) + + // Tags: Multipart copy + t.Run("CopyMultipartTags", func(t *testing.T) { + // create large source to force multipart + 
contents := random.String(int(f.opt.CopyCutoff + 4096)) + srcItem := fstest.NewItem("tags-copy-multi-src.txt", contents, fstest.Time("2001-05-06T04:05:06.499999999Z")) + srcObj := fstests.PutTestContentsMetadata(ctx, t, f, &srcItem, true, contents, true, "application/octet-stream", nil) + defer func() { _ = srcObj.Remove(ctx) }() + + ctx2, ci := fs.AddConfig(ctx) + ci.Metadata = true + ci.MetadataSet = fs.Metadata{ + "x-ms-tags": "copy=multi,mode=test", + } + + dstName := "tags-copy-multi-dst.txt" + dst, err := f.Copy(ctx2, srcObj, dstName) + require.NoError(t, err) + defer func() { _ = dst.Remove(ctx2) }() + + tags := getTagsMap(ctx2, t, dst) + assert.Equal(t, "multi", tags["copy"]) + assert.Equal(t, "test", tags["mode"]) + }) + + // Negative: invalid x-ms-tags must error + t.Run("InvalidXMsTags", func(t *testing.T) { + contents := random.String(32) + item := fstest.NewItem("tags-invalid.txt", contents, fstest.Time("2001-05-06T04:05:06.499999999Z")) + // construct ObjectInfo with invalid x-ms-tags + buf := strings.NewReader(contents) + // Build obj info with metadata + meta := fs.Metadata{ + "x-ms-tags": "badpair-without-equals", + } + // force metadata on + ctx2, ci := fs.AddConfig(ctx) + ci.Metadata = true + obji := object.NewStaticObjectInfo(item.Path, item.ModTime, int64(len(contents)), true, nil, nil) + obji = obji.WithMetadata(meta).WithMimeType("text/plain") + _, err := f.Put(ctx2, buf, obji) + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid tag") + }) } diff --git a/docs/content/azureblob.md b/docs/content/azureblob.md index 3e4a9fb27..11f9989e7 100644 --- a/docs/content/azureblob.md +++ b/docs/content/azureblob.md @@ -103,6 +103,26 @@ MD5 hashes are stored with blobs. However blobs that were uploaded in chunks only have an MD5 if the source remote was capable of MD5 hashes, e.g. the local disk. 
+### Metadata and tags + +Rclone can map arbitrary metadata to Azure Blob headers, user metadata, and tags +when `--metadata` is enabled (including values supplied via `--metadata-set` / `--metadata-mapper`). + +- Headers: Set these keys in metadata to map to the corresponding blob headers: + - `cache-control`, `content-disposition`, `content-encoding`, `content-language`, `content-type`. +- User metadata: Any other non-reserved keys are written as user metadata + (keys are normalized to lowercase). Keys starting with `x-ms-` are reserved and + are not stored as user metadata. +- Tags: Provide `x-ms-tags` as a comma-separated list of `key=value` pairs, e.g. + `x-ms-tags=env=dev,team=sync`. These are applied as blob tags on upload and on + server-side copies. Whitespace around keys/values is ignored; empty pairs are skipped, and a pair without `=` or with an empty key is an error. +- Modtime override: Provide `mtime` in RFC3339/RFC3339Nano format to override the + stored modtime persisted in user metadata. If `mtime` cannot be parsed, rclone + logs a debug message and ignores the override. + +Notes: +- Rclone ignores reserved `x-ms-*` keys (except `x-ms-tags`) for user metadata, and the read-only `tier` key is not applied from metadata. + ### Performance When uploading large files, increasing the value of diff --git a/fstest/fstests/fstests.go b/fstest/fstests/fstests.go index 5e1105cfc..b1651a55f 100644 --- a/fstest/fstests/fstests.go +++ b/fstest/fstests/fstests.go @@ -368,7 +368,7 @@ func Run(t *testing.T, opt *Opt) { } file1Contents string file1MimeType = "text/csv" - file1Metadata = fs.Metadata{"rclone-test": "potato"} + file1Metadata = fs.Metadata{"rclonetest": "potato"} file2 = fstest.Item{ ModTime: fstest.Time("2001-02-03T04:05:10.123123123Z"), Path: `hello? sausage/êé/Hello, 世界/ " ' @ < > & ? + ≠/z.txt`,