1
0
mirror of https://github.com/rclone/rclone.git synced 2025-12-06 00:03:32 +00:00
Files
rclone/cmd/copyurl/copyurl.go
dougal cce399515f copyurl: reworked code, added concurrency and tests
- Added Tests
- Fixed file name handling
- Added concurrent downloads
- Limited downloads to --transfers
- Fixes #8127
2025-09-11 13:56:14 +01:00

179 lines
5.4 KiB
Go

// Package copyurl provides the copyurl command.
package copyurl
import (
"context"
"encoding/csv"
"errors"
"fmt"
"os"
"strings"
"github.com/rclone/rclone/cmd"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/config/flags"
"github.com/rclone/rclone/fs/operations"
"github.com/rclone/rclone/lib/errcount"
"github.com/spf13/cobra"
"golang.org/x/sync/errgroup"
)
// Flag state for the copyurl command. Every flag starts out false (the zero
// value) and is bound to its command line switch in init.
var (
	autoFilename   bool // --auto-filename / -a: take the file name from the URL
	headerFilename bool // --header-filename: take the file name from Content-Disposition
	printFilename  bool // --print-filename / -p: print the resulting file name
	stdout         bool // --stdout: write the download to standard output
	noClobber      bool // --no-clobber: do not overwrite an existing file
	urls           bool // --urls: first argument is a CSV file of URLs
)
// init registers the copyurl command on the root command and binds its
// command line flags to the package-level flag variables.
func init() {
	cmd.Root.AddCommand(commandDefinition)
	cmdFlags := commandDefinition.Flags()
	// Each flag uses the current value of its own variable as the default.
	flags.BoolVarP(cmdFlags, &autoFilename, "auto-filename", "a", autoFilename, "Get the file name from the URL and use it for destination file path", "")
	flags.BoolVarP(cmdFlags, &headerFilename, "header-filename", "", headerFilename, "Get the file name from the Content-Disposition header", "")
	flags.BoolVarP(cmdFlags, &printFilename, "print-filename", "p", printFilename, "Print the resulting name from --auto-filename", "")
	flags.BoolVarP(cmdFlags, &noClobber, "no-clobber", "", noClobber, "Prevent overwriting file with same name", "")
	flags.BoolVarP(cmdFlags, &stdout, "stdout", "", stdout, "Write the output to stdout rather than a file", "")
	// The default here must be urls, not stdout: the two only happened to
	// agree because both start out false.
	flags.BoolVarP(cmdFlags, &urls, "urls", "", urls, "Use a CSV file of links to process multiple URLs", "")
}
// commandDefinition is the cobra command implementing "rclone copyurl".
//
// The Long help text is written with | standing in for a backtick (a raw
// string literal cannot contain one); strings.ReplaceAll swaps them back when
// the package is initialised.
var commandDefinition = &cobra.Command{
	Use:   "copyurl https://example.com dest:path",
	Short: `Copy the contents of the URL supplied content to dest:path.`,
	Long: strings.ReplaceAll(`Download a URL's content and copy it to the destination without saving
it in temporary storage.
Setting |--auto-filename| will attempt to automatically determine the
filename from the URL (after any redirections) and used in the
destination path.
With |--header-filename| in addition, if a specific filename is
set in HTTP headers, it will be used instead of the name from the URL.
With |--print-filename| in addition, the resulting file name will be
printed.
Setting |--no-clobber| will prevent overwriting file on the
destination if there is one with the same name.
Setting |--stdout| or making the output file name |-|
will cause the output to be written to standard output.
Setting |--urls| allows you to input a CSV file of URLs in format: URL,
FILENAME. If |--urls| is in use then replace the URL in the arguments with the
file containing the URLs, e.g.:
|||sh
rclone copyurl --urls myurls.csv remote:dir
|||
Missing filenames will be autogenerated equivalent to using |--auto-filename|.
Note that |--stdout| and |--print-filename| are incompatible with |--urls|.
This will do |--transfers| copies in parallel. Note that if |--auto-filename|
is desired for all URLs then a file with only URLs and no filename can be used.
### Troubleshooting
If you can't get |rclone copyurl| to work then here are some things you can try:
- |--disable-http2| rclone will use HTTP2 if available - try disabling it
- |--bind 0.0.0.0| rclone will use IPv6 if available - try disabling it
- |--bind ::0| to disable IPv4
- |--user-agent curl| - some sites have whitelists for curl's user-agent - try that
- Make sure the site works with |curl| directly`, "|", "`"),
	Annotations: map[string]string{
		"versionIntroduced": "v1.43",
		"groups":            "Important",
	},
	RunE: func(command *cobra.Command, args []string) (err error) {
		// One argument (URL or CSV file) is mandatory; the destination is
		// the optional second argument.
		cmd.CheckArgs(1, 2, command, args)
		// The callback's error is handed to cmd.Run; this function itself
		// always returns nil.
		cmd.Run(true, true, command, func() error {
			if !urls {
				return run(args)
			}
			return runURLS(args)
		})
		return nil
	},
}
// copyURL is the function used to perform a single URL copy. It is a
// variable rather than a direct call to operations.CopyURL so that it can be
// replaced for testing.
var copyURL = operations.CopyURL // for testing
// runURLS processes a .csv file of urls and filenames.
//
// args[0] is the path of the CSV file and args[1] is the destination
// directory. Each record is URL[,FILENAME]; when the filename is missing or
// empty the name is chosen automatically, as with --auto-filename.
//
// Copies run concurrently, limited to the configured number of transfers.
// A failure on one URL is logged and counted but does not stop the others;
// the counted failures are reported through the returned error.
//
// It returns an error if --stdout or --print-filename is set, if the
// destination argument is missing, or if the CSV file cannot be opened or
// parsed.
func runURLS(args []string) (err error) {
	if stdout {
		return errors.New("can't use --stdout with --urls")
	}
	if printFilename {
		return errors.New("can't use --print-filename with --urls")
	}
	// The command accepts a single argument (for --stdout), so the
	// destination has to be checked for here before args[1:] is used.
	if len(args) < 2 {
		return errors.New("need 2 arguments if using --urls")
	}
	dstFs := cmd.NewFsDir(args[1:])

	f, err := os.Open(args[0])
	if err != nil {
		return fmt.Errorf("failed to open .csv file: %w", err)
	}
	// err is the named result so a failure to close is reported too.
	defer fs.CheckClose(f, &err)

	reader := csv.NewReader(f)
	reader.FieldsPerRecord = -1 // records may or may not carry a filename
	urlList, err := reader.ReadAll()
	if err != nil {
		return fmt.Errorf("failed reading .csv file: %w", err)
	}

	ec := errcount.New()
	g, gCtx := errgroup.WithContext(context.Background())
	ci := fs.GetConfig(gCtx)
	g.SetLimit(ci.Transfers)
	for _, urlEntry := range urlList {
		if len(urlEntry) == 0 {
			continue
		}
		g.Go(func() error {
			url := urlEntry[0]
			var filename string
			if len(urlEntry) > 1 {
				filename = urlEntry[1]
			}
			// An empty filename switches on automatic naming for this URL.
			_, err := copyURL(gCtx, dstFs, filename, url, filename == "", headerFilename, noClobber)
			if err != nil {
				fs.Errorf(filename, "failed to copy URL %q: %v", url, err)
				ec.Add(err)
			}
			// Always return nil so one failed URL does not cancel gCtx
			// and abort the remaining copies.
			return nil
		})
	}
	ec.Add(g.Wait())
	return ec.Err("not all URLs copied successfully")
}
// run handles the single URL form of the command.
//
// args[0] is the URL. Unless --stdout is set, args[1] names the destination:
// "-" selects standard output (and sets the stdout flag), otherwise it is a
// directory when --auto-filename is in use, or a file path when it is not.
// With --print-filename the name of the copied object is printed after a
// successful copy.
func run(args []string) error {
	var (
		dstFs    fs.Fs
		fileName string
	)
	if !stdout {
		switch {
		case len(args) < 2:
			return errors.New("need 2 arguments if not using --stdout")
		case args[1] == "-":
			stdout = true
		case autoFilename:
			dstFs = cmd.NewFsDir(args[1:])
		default:
			dstFs, fileName = cmd.NewFsDstFile(args[1:])
		}
	}

	ctx := context.Background()
	if stdout {
		return operations.CopyURLToWriter(ctx, args[0], os.Stdout)
	}

	obj, err := copyURL(ctx, dstFs, fileName, args[0], autoFilename, headerFilename, noClobber)
	if err == nil && printFilename && obj != nil {
		fmt.Println(obj.Remote())
	}
	return err
}