1
0
mirror of https://github.com/rclone/rclone.git synced 2026-02-20 19:33:28 +00:00
Files
rclone/cmd/copyurl/copyurl.go

188 lines
5.9 KiB
Go

// Package copyurl provides the copyurl command.
package copyurl
import (
"context"
"encoding/csv"
"errors"
"fmt"
"os"
"strings"
"github.com/rclone/rclone/cmd"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/config/flags"
"github.com/rclone/rclone/fs/operations"
"github.com/rclone/rclone/lib/errcount"
"github.com/spf13/cobra"
"golang.org/x/sync/errgroup"
)
// Command line flag values for copyurl. Every flag is off unless set by the
// user; the variables are bound to their flags in init.
var (
	// autoFilename is bound to --auto-filename.
	autoFilename bool
	// headerFilename is bound to --header-filename.
	headerFilename bool
	// printFilename is bound to --print-filename.
	printFilename bool
	// stdout is bound to --stdout.
	stdout bool
	// noClobber is bound to --no-clobber.
	noClobber bool
	// urls is bound to --urls.
	urls bool
)
// init adds the copyurl command to the root command and declares its flags,
// binding each flag to the package-level variable of the same purpose. Each
// flag uses the current value of its own variable as its default.
func init() {
	cmd.Root.AddCommand(commandDefinition)
	cmdFlags := commandDefinition.Flags()
	flags.BoolVarP(cmdFlags, &autoFilename, "auto-filename", "a", autoFilename, "Get the file name from the URL and use it for destination file path", "")
	flags.BoolVarP(cmdFlags, &headerFilename, "header-filename", "", headerFilename, "Get the file name from the Content-Disposition header", "")
	flags.BoolVarP(cmdFlags, &printFilename, "print-filename", "p", printFilename, "Print the resulting name from --auto-filename", "")
	flags.BoolVarP(cmdFlags, &noClobber, "no-clobber", "", noClobber, "Prevent overwriting file with same name", "")
	flags.BoolVarP(cmdFlags, &stdout, "stdout", "", stdout, "Write the output to stdout rather than a file", "")
	// The default must come from urls itself so that --urls never inherits
	// the default of an unrelated flag.
	flags.BoolVarP(cmdFlags, &urls, "urls", "", urls, "Use a CSV file of links to process multiple URLs", "")
}
// commandDefinition is the cobra command for "rclone copyurl".
//
// The Long help text is written with "|" standing in for a backtick, since a
// backtick cannot appear inside a raw string literal; strings.ReplaceAll
// converts every "|" to a backtick when the package is initialised.
var commandDefinition = &cobra.Command{
	Use:   "copyurl https://example.com dest:path",
	Short: `Copy the contents of the URL supplied content to dest:path.`,
	Long: strings.ReplaceAll(`Download a URL's content and copy it to the destination without saving
it in temporary storage.
Setting |--auto-filename| will attempt to automatically determine the
filename from the URL (after any redirections) and used in the
destination path.
With |--header-filename| in addition, if a specific filename is
set in HTTP headers, it will be used instead of the name from the URL.
With |--print-filename| in addition, the resulting file name will be
printed.
Setting |--no-clobber| will prevent overwriting file on the
destination if there is one with the same name.
Setting |--stdout| or making the output file name |-|
will cause the output to be written to standard output.
Setting |--urls| allows you to input a CSV file of URLs in format: URL,
FILENAME. If |--urls| is in use then replace the URL in the arguments with the
file containing the URLs, e.g.:
|||sh
rclone copyurl --urls myurls.csv remote:dir
|||
Missing filenames will be autogenerated equivalent to using |--auto-filename|.
Note that |--stdout| and |--print-filename| are incompatible with |--urls|.
This will do |--transfers| copies in parallel. Note that if |--auto-filename|
is desired for all URLs then a file with only URLs and no filename can be used.
Each FILENAME in the CSV file can start with a relative path which will be appended
to the destination path provided at the command line. For example, running the command
shown above with the following CSV file will write two files to the destination:
|remote:dir/local/path/bar.json| and |remote:dir/another/local/directory/qux.json|
|||csv
https://example.org/foo/bar.json,local/path/bar.json
https://example.org/qux/baz.json,another/local/directory/qux.json
|||
### Troubleshooting
If you can't get |rclone copyurl| to work then here are some things you can try:
- |--disable-http2| rclone will use HTTP2 if available - try disabling it
- |--bind 0.0.0.0| rclone will use IPv6 if available - try disabling it
- |--bind ::0| to disable IPv4
- |--user-agent curl| - some sites have whitelists for curl's user-agent - try that
- Make sure the site works with |curl| directly`, "|", "`"),
	Annotations: map[string]string{
		"versionIntroduced": "v1.43",
		"groups":            "Important",
	},
	// RunE dispatches to runURLS when --urls is set and to run otherwise.
	// The chosen function's error is handed to cmd.Run as the callback
	// result; RunE itself always returns nil.
	RunE: func(command *cobra.Command, args []string) error {
		// NOTE(review): presumably enforces between 1 and 2 positional
		// arguments - confirm against cmd.CheckArgs.
		cmd.CheckArgs(1, 2, command, args)
		cmd.Run(true, true, command, func() error {
			if urls {
				return runURLS(args)
			}
			return run(args)
		})
		return nil
	},
}
// copyURL is the function used to copy a single URL. It is a variable
// pointing at operations.CopyURL so that it can be replaced for testing.
var copyURL = operations.CopyURL
// runURLS copies every URL listed in a CSV file to the destination.
//
// args[0] is the path of the CSV file and args[1] is the destination. Each
// CSV record holds a URL, optionally followed by a file name; records may
// have differing numbers of fields. When the file name is missing or empty,
// true is passed in the auto-filename position of copyURL.
//
// Copies run concurrently, limited to the configured number of transfers. A
// failed copy is logged and added to the error counter without stopping the
// others; the accumulated result is returned through ec.Err at the end.
//
// It returns an error straight away if --stdout or --print-filename is set,
// if the destination argument is missing, or if the CSV file cannot be
// opened or parsed.
func runURLS(args []string) (err error) {
	if stdout {
		return errors.New("can't use --stdout with --urls")
	}
	if printFilename {
		return errors.New("can't use --print-filename with --urls")
	}
	// Without a destination argument args[1:] is empty and there is nothing
	// to build the destination Fs from, so reject it here with a clear error.
	if len(args) < 2 {
		return errors.New("need 2 arguments if using --urls")
	}
	dstFs := cmd.NewFsDir(args[1:])

	f, err := os.Open(args[0])
	if err != nil {
		return fmt.Errorf("failed to open .csv file: %w", err)
	}
	// err is the named result, so a failure to close the file can be
	// reported through it.
	defer fs.CheckClose(f, &err)

	reader := csv.NewReader(f)
	// A negative FieldsPerRecord disables the field count check, allowing
	// records with and without a file name in the same file.
	reader.FieldsPerRecord = -1
	urlList, err := reader.ReadAll()
	if err != nil {
		return fmt.Errorf("failed reading .csv file: %w", err)
	}

	ec := errcount.New()
	g, gCtx := errgroup.WithContext(context.Background())
	ci := fs.GetConfig(gCtx)
	g.SetLimit(ci.Transfers)

	for _, urlEntry := range urlList {
		if len(urlEntry) == 0 {
			continue
		}
		// Pull the fields out before starting the goroutine so that the
		// closure only captures variables declared in this iteration and
		// does not depend on the loop variable semantics of the Go version.
		srcURL := urlEntry[0]
		var filename string
		if len(urlEntry) > 1 {
			filename = urlEntry[1]
		}
		g.Go(func() error {
			_, err := copyURL(gCtx, dstFs, filename, srcURL, filename == "", headerFilename, noClobber)
			if err != nil {
				fs.Errorf(filename, "failed to copy URL %q: %v", srcURL, err)
				ec.Add(err)
			}
			// Always return nil so that one failed copy does not cancel
			// gCtx for the copies still running.
			return nil
		})
	}
	ec.Add(g.Wait())
	return ec.Err("not all URLs copied successfully")
}
// run copies a single URL, args[0], either to standard output or to the
// destination given in args[1].
//
// Output goes to standard output when --stdout is set or when the
// destination argument is "-"; in the latter case the stdout variable is set
// to true. Otherwise the destination is opened as a directory when
// --auto-filename is set, or split into an Fs and a file name when it is
// not. With --print-filename the remote name of the copied object is
// printed after a successful copy.
func run(args []string) error {
	var (
		dstFs   fs.Fs
		dstName string
	)
	if !stdout {
		switch {
		case len(args) < 2:
			return errors.New("need 2 arguments if not using --stdout")
		case args[1] == "-":
			stdout = true
		case autoFilename:
			dstFs = cmd.NewFsDir(args[1:])
		default:
			dstFs, dstName = cmd.NewFsDstFile(args[1:])
		}
	}

	ctx := context.Background()
	if stdout {
		return operations.CopyURLToWriter(ctx, args[0], os.Stdout)
	}

	obj, err := copyURL(ctx, dstFs, dstName, args[0], autoFilename, headerFilename, noClobber)
	if err == nil && printFilename && obj != nil {
		fmt.Println(obj.Remote())
	}
	return err
}