mirror of https://github.com/rclone/rclone.git synced 2025-12-24 12:13:19 +00:00

Compare commits


31 Commits

Author SHA1 Message Date
Nick Craig-Wood
98ad80bee3 union: fix slash behaviour on Windows 2020-03-10 15:59:42 +00:00
Max Sum
7da83346bf union: Implement policy by least number of object 2020-03-09 16:16:30 +00:00
Max Sum
c4545465e7 union: make quota relevant policies resilient to unsupported fields 2020-03-09 16:16:30 +00:00
Max Sum
67b38a457b union: add testings 2020-03-09 16:16:30 +00:00
Max Sum
c9374fbe5a union: fix issues when using space-relevant and path-preserving policies
A path-preserving policy needs to look up the parent dir of the operating path. Therefore, if the operating path is
the same as the root passed in during NewFs, there is no room to look upwards. About() can also have
problems if the folder does not exist. RootFs is added to solve this problem.
2020-03-09 16:16:30 +00:00
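A rough sketch of the lookup this commit message describes; the upstreamSketch type and its RootPath field are illustrative stand-ins (the real types live in backend/union/upstream), shown only to make the "no room for looking up" problem concrete:

package main

import (
	"fmt"
	"path"
)

// upstreamSketch is a hypothetical stand-in for a union upstream: RootPath is
// the upstream's path below the remote root that RootFs is rooted at.
type upstreamSketch struct {
	RootPath string
}

func main() {
	u := upstreamSketch{RootPath: "bucket/data"}

	// Operating on the union root itself: relative to the Fs created by NewFs
	// there is no parent left to inspect, but joined onto RootPath and resolved
	// against RootFs the parent directory can still be found.
	op := ""
	full := path.Join(u.RootPath, op)
	fmt.Println("parent of", full, "is", path.Dir(full)) // parent of bucket/data is bucket
}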
Max Sum
0081971ade union: update document 2020-03-09 16:16:30 +00:00
Max Sum
6898a0cccd union: backward compatible to old config 2020-03-09 16:16:30 +00:00
Max Sum
f0c17a72db union: refine implementation 2020-03-09 16:16:30 +00:00
Max Sum
266c200f8c union: fix mkdir when using path-preserving policy 2020-03-09 16:16:30 +00:00
Max Sum
3b4cafddad union: fix code quality issue 2020-03-09 16:16:30 +00:00
Max Sum
d7bb2d1d89 union: Add multiple error handler 2020-03-09 16:16:30 +00:00
Max Sum
540bd61305 union: fix goimports 2020-03-09 16:16:30 +00:00
Max Sum
3cd1b20236 union: add cancel to ctx 2020-03-09 16:16:30 +00:00
Max Sum
998169fc02 union: goimports fix 2020-03-09 16:16:30 +00:00
Max Sum
05666e6e51 union: fix indent 2020-03-09 16:16:30 +00:00
Max Sum
5720501b19 union: move entries to new file 2020-03-09 16:16:30 +00:00
Max Sum
a124ce1fb3 union: fix wrong behavior of NewFs, List and Purge 2020-03-09 16:16:30 +00:00
Max Sum
1b1e156908 union: Add fast path for single upstream upload 2020-03-09 16:16:30 +00:00
Max Sum
cd26142705 union: fix crash when upstream returns error 2020-03-09 16:16:30 +00:00
Max Sum
37e21f767c union: implement new policies
Implement eplfs, eplus, eprand, lfs, lus, newest and rand.
2020-03-09 16:16:30 +00:00
Max Sum
d3807c5a0d union: fix epall and all policy 2020-03-09 16:16:30 +00:00
Max Sum
36e184266f union: fix description and variable names of epff, epmfs, mfs policies 2020-03-09 16:16:30 +00:00
Max Sum
da9a44ea5e union: implement write on multiple remotes
Introduce policy from mergerfs.
2020-03-09 16:16:30 +00:00
Nick Craig-Wood
a492c0fb0e local: speed up multi thread downloads by using sparse files on Windows
Before this change rclone didn't use sparse files on Windows. This
meant that when you downloaded a file with a multithread download, the
first write that was not at the start of the file caused the whole file
up to that point to be written out as zeros.

This change makes the file sparse on Windows. Linux/macOS files
were already sparse.
2020-03-09 10:55:52 +00:00
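A minimal stand-alone illustration (not rclone code) of why this matters for multi-thread downloads: each chunk is written at its own offset, so a write that lands beyond the start of the file forces a non-sparse file to materialise all the zero bytes before it.

package main

import (
	"log"
	"os"
)

func main() {
	f, err := os.Create("chunk-demo.bin")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Simulate the last download chunk arriving first: write 10 bytes at an
	// offset of 64 MiB. On a sparse file the gap costs nothing; without
	// sparseness the OS has to write ~64 MiB of zeros before this write.
	if _, err := f.WriteAt([]byte("last chunk"), 64<<20); err != nil {
		log.Fatal(err)
	}
}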
Nick Craig-Wood
dfc7215bf9 drive: fix duplicate items when using --drive-shared-with-me #4018
Before this change, shared-with-me items with multiple parents (i.e. most
of those that aren't in the root) would appear twice in the directory
listings.

This fixes the problem by doing an early exit for shared-with-me
items.
2020-03-07 16:46:53 +00:00
Nick Craig-Wood
38e59ebdf3 drive: fix missing files when using --fast-list and --drive-shared-with-me
This bug was introduced by the commit below, which removed some necessary
code detecting shared-with-me items at the root with no parents:

4453fa4ba6 "drive: fix --fast-list when using appDataFolder"

This fix reverts that part of the patch.

Fixes #4018
2020-03-07 16:46:53 +00:00
Yves G
5ee24f804f webdav: report full and consistent usage with about
— allow either Used or Available to be ==0 (remote full or empty)
— compute Total if both values are received
2020-03-05 15:10:19 +00:00
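A small sketch of the accounting described above, under the assumption that the WebDAV quota properties map onto plain used/available numbers (the real logic lives in the webdav backend's About):

package main

import "fmt"

// quota holds the two values a WebDAV server can report; either one may
// legitimately be zero when the remote is completely empty or completely full.
type quota struct {
	used, available         int64
	haveUsed, haveAvailable bool
}

// total is only derived when both values were actually received.
func total(q quota) (int64, bool) {
	if !q.haveUsed || !q.haveAvailable {
		return 0, false
	}
	return q.used + q.available, true
}

func main() {
	q := quota{used: 0, available: 1 << 30, haveUsed: true, haveAvailable: true}
	if t, ok := total(q); ok {
		fmt.Println("total bytes:", t) // an empty remote (used == 0) is still valid
	}
}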
Nick Craig-Wood
747edf42c1 azureblob: document container level SAS URL from root now needs container
In 8a0775ce3c, which was released in v1.49.0, we inadvertently
stopped SAS URLs working from the root without a container name.

Before this change you could use `rclone mount azsas:` and it
was effectively equivalent to `rclone mount azsas:container`. After
this change, only `rclone mount azsas:container` will work; `rclone
mount azsas:` will have a directory in the root called "container".

After some discussion it was decided not to revert this change as the
current behaviour is more logical and in line with the similar
behaviour for the b2 backend.

Instead the documentation was updated to show exactly how container
level SAS URLs behave.

Fixes #4028
2020-03-05 14:56:36 +00:00
Nick Craig-Wood
ce23cb2093 Add evileye to contributors 2020-03-05 14:07:32 +00:00
evileye
6ff0bb825e mount: fix fail because of too long volume name - fixes #4026 2020-03-05 13:57:20 +00:00
Lars Lehtonen
fef2c6bf7a backend/s3: replace deprecated session.New() with session.NewSession() 2020-03-05 11:34:10 +00:00
165 changed files with 2806 additions and 22441 deletions

View File

@@ -28,7 +28,6 @@ import (
_ "github.com/rclone/rclone/backend/opendrive"
_ "github.com/rclone/rclone/backend/pcloud"
_ "github.com/rclone/rclone/backend/premiumizeme"
_ "github.com/rclone/rclone/backend/press"
_ "github.com/rclone/rclone/backend/putio"
_ "github.com/rclone/rclone/backend/qingstor"
_ "github.com/rclone/rclone/backend/s3"

View File

@@ -1591,8 +1591,13 @@ func (f *Fs) listRRunner(ctx context.Context, wg *sync.WaitGroup, in <-chan list
listRSlices{dirs, paths}.Sort()
var iErr error
_, err := f.list(ctx, dirs, "", false, false, false, func(item *drive.File) bool {
// shared with me items have no parents when at the root
if f.opt.SharedWithMe && len(item.Parents) == 0 && len(paths) == 1 && paths[0] == "" {
item.Parents = dirs
}
for _, parent := range item.Parents {
var i int
earlyExit := false
// If only one item in paths then no need to search for the ID
// assuming google drive is doing its job properly.
//
@@ -1602,6 +1607,9 @@ func (f *Fs) listRRunner(ctx context.Context, wg *sync.WaitGroup, in <-chan list
// - shared with me items have no parents at the root
// - if using a root alias, eg "root" or "appDataFolder" the ID won't match
i = 0
// items at root can have more than one parent so we need to put
// the item in just once.
earlyExit = true
} else {
// only handle parents that are in the requested dirs list if not at root
i = sort.SearchStrings(dirs, parent)
@@ -1621,6 +1629,11 @@ func (f *Fs) listRRunner(ctx context.Context, wg *sync.WaitGroup, in <-chan list
iErr = err
return true
}
// If didn't check parents then insert only once
if earlyExit {
break
}
}
return false
})

View File

@@ -1068,6 +1068,12 @@ func (f *Fs) OpenWriterAt(ctx context.Context, remote string, size int64) (fs.Wr
if err != nil {
fs.Debugf(o, "Failed to pre-allocate: %v", err)
}
// Set the file to be a sparse file (important on Windows)
err = setSparse(out)
if err != nil {
fs.Debugf(o, "Failed to set sparse: %v", err)
}
return out, nil
}

View File

@@ -8,3 +8,8 @@ import "os"
func preAllocate(size int64, out *os.File) error {
return nil
}
// setSparse makes the file be a sparse file
func setSparse(out *os.File) error {
return nil
}

View File

@@ -44,3 +44,8 @@ again:
// }
return err
}
// setSparse makes the file be a sparse file
func setSparse(out *os.File) error {
return nil
}

View File

@@ -77,3 +77,16 @@ func preAllocate(size int64, out *os.File) error {
return nil
}
const (
FSCTL_SET_SPARSE = 0x000900c4
)
// setSparse makes the file be a sparse file
func setSparse(out *os.File) error {
err := syscall.DeviceIoControl(syscall.Handle(out.Fd()), FSCTL_SET_SPARSE, nil, 0, nil, 0, nil, nil)
if err != nil {
return errors.Wrap(err, "DeviceIoControl FSCTL_SET_SPARSE")
}
return nil
}

View File

@@ -1 +0,0 @@
test

View File

@@ -1,75 +0,0 @@
package press
import (
"bufio"
"io"
"github.com/klauspost/compress/gzip"
)
// AlgGzip represents gzip compression algorithm
type AlgGzip struct {
level int
blockSize uint32
}
// InitializeGzip initializes the gzip compression Algorithm
func InitializeGzip(bs uint32, level int) Algorithm {
a := new(AlgGzip)
a.blockSize = bs
a.level = level
return a
}
// GetFileExtension returns file extension
func (a *AlgGzip) GetFileExtension() string {
return ".gz"
}
// GetHeader returns the Lz4 compression header
func (a *AlgGzip) GetHeader() []byte {
return []byte{}
}
// GetFooter returns
func (a *AlgGzip) GetFooter() []byte {
return []byte{}
}
// CompressBlock that compresses a block using gzip
func (a *AlgGzip) CompressBlock(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize uint64, err error) {
// Initialize buffer
bufw := bufio.NewWriterSize(out, int(a.blockSize+(a.blockSize)>>4))
// Initialize block writer
outw, err := gzip.NewWriterLevel(bufw, a.level)
if err != nil {
return 0, 0, err
}
// Compress block
_, err = outw.Write(in)
if err != nil {
return 0, 0, err
}
// Finalize gzip file, flush buffer and return
err = outw.Close()
if err != nil {
return 0, 0, err
}
blockSize := uint32(bufw.Buffered())
err = bufw.Flush()
return blockSize, uint64(len(in)), err
}
// DecompressBlock decompresses Lz4 compressed block
func (a *AlgGzip) DecompressBlock(in io.Reader, out io.Writer, BlockSize uint32) (n int, err error) {
gzipReader, err := gzip.NewReader(in)
if err != nil {
return 0, err
}
written, err := io.Copy(out, gzipReader)
return int(written), err
}

View File

@@ -1,223 +0,0 @@
package press
// This file implements the LZ4 algorithm.
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math/bits"
"github.com/buengese/xxh32"
lz4 "github.com/pierrec/lz4"
)
/*
Structure of LZ4 header:
Flags:
Version = 01
Independent = 1
Block Checksum = 1
Content Size = 0
Content Checksum = 0
Reserved = 0
Dictionary ID = 0
BD byte:
Reserved = 0
Block Max Size = 101 (or 5; 256kb)
Reserved = 0000
Header checksum byte (xxhash(flags and bd byte) >> 1) & 0xff
*/
// LZ4Header - Header of our LZ4 file
//var LZ4Header = []byte{0x04, 0x22, 0x4d, 0x18, 0x70, 0x50, 0x84}
// LZ4Footer - Footer of our LZ4 file
var LZ4Footer = []byte{0x00, 0x00, 0x00, 0x00} // This is just an empty block
const (
frameMagic uint32 = 0x184D2204
compressedBlockFlag = 1 << 31
compressedBlockMask = compressedBlockFlag - 1
)
// AlgLz4 is the Lz4 Compression algorithm
type AlgLz4 struct {
Header lz4.Header
buf [19]byte // magic number(4) + header(flags(2)+[Size(8)+DictID(4)]+checksum(1)) does not exceed 19 bytes
}
// InitializeLz4 creates an Lz4 compression algorithm
func InitializeLz4(bs uint32, blockChecksum bool) Algorithm {
a := new(AlgLz4)
a.Header.Reset()
a.Header = lz4.Header{
BlockChecksum: blockChecksum,
BlockMaxSize: int(bs),
}
return a
}
// GetFileExtension returns file extension
func (a *AlgLz4) GetFileExtension() string {
return ".lz4"
}
// GetHeader returns the Lz4 compression header
func (a *AlgLz4) GetHeader() []byte {
// Size is optional.
buf := a.buf[:]
// Set the fixed size data: magic number, block max size and flags.
binary.LittleEndian.PutUint32(buf[0:], frameMagic)
flg := byte(lz4.Version << 6)
flg |= 1 << 5 // No block dependency.
if a.Header.BlockChecksum {
flg |= 1 << 4
}
if a.Header.Size > 0 {
flg |= 1 << 3
}
buf[4] = flg
buf[5] = blockSizeValueToIndex(a.Header.BlockMaxSize) << 4
// Current buffer size: magic(4) + flags(1) + block max size (1).
n := 6
if a.Header.Size > 0 {
binary.LittleEndian.PutUint64(buf[n:], a.Header.Size)
n += 8
}
// The header checksum includes the flags, block max size and optional Size.
buf[n] = byte(xxh32.ChecksumZero(buf[4:n]) >> 8 & 0xFF)
// Header ready, write it out.
return buf[0 : n+1]
}
// GetFooter returns
func (a *AlgLz4) GetFooter() []byte {
return LZ4Footer
}
// CompressBlock that compresses a block using lz4
func (a *AlgLz4) CompressBlock(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize uint64, err error) {
if len(in) > 0 {
n, err := a.compressBlock(in, out)
if err != nil {
return 0, 0, err
}
return n, uint64(len(in)), nil
}
return 0, 0, nil
}
// compressBlock compresses a block.
func (a *AlgLz4) compressBlock(data []byte, dst io.Writer) (uint32, error) {
zdata := make([]byte, a.Header.BlockMaxSize) // The compressed block size cannot exceed the input's.
var zn int
if level := a.Header.CompressionLevel; level != 0 {
zn, _ = lz4.CompressBlockHC(data, zdata, level)
} else {
var hashTable [1 << 16]int
zn, _ = lz4.CompressBlock(data, zdata, hashTable[:])
}
var bLen uint32
if zn > 0 && zn < len(data) {
// Compressible and compressed size smaller than uncompressed: ok!
bLen = uint32(zn)
zdata = zdata[:zn]
} else {
// Uncompressed block.
bLen = uint32(len(data)) | compressedBlockFlag
zdata = data
}
// Write the block.
if err := a.writeUint32(bLen, dst); err != nil {
return 0, err
}
_, err := dst.Write(zdata)
if err != nil {
return 0, err
}
if !a.Header.BlockChecksum {
return bLen, nil
}
checksum := xxh32.ChecksumZero(zdata)
if err := a.writeUint32(checksum, dst); err != nil {
return 0, err
}
return bLen, nil
}
// writeUint32 writes a uint32 to the underlying writer.
func (a *AlgLz4) writeUint32(x uint32, dst io.Writer) error {
buf := make([]byte, 4)
binary.LittleEndian.PutUint32(buf, x)
_, err := dst.Write(buf)
return err
}
func blockSizeValueToIndex(size int) byte {
return 4 + byte(bits.TrailingZeros(uint(size)>>16)/2)
}
// DecompressBlock decompresses Lz4 compressed block
func (a *AlgLz4) DecompressBlock(in io.Reader, out io.Writer, BlockSize uint32) (n int, err error) {
// Get our compressed data
var b bytes.Buffer
_, err = io.Copy(&b, in)
if err != nil {
return 0, err
}
zdata := b.Bytes()
bLen := binary.LittleEndian.Uint32(zdata[:4])
if bLen&compressedBlockFlag > 0 {
// Uncompressed block.
bLen &= compressedBlockMask
if bLen > BlockSize {
return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
}
data := zdata[4 : bLen+4]
if a.Header.BlockChecksum {
checksum := binary.LittleEndian.Uint32(zdata[4+bLen:])
if h := xxh32.ChecksumZero(data); h != checksum {
return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
}
}
_, err := out.Write(data)
return len(data), err
}
// compressed block
if bLen > BlockSize {
return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
}
if a.Header.BlockChecksum {
checksum := binary.LittleEndian.Uint32(zdata[4+bLen:])
if h := xxh32.ChecksumZero(zdata[4 : bLen+4]); h != checksum {
return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
}
}
data := make([]byte, BlockSize)
n, err = lz4.UncompressBlock(zdata[4:bLen+4], data)
if err != nil {
return 0, err
}
_, err = out.Write(data[:n])
return n, err
}

View File

@@ -1,75 +0,0 @@
package press
import (
"bufio"
"io"
"github.com/ulikunitz/xz"
)
// AlgXZ represents the XZ compression algorithm
type AlgXZ struct {
blockSize uint32
config xz.WriterConfig
}
// InitializeXZ creates an Lz4 compression algorithm
func InitializeXZ(bs uint32) Algorithm {
a := new(AlgXZ)
a.blockSize = bs
a.config = xz.WriterConfig{}
return a
}
// GetFileExtension returns file extension
func (a *AlgXZ) GetFileExtension() string {
return ".xz"
}
// GetHeader returns the Lz4 compression header
func (a *AlgXZ) GetHeader() []byte {
return []byte{}
}
// GetFooter returns
func (a *AlgXZ) GetFooter() []byte {
return []byte{}
}
// CompressBlock that compresses a block using lz4
func (a *AlgXZ) CompressBlock(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize uint64, err error) {
// Initialize buffer
bufw := bufio.NewWriterSize(out, int(a.blockSize+(a.blockSize)>>4))
// Initialize block writer
outw, err := a.config.NewWriter(bufw)
if err != nil {
return 0, 0, err
}
// Compress block
_, err = outw.Write(in)
if err != nil {
return 0, 0, err
}
// Finalize gzip file, flush buffer and return
err = outw.Close()
if err != nil {
return 0, 0, err
}
blockSize := uint32(bufw.Buffered())
err = bufw.Flush()
return blockSize, uint64(len(in)), err
}
// DecompressBlock decompresses Lz4 compressed block
func (a *AlgXZ) DecompressBlock(in io.Reader, out io.Writer, BlockSize uint32) (n int, err error) {
xzReader, err := xz.NewReader(in)
if err != nil {
return 0, err
}
written, err := io.Copy(out, xzReader)
return int(written), err
}

View File

@@ -1,526 +0,0 @@
// Package press provides wrappers for Fs and Object which implement compression.
// This file is the backend implementation for seekable compression.
package press
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"log"
)
// Compression modes
const (
Uncompressed = -1
LZ4 = 2
Gzip = 4
XZ = 8
)
// Errors
var (
ErrMetadataCorrupted = errors.New("metadata may have been corrupted")
)
// DEBUG - flag for debug mode
const DEBUG = false
// Compression is a struct containing configurable variables (what used to be constants)
type Compression struct {
CompressionMode int // Compression mode
Algorithm Algorithm
BlockSize uint32 // Size of blocks. Higher block size means better compression but more download bandwidth needed for small downloads
// ~1MB is recommended for xz, while ~128KB is recommended for gzip and lz4
HeuristicBytes int64 // Bytes to perform gzip heuristic on to determine whether a file should be compressed
NumThreads int // Number of threads to use for compression
MaxCompressionRatio float64 // Maximum compression ratio for a file to be considered compressible
BinPath string // Path to compression binary. This is used for all non-gzip compression.
}
// Algorithm is the main compression Algorithm Interface
type Algorithm interface {
GetHeader() []byte
GetFileExtension() string
CompressBlock(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize uint64, err error)
DecompressBlock(in io.Reader, out io.Writer, BlockSize uint32) (n int, err error)
GetFooter() []byte
}
// NewCompressionPreset creates a Compression object with a preset mode/bs
func NewCompressionPreset(preset string) (*Compression, error) {
switch preset {
case "lz4":
alg := InitializeLz4(262144, true)
return NewCompression(LZ4, alg, 262144) // LZ4 compression (very fast)
case "gzip":
alg := InitializeGzip(131072, 6)
return NewCompression(Gzip, alg, 131070) // GZIP-default compression (medium)*/
case "xz":
alg := InitializeXZ(1048576)
return NewCompression(XZ, alg, 1048576) // XZ compression (strong compression)*/
}
return nil, errors.New("Compression mode doesn't exist")
}
// NewCompressionPresetNumber creates a Compression object with a preset mode/bs
func NewCompressionPresetNumber(preset int) (*Compression, error) {
switch preset {
case LZ4:
alg := InitializeLz4(262144, true)
return NewCompression(LZ4, alg, 262144) // LZ4 compression (very fast)
case Gzip:
alg := InitializeGzip(131072, 6)
return NewCompression(Gzip, alg, 131070) // GZIP-default compression (medium)*/
case XZ:
alg := InitializeXZ(1048576)
return NewCompression(XZ, alg, 1048576) // XZ compression (strong compression)*/
}
return nil, errors.New("Compression mode doesn't exist")
}
// NewCompression creates a Compression object with some default configuration values
func NewCompression(mode int, alg Algorithm, bs uint32) (*Compression, error) {
return NewCompressionAdvanced(mode, alg, bs, 1048576, 12, 0.9)
}
// NewCompressionAdvanced creates a Compression object
func NewCompressionAdvanced(mode int, alg Algorithm, bs uint32, hb int64, threads int, mcr float64) (c *Compression, err error) {
// Set vars
c = new(Compression)
c.Algorithm = alg
c.CompressionMode = mode
c.BlockSize = bs
c.HeuristicBytes = hb
c.NumThreads = threads
c.MaxCompressionRatio = mcr
return c, err
}
/*** UTILITY FUNCTIONS ***/
// GetFileExtension gets a file extension for current compression mode
func (c *Compression) GetFileExtension() string {
return c.Algorithm.GetFileExtension()
}
// GetFileCompressionInfo gets a file extension along with compressibility of file
func (c *Compression) GetFileCompressionInfo(reader io.Reader) (compressable bool, extension string, err error) {
// Use our compression algorithm to do a heuristic on the first few bytes
var emulatedBlock, emulatedBlockCompressed bytes.Buffer
_, err = io.CopyN(&emulatedBlock, reader, c.HeuristicBytes)
if err != nil && err != io.EOF {
return false, "", err
}
compressedSize, uncompressedSize, err := c.Algorithm.CompressBlock(emulatedBlock.Bytes(), &emulatedBlockCompressed)
if err != nil {
return false, "", err
}
compressionRatio := float64(compressedSize) / float64(uncompressedSize)
// If the data is not compressible, return so
if compressionRatio > c.MaxCompressionRatio {
return false, ".bin", nil
}
// If the file is compressible, select file extension based on compression mode
return true, c.Algorithm.GetFileExtension(), nil
}
/*** MAIN COMPRESSION INTERFACE ***/
// compressionResult represents the result of compression for a single block (gotten by a single thread)
type compressionResult struct {
buffer *bytes.Buffer
n uint64
err error
}
// CompressFileReturningBlockData compresses a file returning the block data for that file.
func (c *Compression) CompressFileReturningBlockData(in io.Reader, out io.Writer) (blockData []uint32, err error) {
// Initialize buffered writer
bufw := bufio.NewWriterSize(out, int((c.BlockSize+(c.BlockSize)>>4)*uint32(c.NumThreads)))
// Get blockData, copy over header, add length of header to blockData
blockData = make([]uint32, 0)
header := c.Algorithm.GetHeader()
_, err = bufw.Write(header)
if err != nil {
return nil, err
}
blockData = append(blockData, uint32(len(header)))
// Compress blocks
for {
// Loop through threads, spawning a go procedure for each thread. If we get eof on one thread, set eofAt to that thread and break
compressionResults := make([]chan compressionResult, c.NumThreads)
eofAt := -1
for i := 0; i < c.NumThreads; i++ {
// Create thread channel and allocate buffer to pass to thread
compressionResults[i] = make(chan compressionResult)
var inputBuffer bytes.Buffer
_, err = io.CopyN(&inputBuffer, in, int64(c.BlockSize))
if err == io.EOF {
eofAt = i
} else if err != nil {
return nil, err
}
// Run thread
go func(i int, in []byte) {
// Initialize thread writer and result struct
var res compressionResult
var buffer bytes.Buffer
// Compress block
_, n, err := c.Algorithm.CompressBlock(in, &buffer)
if err != nil && err != io.EOF { // This errored out.
res.buffer = nil
res.n = 0
res.err = err
compressionResults[i] <- res
return
}
// Pass our data back to the main thread as a compression result
res.buffer = &buffer
res.n = n
res.err = err
compressionResults[i] <- res
}(i, inputBuffer.Bytes())
// If we have reached eof, we don't need more threads
if eofAt != -1 {
break
}
}
// Process writers in order
for i := 0; i < c.NumThreads; i++ {
if compressionResults[i] != nil {
// Get current compression result, get buffer, and copy buffer over to output
res := <-compressionResults[i]
close(compressionResults[i])
if res.buffer == nil {
return nil, res.err
}
blockSize := uint32(res.buffer.Len())
_, err = io.Copy(bufw, res.buffer)
if err != nil {
return nil, err
}
if DEBUG {
fmt.Printf("%d %d\n", res.n, blockSize)
}
// Append block size to block data
blockData = append(blockData, blockSize)
// If this is the last block, add the raw size of the last block to the end of blockData and break
if eofAt == i {
if DEBUG {
log.Printf("%d %d %d\n", res.n, byte(res.n%256), byte(res.n/256))
}
blockData = append(blockData, uint32(res.n))
break
}
}
}
// Get number of bytes written in this block (they should all be in the bufio buffer), then close gzip and flush buffer
err = bufw.Flush()
if err != nil {
return nil, err
}
// If eof happened, break
if eofAt != -1 {
if DEBUG {
log.Printf("%d", eofAt)
log.Printf("%v", blockData)
}
break
}
}
// Write footer and flush
footer := c.Algorithm.GetFooter()
_, err = bufw.Write(footer)
if err != nil {
return nil, err
}
err = bufw.Flush()
// Return
return blockData, err
}
/*** BLOCK DECOMPRESSION FUNCTIONS ***/
// Wrapper function for decompressBlock that implements multithreading
// decompressionResult represents the result of decompressing a block
type decompressionResult struct {
err error
buffer *bytes.Buffer
}
func (d *Decompressor) decompressBlockRangeMultithreaded(in io.Reader, out io.Writer, startingBlock uint32) (n int, err error) {
// First, use bufio.Reader to reduce the number of reads and bufio.Writer to reduce the number of writes
bufin := bufio.NewReader(in)
bufout := bufio.NewWriter(out)
// Decompress each block individually.
currBatch := startingBlock // Block # of start of current batch of blocks
totalBytesCopied := 0
for {
// Loop through threads
eofAt := -1
decompressionResults := make([]chan decompressionResult, d.c.NumThreads)
for i := 0; i < d.c.NumThreads; i++ {
// Get currBlock
currBlock := currBatch + uint32(i)
// Create channel
decompressionResults[i] = make(chan decompressionResult)
// Check if we've reached EOF
if currBlock >= d.numBlocks {
eofAt = i
break
}
// Get block to decompress
var compressedBlock bytes.Buffer
var err error
n, err := io.CopyN(&compressedBlock, bufin, d.blockStarts[currBlock+1]-d.blockStarts[currBlock])
if err != nil || n == 0 { // End of stream
eofAt = i
break
}
// Spawn thread to decompress block
if DEBUG {
log.Printf("Spawning %d", i)
}
go func(i int, currBlock uint32, in io.Reader) {
var block bytes.Buffer
var res decompressionResult
// Decompress block
_, res.err = d.c.Algorithm.DecompressBlock(in, &block, d.c.BlockSize)
res.buffer = &block
decompressionResults[i] <- res
}(i, currBlock, &compressedBlock)
}
if DEBUG {
log.Printf("Eof at %d", eofAt)
}
// Process results
for i := 0; i < d.c.NumThreads; i++ {
// If we got EOF, return
if eofAt == i {
return totalBytesCopied, bufout.Flush() // Flushing bufout is needed to prevent us from getting all nulls
}
// Get result and close
res := <-decompressionResults[i]
close(decompressionResults[i])
if res.err != nil {
return totalBytesCopied, res.err
}
// Copy to output and add to total bytes copied
n, err := io.Copy(bufout, res.buffer)
totalBytesCopied += int(n)
if err != nil {
return totalBytesCopied, err
}
}
// Add NumThreads to currBatch
currBatch += uint32(d.c.NumThreads)
}
}
/*** MAIN DECOMPRESSION INTERFACE ***/
// Decompressor is the ReadSeeker implementation for decompression
type Decompressor struct {
cursorPos *int64 // The current location we have seeked to
blockStarts []int64 // The start of each block. These will be recovered from the block sizes
numBlocks uint32 // Number of blocks
decompressedSize int64 // Decompressed size of the file.
in io.ReadSeeker // Input
c *Compression // Compression options
}
// Parses block data. Returns the number of blocks, the block start locations for each block, and the decompressed size of the entire file.
func parseBlockData(blockData []uint32, BlockSize uint32) (numBlocks uint32, blockStarts []int64, decompressedSize int64) {
// Parse the block data
blockDataLen := len(blockData)
numBlocks = uint32(blockDataLen - 1)
if DEBUG {
log.Printf("%v\n", blockData)
log.Printf("metadata len, numblocks = %d, %d", blockDataLen, numBlocks)
}
blockStarts = make([]int64, numBlocks+1) // Starts with start of first block (and end of header), ends with end of last block
currentBlockPosition := int64(0)
for i := uint32(0); i < numBlocks; i++ { // Loop through block data, getting starts of blocks.
currentBlockSize := blockData[i]
currentBlockPosition += int64(currentBlockSize)
blockStarts[i] = currentBlockPosition
}
blockStarts[numBlocks] = currentBlockPosition // End of last block
//log.Printf("Block Starts: %v\n", d.blockStarts)
numBlocks-- // Subtract 1 from number of blocks because our header technically isn't a block
// Get uncompressed size of last block and derive uncompressed size of file
lastBlockRawSize := blockData[blockDataLen-1]
decompressedSize = int64(numBlocks-1)*int64(BlockSize) + int64(lastBlockRawSize)
if DEBUG {
log.Printf("Decompressed size = %d", decompressedSize)
}
return numBlocks, blockStarts, decompressedSize
}
// Initializes decompressor with the block data specified.
func (d *Decompressor) initWithBlockData(c *Compression, in io.ReadSeeker, size int64, blockData []uint32) (err error) {
// Copy over compression object
d.c = c
// Initialize cursor position
d.cursorPos = new(int64)
// Parse the block data
d.numBlocks, d.blockStarts, d.decompressedSize = parseBlockData(blockData, d.c.BlockSize)
// Initialize cursor position value and copy over reader
*d.cursorPos = 0
_, err = in.Seek(0, io.SeekStart)
d.in = in
return err
}
// Read reads data using a decompressor
func (d Decompressor) Read(p []byte) (int, error) {
if DEBUG {
log.Printf("Cursor pos before: %d\n", *d.cursorPos)
}
// Check if we're at the end of the file or before the beginning of the file
if *d.cursorPos >= d.decompressedSize || *d.cursorPos < 0 {
if DEBUG {
log.Println("Out of bounds EOF")
}
return 0, io.EOF
}
// Get block range to read
blockNumber := *d.cursorPos / int64(d.c.BlockSize)
blockStart := d.blockStarts[blockNumber] // Start position of blocks to read
dataOffset := *d.cursorPos % int64(d.c.BlockSize) // Offset of data to read in blocks to read
bytesToRead := len(p) // Number of bytes to read
blocksToRead := (int64(bytesToRead)+dataOffset)/int64(d.c.BlockSize) + 1 // Number of blocks to read
returnEOF := false
if blockNumber+blocksToRead > int64(d.numBlocks) { // Overflowed the last block
blocksToRead = int64(d.numBlocks) - blockNumber
returnEOF = true
}
blockEnd := d.blockStarts[blockNumber+blocksToRead] // Start of the block after the last block we want to get is the end of the last block we want to get
blockLen := blockEnd - blockStart
// Read compressed block range into buffer
var compressedBlocks bytes.Buffer
_, err := d.in.Seek(blockStart, io.SeekStart)
if err != nil {
return 0, err
}
n1, err := io.CopyN(&compressedBlocks, d.in, blockLen)
if DEBUG {
log.Printf("block # = %d @ %d <- %d, len %d, copied %d bytes", blockNumber, blockStart, *d.cursorPos, blockLen, n1)
}
if err != nil {
if DEBUG {
log.Println("Copy Error")
}
return 0, err
}
// Decompress block range
var b bytes.Buffer
n, err := d.decompressBlockRangeMultithreaded(&compressedBlocks, &b, uint32(blockNumber))
if err != nil {
log.Println("Decompression error")
return n, err
}
// Calculate bytes read
readOverflow := *d.cursorPos + int64(bytesToRead) - d.decompressedSize
if readOverflow < 0 {
readOverflow = 0
}
bytesRead := int64(bytesToRead) - readOverflow
if DEBUG {
log.Printf("Read offset = %d, overflow = %d", dataOffset, readOverflow)
log.Printf("Decompressed %d bytes; read %d out of %d bytes\n", n, bytesRead, bytesToRead)
// log.Printf("%v", b.Bytes())
}
// If we read 0 bytes, we reached the end of the file
if bytesRead == 0 {
log.Println("EOF")
return 0, io.EOF
}
// Copy from buffer+offset to p
_, err = io.CopyN(ioutil.Discard, &b, dataOffset)
if err != nil {
return 0, err
}
n, err = b.Read(p) // Note: everything after bytesToRead bytes will be discarded; we are returning bytesToRead instead of n
if err != nil {
return n, err
}
// Increment cursor position and return
*d.cursorPos += bytesRead
if returnEOF {
if DEBUG {
log.Println("EOF")
}
return int(bytesRead), io.EOF
}
return int(bytesRead), nil
}
// Seek seeks to a location in compressed stream
func (d Decompressor) Seek(offset int64, whence int) (int64, error) {
// Seek to offset in cursorPos
if whence == io.SeekStart {
*d.cursorPos = offset
} else if whence == io.SeekCurrent {
*d.cursorPos += offset
} else if whence == io.SeekEnd {
*d.cursorPos = d.decompressedSize + offset
}
// Return
return offset, nil
}
// DecompressFileExtData decompresses a file using external block data. Argument "size" is very useful here.
func (c *Compression) DecompressFileExtData(in io.ReadSeeker, size int64, blockData []uint32) (FileHandle io.ReadSeeker, decompressedSize int64, err error) {
var decompressor Decompressor
err = decompressor.initWithBlockData(c, in, size, blockData)
return decompressor, decompressor.decompressedSize, err
}

View File

@@ -1,131 +0,0 @@
package press
import (
"bufio"
"bytes"
"crypto/md5"
"encoding/base64"
"io"
"io/ioutil"
"math/rand"
"os"
"strings"
"testing"
)
const TestStringSmall = "The quick brown fox jumps over the lazy dog."
const TestSizeLarge = 2097152 // 2 megabytes
// Tests compression and decompression for a preset
func testCompressDecompress(t *testing.T, preset string, testString string) {
// Create compression instance
comp, err := NewCompressionPreset(preset)
if err != nil {
t.Fatal(err)
}
// Open files and hashers
testFile := strings.NewReader(testString)
testFileHasher := md5.New()
if err != nil {
t.Fatal(err)
}
compressedFile, err := ioutil.TempFile(os.TempDir(), "rclone_compression_test")
if err != nil {
t.Fatal(err)
}
outHasher := md5.New()
// Compress file and hash it (size doesn't matter here)
testFileReader, testFileWriter := io.Pipe()
go func() {
_, err := io.Copy(io.MultiWriter(testFileHasher, testFileWriter), testFile)
if err != nil {
t.Fatal("Failed to write compressed file")
}
err = testFileWriter.Close()
if err != nil {
t.Log("Failed to close compressed file")
}
}()
var blockData []uint32
blockData, err = comp.CompressFileReturningBlockData(testFileReader, compressedFile)
if err != nil {
t.Fatalf("Compression failed with error: %v", err)
}
testFileHash := testFileHasher.Sum(nil)
// Get the size, seek to the beginning of the compressed file
size, err := compressedFile.Seek(0, io.SeekEnd)
if err != nil {
t.Fatal(err)
}
_, err = compressedFile.Seek(0, io.SeekStart)
if err != nil {
t.Fatal(err)
}
t.Logf("Compressed size: %d\n", size)
// Decompress file into a hasher
var FileHandle io.ReadSeeker
var decompressedSize int64
FileHandle, decompressedSize, err = comp.DecompressFileExtData(compressedFile, size, blockData)
if err != nil {
t.Fatal(err)
}
t.Logf("Decompressed size: %d\n", decompressedSize)
bufr := bufio.NewReaderSize(FileHandle, 12345678)
_, err = io.Copy(outHasher, bufr)
if err != nil && err != io.EOF {
t.Fatal(err)
}
decompressedFileHash := outHasher.Sum(nil)
// Clean up
err = compressedFile.Close()
if err != nil {
t.Log("Warning: cannot close compressed test file")
}
err = os.Remove(compressedFile.Name())
if err != nil {
t.Log("Warning: cannot remove compressed test file")
}
// Compare hashes
if !bytes.Equal(testFileHash, decompressedFileHash) {
t.Logf("Hash of original file: %x\n", testFileHash)
t.Logf("Hash of recovered file: %x\n", decompressedFileHash)
t.Fatal("Hashes do not match!")
}
}
// Tests both small and large strings for a preset
func testSmallLarge(t *testing.T, preset string) {
testStringLarge := getCompressibleString(TestSizeLarge)
t.Run("TestSmall", func(t *testing.T) {
testCompressDecompress(t, preset, TestStringSmall)
})
t.Run("TestLarge", func(t *testing.T) {
testCompressDecompress(t, preset, testStringLarge)
})
}
// Gets a compressible string
func getCompressibleString(size int) string {
// Get pseudorandom bytes
prbytes := make([]byte, size*3/4+16)
prsource := rand.New(rand.NewSource(0))
prsource.Read(prbytes)
// Encode in base64
encoding := base64.NewEncoding("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/")
return encoding.EncodeToString(prbytes)[:size]
}
func TestCompression(t *testing.T) {
testCases := []string{"lz4", "gzip", "xz"}
for _, tc := range testCases {
t.Run(tc, func(t *testing.T) {
testSmallLarge(t, tc)
})
}
}

File diff suppressed because it is too large

View File

@@ -1,129 +0,0 @@
// Test Crypt filesystem interface
package press
import (
"os"
"path/filepath"
"testing"
_ "github.com/rclone/rclone/backend/local"
"github.com/rclone/rclone/fstest"
"github.com/rclone/rclone/fstest/fstests"
)
// TestIntegration runs integration tests against the remote
func TestIntegration(t *testing.T) {
if *fstest.RemoteName == "" {
t.Skip("Skipping as -remote not set")
}
fstests.Run(t, &fstests.Opt{
RemoteName: *fstest.RemoteName,
NilObject: (*Object)(nil),
UnimplementableFsMethods: []string{
"OpenWriterAt",
"MergeDirs",
"DirCacheFlush",
"PutUnchecked",
"PutStream",
"UserInfo",
"Disconnect",
},
UnimplementableObjectMethods: []string{
"GetTier",
"SetTier",
},
})
}
// TestRemoteLz4 tests LZ4 compression
func TestRemoteLz4(t *testing.T) {
if *fstest.RemoteName != "" {
t.Skip("Skipping as -remote set")
}
tempdir := filepath.Join(os.TempDir(), "rclone-press-test-lz4")
name := "TestPressLz4"
fstests.Run(t, &fstests.Opt{
RemoteName: name + ":",
NilObject: (*Object)(nil),
UnimplementableFsMethods: []string{
"OpenWriterAt",
"MergeDirs",
"DirCacheFlush",
"PutUnchecked",
"PutStream",
"UserInfo",
"Disconnect",
},
UnimplementableObjectMethods: []string{
"GetTier",
"SetTier",
},
ExtraConfig: []fstests.ExtraConfigItem{
{Name: name, Key: "type", Value: "press"},
{Name: name, Key: "remote", Value: tempdir},
{Name: name, Key: "compression_mode", Value: "lz4"},
},
})
}
// TestRemoteGzip tests GZIP compression
func TestRemoteGzip(t *testing.T) {
if *fstest.RemoteName != "" {
t.Skip("Skipping as -remote set")
}
tempdir := filepath.Join(os.TempDir(), "rclone-press-test-gzip")
name := "TestPressGzip"
fstests.Run(t, &fstests.Opt{
RemoteName: name + ":",
NilObject: (*Object)(nil),
UnimplementableFsMethods: []string{
"OpenWriterAt",
"MergeDirs",
"DirCacheFlush",
"PutUnchecked",
"PutStream",
"UserInfo",
"Disconnect",
},
UnimplementableObjectMethods: []string{
"GetTier",
"SetTier",
},
ExtraConfig: []fstests.ExtraConfigItem{
{Name: name, Key: "type", Value: "press"},
{Name: name, Key: "remote", Value: tempdir},
{Name: name, Key: "compression_mode", Value: "gzip"},
},
})
}
// TestRemoteXz tests XZ compression
func TestRemoteXz(t *testing.T) {
if *fstest.RemoteName != "" {
t.Skip("Skipping as -remote set")
}
tempdir := filepath.Join(os.TempDir(), "rclone-press-test-xz")
name := "TestPressXz"
fstests.Run(t, &fstests.Opt{
RemoteName: name + ":",
NilObject: (*Object)(nil),
UnimplementableFsMethods: []string{
"OpenWriterAt",
"MergeDirs",
"DirCacheFlush",
"PutUnchecked",
"PutStream",
"UserInfo",
"Disconnect",
},
UnimplementableObjectMethods: []string{
"GetTier",
"SetTier",
},
ExtraConfig: []fstests.ExtraConfigItem{
{Name: name, Key: "type", Value: "press"},
{Name: name, Key: "remote", Value: tempdir},
{Name: name, Key: "compression_mode", Value: "xz"},
},
})
}

View File

@@ -1039,6 +1039,12 @@ func s3Connection(opt *Options) (*s3.S3, *session.Session, error) {
def := defaults.Get()
def.Config.HTTPClient = lowTimeoutClient
// start a new AWS session
awsSession, err := session.NewSession()
if err != nil {
return nil, nil, errors.Wrap(err, "NewSession")
}
// first provider to supply a credential set "wins"
providers := []credentials.Provider{
// use static credentials if they're present (checked by provider)
@@ -1058,7 +1064,7 @@ func s3Connection(opt *Options) (*s3.S3, *session.Session, error) {
// Pick up IAM role in case we're on EC2
&ec2rolecreds.EC2RoleProvider{
- Client: ec2metadata.New(session.New(), &aws.Config{
+ Client: ec2metadata.New(awsSession, &aws.Config{
HTTPClient: lowTimeoutClient,
}),
ExpiryWindow: 3 * time.Minute,

backend/union/entry.go (new file, 167 lines)
View File

@@ -0,0 +1,167 @@
package union
import (
"bufio"
"context"
"io"
"sync"
"time"
"github.com/pkg/errors"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
// Object describes a union Object
//
// This is a wrapped object which returns the Union Fs as its parent
type Object struct {
*upstream.Object
fs *Fs // what this object is part of
co []upstream.Entry
}
// Directory describes a union Directory
//
// This is a wrapped directory which contains all candidates
type Directory struct {
*upstream.Directory
cd []upstream.Entry
}
type entry interface {
upstream.Entry
candidates() []upstream.Entry
}
// UnWrap returns the Object that this Object is wrapping or
// nil if it isn't wrapping anything
func (o *Object) UnWrap() *upstream.Object {
return o.Object
}
// Fs returns the union Fs as the parent
func (o *Object) Fs() fs.Info {
return o.fs
}
func (o *Object) candidates() []upstream.Entry {
return o.co
}
func (d *Directory) candidates() []upstream.Entry {
return d.cd
}
// Update in to the object with the modTime given of the given size
//
// When called from outside a Fs by rclone, src.Size() will always be >= 0.
// But for unknown-sized objects (indicated by src.Size() == -1), Upload should either
// return an error or update the object properly (rather than e.g. calling panic).
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
entries, err := o.fs.actionEntries(o.candidates()...)
if err != nil {
return err
}
if len(entries) == 1 {
obj := entries[0].(*upstream.Object)
return obj.Update(ctx, in, src, options...)
}
// Get multiple readers
readers := make([]io.Reader, len(entries))
writers := make([]io.Writer, len(entries))
errs := Errors(make([]error, len(entries)+1))
for i := range entries {
r, w := io.Pipe()
bw := bufio.NewWriter(w)
readers[i], writers[i] = r, bw
defer func() {
err := w.Close()
if err != nil {
panic(err)
}
}()
}
go func() {
mw := io.MultiWriter(writers...)
es := make([]error, len(writers)+1)
_, es[len(es)-1] = io.Copy(mw, in)
for i, bw := range writers {
es[i] = bw.(*bufio.Writer).Flush()
}
errs[len(entries)] = Errors(es).Err()
}()
// Multi-threading
multithread(len(entries), func(i int) {
if o, ok := entries[i].(*upstream.Object); ok {
err := o.Update(ctx, readers[i], src, options...)
errs[i] = errors.Wrap(err, o.UpstreamFs().Name())
} else {
errs[i] = fs.ErrorNotAFile
}
})
return errs.Err()
}
// Remove candidate objects selected by ACTION policy
func (o *Object) Remove(ctx context.Context) error {
entries, err := o.fs.actionEntries(o.candidates()...)
if err != nil {
return err
}
errs := Errors(make([]error, len(entries)))
multithread(len(entries), func(i int) {
if o, ok := entries[i].(*upstream.Object); ok {
err := o.Remove(ctx)
errs[i] = errors.Wrap(err, o.UpstreamFs().Name())
} else {
errs[i] = fs.ErrorNotAFile
}
})
return errs.Err()
}
// SetModTime sets the metadata on the object to set the modification date
func (o *Object) SetModTime(ctx context.Context, t time.Time) error {
entries, err := o.fs.actionEntries(o.candidates()...)
if err != nil {
return err
}
var wg sync.WaitGroup
errs := Errors(make([]error, len(entries)))
multithread(len(entries), func(i int) {
if o, ok := entries[i].(*upstream.Object); ok {
err := o.SetModTime(ctx, t)
errs[i] = errors.Wrap(err, o.UpstreamFs().Name())
} else {
errs[i] = fs.ErrorNotAFile
}
})
wg.Wait()
return errs.Err()
}
// ModTime returns the modification date of the directory
// It returns the latest ModTime of all candidates
func (d *Directory) ModTime(ctx context.Context) (t time.Time) {
entries := d.candidates()
times := make([]time.Time, len(entries))
multithread(len(entries), func(i int) {
times[i] = entries[i].ModTime(ctx)
})
for _, ti := range times {
if t.Before(ti) {
t = ti
}
}
return t
}
// Size returns the size of the directory
// It returns the sum of all candidates
func (d *Directory) Size() (s int64) {
for _, e := range d.candidates() {
s += e.Size()
}
return s
}
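Update() above fans one incoming stream out to every selected upstream through per-upstream pipes and an io.MultiWriter. A self-contained sketch of that pattern (illustrative only, not the union code itself):

package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"
	"sync"
)

func main() {
	src := strings.NewReader("hello union")

	// One pipe per "upstream": the write ends feed a MultiWriter, the read
	// ends are drained concurrently, mirroring the Update() fan-out.
	readers := make([]io.Reader, 2)
	writers := make([]io.Writer, 2)
	closers := make([]*io.PipeWriter, 2)
	for i := range readers {
		r, w := io.Pipe()
		readers[i], writers[i], closers[i] = r, w, w
	}

	var wg sync.WaitGroup
	out := make([]bytes.Buffer, 2)
	for i := range readers {
		wg.Add(1)
		go func(i int) { // each "upstream" consumes its own copy of the stream
			defer wg.Done()
			_, _ = io.Copy(&out[i], readers[i])
		}(i)
	}

	if _, err := io.Copy(io.MultiWriter(writers...), src); err != nil {
		fmt.Println("copy error:", err)
	}
	for _, c := range closers {
		c.Close() // signal EOF so the readers finish
	}
	wg.Wait()
	fmt.Println(out[0].String(), "/", out[1].String())
}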

backend/union/errors.go (new file, 68 lines)
View File

@@ -0,0 +1,68 @@
package union
import (
"bytes"
"fmt"
)
// The Errors type wraps a slice of errors
type Errors []error
// Map returns a copy of the error slice with all its errors modified
// according to the mapping function. If mapping returns nil,
// the error is dropped from the error slice with no replacement.
func (e Errors) Map(mapping func(error) error) Errors {
s := make([]error, len(e))
i := 0
for _, err := range e {
nerr := mapping(err)
if nerr == nil {
continue
}
s[i] = nerr
i++
}
return Errors(s[:i])
}
// FilterNil returns the Errors without nil
func (e Errors) FilterNil() Errors {
ne := e.Map(func(err error) error {
return err
})
return ne
}
// Err returns an error interface with the nil errors filtered out,
// or nil if no non-nil error is present.
func (e Errors) Err() error {
ne := e.FilterNil()
if len(ne) == 0 {
return nil
}
return ne
}
// Error returns a concatenated string of the contained errors
func (e Errors) Error() string {
var buf bytes.Buffer
if len(e) == 0 {
buf.WriteString("no error")
}
if len(e) == 1 {
buf.WriteString("1 error: ")
} else {
fmt.Fprintf(&buf, "%d errors: ", len(e))
}
for i, err := range e {
if i != 0 {
buf.WriteString("; ")
}
buf.WriteString(err.Error())
}
return buf.String()
}
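For illustration, a hedged usage sketch of the Errors helper added above, assuming the backend/union package from this changeset is importable:

package main

import (
	"errors"
	"fmt"

	"github.com/rclone/rclone/backend/union" // assumes the union backend introduced in this changeset
)

func main() {
	// One slot per upstream; nil slots mean that upstream succeeded.
	errs := union.Errors{nil, errors.New("remote2: permission denied"), nil}

	// Err() drops the nils and returns nil when nothing failed, so callers
	// can treat the aggregate like a single error.
	if err := errs.Err(); err != nil {
		fmt.Println(err) // 1 error: remote2: permission denied
	}
}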

View File

@@ -0,0 +1,44 @@
package policy
import (
"context"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
func init() {
registerPolicy("all", &All{})
}
// All policy behaves the same as EpAll except for the CREATE category
// Action category: same as epall.
// Create category: apply to all branches.
// Search category: same as epall.
type All struct {
EpAll
}
// Create category policy, governing the creation of files and directories
func (p *All) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams = filterNC(upstreams)
if len(upstreams) == 0 {
return nil, fs.ErrorPermissionDenied
}
return upstreams, nil
}
// CreateEntries is CREATE category policy but receiving a set of candidate entries
func (p *All) CreateEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
entries = filterNCEntries(entries)
if len(entries) == 0 {
return nil, fs.ErrorPermissionDenied
}
return entries, nil
}

View File

@@ -0,0 +1,99 @@
package policy
import (
"context"
"path"
"sync"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
func init() {
registerPolicy("epall", &EpAll{})
}
// EpAll stands for existing path, All
// Action category: apply to all found.
// Create category: apply to all found.
// Search category: same as epff.
type EpAll struct {
EpFF
}
func (p *EpAll) epall(ctx context.Context, upstreams []*upstream.Fs, filePath string) ([]*upstream.Fs, error) {
var wg sync.WaitGroup
ufs := make([]*upstream.Fs, len(upstreams))
for i, u := range upstreams {
wg.Add(1)
i, u := i, u // Closure
go func() {
rfs := u.RootFs
remote := path.Join(u.RootPath, filePath)
if findEntry(ctx, rfs, remote) != nil {
ufs[i] = u
}
wg.Done()
}()
}
wg.Wait()
var results []*upstream.Fs
for _, f := range ufs {
if f != nil {
results = append(results, f)
}
}
if len(results) == 0 {
return nil, fs.ErrorObjectNotFound
}
return results, nil
}
// Action category policy, governing the modification of files and directories
func (p *EpAll) Action(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams = filterRO(upstreams)
if len(upstreams) == 0 {
return nil, fs.ErrorPermissionDenied
}
return p.epall(ctx, upstreams, path)
}
// ActionEntries is ACTION category policy but receiving a set of candidate entries
func (p *EpAll) ActionEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
entries = filterROEntries(entries)
if len(entries) == 0 {
return nil, fs.ErrorPermissionDenied
}
return entries, nil
}
// Create category policy, governing the creation of files and directories
func (p *EpAll) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams = filterNC(upstreams)
if len(upstreams) == 0 {
return nil, fs.ErrorPermissionDenied
}
upstreams, err := p.epall(ctx, upstreams, path+"/..")
return upstreams, err
}
// CreateEntries is CREATE category policy but receiving a set of candidate entries
func (p *EpAll) CreateEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
entries = filterNCEntries(entries)
if len(entries) == 0 {
return nil, fs.ErrorPermissionDenied
}
return entries, nil
}

View File

@@ -0,0 +1,115 @@
package policy
import (
"context"
"path"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
func init() {
registerPolicy("epff", &EpFF{})
}
// EpFF stands for existing path, first found
// Given the order of the candidates, act on the first one found where the relative path exists.
type EpFF struct{}
func (p *EpFF) epff(ctx context.Context, upstreams []*upstream.Fs, filePath string) (*upstream.Fs, error) {
ch := make(chan *upstream.Fs)
for _, u := range upstreams {
u := u // Closure
go func() {
rfs := u.RootFs
remote := path.Join(u.RootPath, filePath)
if findEntry(ctx, rfs, remote) == nil {
u = nil
}
ch <- u
}()
}
var u *upstream.Fs
for i := 0; i < len(upstreams); i++ {
u = <-ch
if u != nil {
// close remaining goroutines
go func(num int) {
defer close(ch)
for i := 0; i < num; i++ {
<-ch
}
}(len(upstreams) - 1 - i)
}
}
if u == nil {
return nil, fs.ErrorObjectNotFound
}
return u, nil
}
// Action category policy, governing the modification of files and directories
func (p *EpFF) Action(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams = filterRO(upstreams)
if len(upstreams) == 0 {
return nil, fs.ErrorPermissionDenied
}
u, err := p.epff(ctx, upstreams, path)
return []*upstream.Fs{u}, err
}
// ActionEntries is ACTION category policy but receiving a set of candidate entries
func (p *EpFF) ActionEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
entries = filterROEntries(entries)
if len(entries) == 0 {
return nil, fs.ErrorPermissionDenied
}
return entries[:1], nil
}
// Create category policy, governing the creation of files and directories
func (p *EpFF) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams = filterNC(upstreams)
if len(upstreams) == 0 {
return nil, fs.ErrorPermissionDenied
}
u, err := p.epff(ctx, upstreams, path+"/..")
return []*upstream.Fs{u}, err
}
// CreateEntries is CREATE category policy but receiving a set of candidate entries
func (p *EpFF) CreateEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
entries = filterNCEntries(entries)
if len(entries) == 0 {
return nil, fs.ErrorPermissionDenied
}
return entries[:1], nil
}
// Search category policy, governing the access to files and directories
func (p *EpFF) Search(ctx context.Context, upstreams []*upstream.Fs, path string) (*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
return p.epff(ctx, upstreams, path)
}
// SearchEntries is SEARCH category policy but receiving a set of candidate entries
func (p *EpFF) SearchEntries(entries ...upstream.Entry) (upstream.Entry, error) {
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
return entries[0], nil
}

View File

@@ -0,0 +1,116 @@
package policy
import (
"context"
"math"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
func init() {
registerPolicy("eplfs", &EpLfs{})
}
// EpLfs stands for existing path, least free space
// Of all the candidates on which the path exists choose the one with the least free space.
type EpLfs struct {
EpAll
}
func (p *EpLfs) lfs(upstreams []*upstream.Fs) (*upstream.Fs, error) {
var minFreeSpace int64 = math.MaxInt64
var lfsupstream *upstream.Fs
for _, u := range upstreams {
space, err := u.GetFreeSpace()
if err != nil {
fs.LogPrintf(fs.LogLevelNotice, nil,
"Free Space is not supported for upstream %s, treating as infinite", u.Name())
}
if space < minFreeSpace {
minFreeSpace = space
lfsupstream = u
}
}
if lfsupstream == nil {
return nil, fs.ErrorObjectNotFound
}
return lfsupstream, nil
}
func (p *EpLfs) lfsEntries(entries []upstream.Entry) (upstream.Entry, error) {
var minFreeSpace int64
var lfsEntry upstream.Entry
for _, e := range entries {
space, err := e.UpstreamFs().GetFreeSpace()
if err != nil {
fs.LogPrintf(fs.LogLevelNotice, nil,
"Free Space is not supported for upstream %s, treating as infinite", e.UpstreamFs().Name())
}
if space < minFreeSpace {
minFreeSpace = space
lfsEntry = e
}
}
return lfsEntry, nil
}
// Action category policy, governing the modification of files and directories
func (p *EpLfs) Action(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
upstreams, err := p.EpAll.Action(ctx, upstreams, path)
if err != nil {
return nil, err
}
u, err := p.lfs(upstreams)
return []*upstream.Fs{u}, err
}
// ActionEntries is ACTION category policy but receiving a set of candidate entries
func (p *EpLfs) ActionEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
entries, err := p.EpAll.ActionEntries(entries...)
if err != nil {
return nil, err
}
e, err := p.lfsEntries(entries)
return []upstream.Entry{e}, err
}
// Create category policy, governing the creation of files and directories
func (p *EpLfs) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
upstreams, err := p.EpAll.Create(ctx, upstreams, path)
if err != nil {
return nil, err
}
u, err := p.lfs(upstreams)
return []*upstream.Fs{u}, err
}
// CreateEntries is CREATE category policy but receiving a set of candidate entries
func (p *EpLfs) CreateEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
entries, err := p.EpAll.CreateEntries(entries...)
if err != nil {
return nil, err
}
e, err := p.lfsEntries(entries)
return []upstream.Entry{e}, err
}
// Search category policy, governing the access to files and directories
func (p *EpLfs) Search(ctx context.Context, upstreams []*upstream.Fs, path string) (*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams, err := p.epall(ctx, upstreams, path)
if err != nil {
return nil, err
}
return p.lfs(upstreams)
}
// SearchEntries is SEARCH category policy but receiving a set of candidate entries
func (p *EpLfs) SearchEntries(entries ...upstream.Entry) (upstream.Entry, error) {
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
return p.lfsEntries(entries)
}

View File

@@ -0,0 +1,116 @@
package policy
import (
"context"
"math"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
func init() {
registerPolicy("eplno", &EpLno{})
}
// EpLno stands for existing path, least number of objects
// Of all the candidates on which the path exists choose the one with the least number of objects
type EpLno struct {
EpAll
}
func (p *EpLno) lno(upstreams []*upstream.Fs) (*upstream.Fs, error) {
var minNumObj int64 = math.MaxInt64
var lnoUpstream *upstream.Fs
for _, u := range upstreams {
numObj, err := u.GetNumObjects()
if err != nil {
fs.LogPrintf(fs.LogLevelNotice, nil,
"Number of Objects is not supported for upstream %s, treating as 0", u.Name())
}
if minNumObj > numObj {
minNumObj = numObj
lnoUpstream = u
}
}
if lnoUpstream == nil {
return nil, fs.ErrorObjectNotFound
}
return lnoUpstream, nil
}
func (p *EpLno) lnoEntries(entries []upstream.Entry) (upstream.Entry, error) {
var minNumObj int64 = math.MaxInt64
var lnoEntry upstream.Entry
for _, e := range entries {
numObj, err := e.UpstreamFs().GetNumObjects()
if err != nil {
fs.LogPrintf(fs.LogLevelNotice, nil,
"Number of Objects is not supported for upstream %s, treating as 0", e.UpstreamFs().Name())
}
if minNumObj > numObj {
minNumObj = numObj
lnoEntry = e
}
}
return lnoEntry, nil
}
// Action category policy, governing the modification of files and directories
func (p *EpLno) Action(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
upstreams, err := p.EpAll.Action(ctx, upstreams, path)
if err != nil {
return nil, err
}
u, err := p.lno(upstreams)
return []*upstream.Fs{u}, err
}
// ActionEntries is ACTION category policy but receiving a set of candidate entries
func (p *EpLno) ActionEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
entries, err := p.EpAll.ActionEntries(entries...)
if err != nil {
return nil, err
}
e, err := p.lnoEntries(entries)
return []upstream.Entry{e}, err
}
// Create category policy, governing the creation of files and directories
func (p *EpLno) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
upstreams, err := p.EpAll.Create(ctx, upstreams, path)
if err != nil {
return nil, err
}
u, err := p.lno(upstreams)
return []*upstream.Fs{u}, err
}
// CreateEntries is CREATE category policy but receiving a set of candidate entries
func (p *EpLno) CreateEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
entries, err := p.EpAll.CreateEntries(entries...)
if err != nil {
return nil, err
}
e, err := p.lnoEntries(entries)
return []upstream.Entry{e}, err
}
// Search category policy, governing the access to files and directories
func (p *EpLno) Search(ctx context.Context, upstreams []*upstream.Fs, path string) (*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams, err := p.epall(ctx, upstreams, path)
if err != nil {
return nil, err
}
return p.lno(upstreams)
}
// SearchEntries is SEARCH category policy but receiving a set of candidate entries
func (p *EpLno) SearchEntries(entries ...upstream.Entry) (upstream.Entry, error) {
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
return p.lnoEntries(entries)
}

View File

@@ -0,0 +1,116 @@
package policy
import (
"context"
"math"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
func init() {
registerPolicy("eplus", &EpLus{})
}
// EpLus stands for existing path, least used space
// Of all the candidates on which the path exists choose the one with the least used space.
type EpLus struct {
EpAll
}
func (p *EpLus) lus(upstreams []*upstream.Fs) (*upstream.Fs, error) {
var minUsedSpace int64 = math.MaxInt64
var lusupstream *upstream.Fs
for _, u := range upstreams {
space, err := u.GetUsedSpace()
if err != nil {
fs.LogPrintf(fs.LogLevelNotice, nil,
"Used Space is not supported for upstream %s, treating as 0", u.Name())
}
if space < minUsedSpace {
minUsedSpace = space
lusupstream = u
}
}
if lusupstream == nil {
return nil, fs.ErrorObjectNotFound
}
return lusupstream, nil
}
func (p *EpLus) lusEntries(entries []upstream.Entry) (upstream.Entry, error) {
var minUsedSpace int64 = math.MaxInt64
var lusEntry upstream.Entry
for _, e := range entries {
space, err := e.UpstreamFs().GetUsedSpace()
if err != nil {
fs.LogPrintf(fs.LogLevelNotice, nil,
"Used Space is not supported for upstream %s, treating as 0", e.UpstreamFs().Name())
}
if space < minUsedSpace {
minUsedSpace = space
lusEntry = e
}
}
return lusEntry, nil
}
// Action category policy, governing the modification of files and directories
func (p *EpLus) Action(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
upstreams, err := p.EpAll.Action(ctx, upstreams, path)
if err != nil {
return nil, err
}
u, err := p.lus(upstreams)
return []*upstream.Fs{u}, err
}
// ActionEntries is ACTION category policy but receiving a set of candidate entries
func (p *EpLus) ActionEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
entries, err := p.EpAll.ActionEntries(entries...)
if err != nil {
return nil, err
}
e, err := p.lusEntries(entries)
return []upstream.Entry{e}, err
}
// Create category policy, governing the creation of files and directories
func (p *EpLus) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
upstreams, err := p.EpAll.Create(ctx, upstreams, path)
if err != nil {
return nil, err
}
u, err := p.lus(upstreams)
return []*upstream.Fs{u}, err
}
// CreateEntries is CREATE category policy but receiving a set of candidate entries
func (p *EpLus) CreateEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
entries, err := p.EpAll.CreateEntries(entries...)
if err != nil {
return nil, err
}
e, err := p.lusEntries(entries)
return []upstream.Entry{e}, err
}
// Search category policy, governing the access to files and directories
func (p *EpLus) Search(ctx context.Context, upstreams []*upstream.Fs, path string) (*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams, err := p.epall(ctx, upstreams, path)
if err != nil {
return nil, err
}
return p.lus(upstreams)
}
// SearchEntries is SEARCH category policy but receiving a set of candidate entries
func (p *EpLus) SearchEntries(entries ...upstream.Entry) (upstream.Entry, error) {
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
return p.lusEntries(entries)
}

View File

@@ -0,0 +1,115 @@
package policy
import (
"context"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
func init() {
registerPolicy("epmfs", &EpMfs{})
}
// EpMfs stands for existing path, most free space
// Of all the candidates on which the path exists choose the one with the most free space.
type EpMfs struct {
EpAll
}
func (p *EpMfs) mfs(upstreams []*upstream.Fs) (*upstream.Fs, error) {
var maxFreeSpace int64
var mfsupstream *upstream.Fs
for _, u := range upstreams {
space, err := u.GetFreeSpace()
if err != nil {
fs.LogPrintf(fs.LogLevelNotice, nil,
"Free Space is not supported for upstream %s, treating as infinite", u.Name())
}
if maxFreeSpace < space {
maxFreeSpace = space
mfsupstream = u
}
}
if mfsupstream == nil {
return nil, fs.ErrorObjectNotFound
}
return mfsupstream, nil
}
func (p *EpMfs) mfsEntries(entries []upstream.Entry) (upstream.Entry, error) {
var maxFreeSpace int64
var mfsEntry upstream.Entry
for _, e := range entries {
space, err := e.UpstreamFs().GetFreeSpace()
if err != nil {
fs.LogPrintf(fs.LogLevelNotice, nil,
"Free Space is not supported for upstream %s, treating as infinite", e.UpstreamFs().Name())
}
if maxFreeSpace < space {
maxFreeSpace = space
mfsEntry = e
}
}
return mfsEntry, nil
}
// Action category policy, governing the modification of files and directories
func (p *EpMfs) Action(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
upstreams, err := p.EpAll.Action(ctx, upstreams, path)
if err != nil {
return nil, err
}
u, err := p.mfs(upstreams)
return []*upstream.Fs{u}, err
}
// ActionEntries is ACTION category policy but receiving a set of candidate entries
func (p *EpMfs) ActionEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
entries, err := p.EpAll.ActionEntries(entries...)
if err != nil {
return nil, err
}
e, err := p.mfsEntries(entries)
return []upstream.Entry{e}, err
}
// Create category policy, governing the creation of files and directories
func (p *EpMfs) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
upstreams, err := p.EpAll.Create(ctx, upstreams, path)
if err != nil {
return nil, err
}
u, err := p.mfs(upstreams)
return []*upstream.Fs{u}, err
}
// CreateEntries is CREATE category policy but receiving a set of candidate entries
func (p *EpMfs) CreateEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
entries, err := p.EpAll.CreateEntries(entries...)
if err != nil {
return nil, err
}
e, err := p.mfsEntries(entries)
return []upstream.Entry{e}, err
}
// Search category policy, governing the access to files and directories
func (p *EpMfs) Search(ctx context.Context, upstreams []*upstream.Fs, path string) (*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams, err := p.epall(ctx, upstreams, path)
if err != nil {
return nil, err
}
return p.mfs(upstreams)
}
// SearchEntries is SEARCH category policy but receiving a set of candidate entries
func (p *EpMfs) SearchEntries(entries ...upstream.Entry) (upstream.Entry, error) {
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
return p.mfsEntries(entries)
}

View File

@@ -0,0 +1,86 @@
package policy
import (
"context"
"math/rand"
"time"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
func init() {
registerPolicy("eprand", &EpRand{})
}
// EpRand stands for existing path, random
// Calls epall and then randomizes. Returns one candidate.
type EpRand struct {
EpAll
}
func (p *EpRand) rand(upstreams []*upstream.Fs) *upstream.Fs {
rand.Seed(time.Now().Unix())
return upstreams[rand.Intn(len(upstreams))]
}
func (p *EpRand) randEntries(entries []upstream.Entry) upstream.Entry {
rand.Seed(time.Now().Unix())
return entries[rand.Intn(len(entries))]
}
// Action category policy, governing the modification of files and directories
func (p *EpRand) Action(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
upstreams, err := p.EpAll.Action(ctx, upstreams, path)
if err != nil {
return nil, err
}
return []*upstream.Fs{p.rand(upstreams)}, nil
}
// ActionEntries is ACTION category policy but receiving a set of candidate entries
func (p *EpRand) ActionEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
entries, err := p.EpAll.ActionEntries(entries...)
if err != nil {
return nil, err
}
return []upstream.Entry{p.randEntries(entries)}, nil
}
// Create category policy, governing the creation of files and directories
func (p *EpRand) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
upstreams, err := p.EpAll.Create(ctx, upstreams, path)
if err != nil {
return nil, err
}
return []*upstream.Fs{p.rand(upstreams)}, nil
}
// CreateEntries is CREATE category policy but receiving a set of candidate entries
func (p *EpRand) CreateEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
entries, err := p.EpAll.CreateEntries(entries...)
if err != nil {
return nil, err
}
return []upstream.Entry{p.randEntries(entries)}, nil
}
// Search category policy, governing the access to files and directories
func (p *EpRand) Search(ctx context.Context, upstreams []*upstream.Fs, path string) (*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams, err := p.epall(ctx, upstreams, path)
if err != nil {
return nil, err
}
return p.rand(upstreams), nil
}
// SearchEntries is SEARCH category policy but receiving a set of candidate entries
func (p *EpRand) SearchEntries(entries ...upstream.Entry) (upstream.Entry, error) {
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
return p.randEntries(entries), nil
}

View File

@@ -0,0 +1,32 @@
package policy
import (
"context"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
func init() {
registerPolicy("ff", &FF{})
}
// FF stands for first found
// Search category: same as epff.
// Action category: same as epff.
// Create category: Given the order of the candidates, act on the first one found.
type FF struct {
EpFF
}
// Create category policy, governing the creation of files and directories
func (p *FF) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams = filterNC(upstreams)
if len(upstreams) == 0 {
return upstreams, fs.ErrorPermissionDenied
}
return upstreams[:1], nil
}

View File

@@ -0,0 +1,33 @@
package policy
import (
"context"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
func init() {
registerPolicy("lfs", &Lfs{})
}
// Lfs stands for least free space
// Search category: same as eplfs.
// Action category: same as eplfs.
// Create category: Pick the drive with the least free space.
type Lfs struct {
EpLfs
}
// Create category policy, governing the creation of files and directories
func (p *Lfs) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams = filterNC(upstreams)
if len(upstreams) == 0 {
return nil, fs.ErrorPermissionDenied
}
u, err := p.lfs(upstreams)
return []*upstream.Fs{u}, err
}

View File

@@ -0,0 +1,33 @@
package policy
import (
"context"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
func init() {
registerPolicy("lno", &Lno{})
}
// Lno stands for least number of objects
// Search category: same as eplno.
// Action category: same as eplno.
// Create category: Pick the drive with the least number of objects.
type Lno struct {
EpLno
}
// Create category policy, governing the creation of files and directories
func (p *Lno) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams = filterNC(upstreams)
if len(upstreams) == 0 {
return nil, fs.ErrorPermissionDenied
}
u, err := p.lno(upstreams)
return []*upstream.Fs{u}, err
}

View File

@@ -0,0 +1,33 @@
package policy
import (
"context"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
func init() {
registerPolicy("lus", &Lus{})
}
// Lus stands for least used space
// Search category: same as eplus.
// Action category: same as eplus.
// Create category: Pick the drive with the least used space.
type Lus struct {
EpLus
}
// Create category policy, governing the creation of files and directories
func (p *Lus) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams = filterNC(upstreams)
if len(upstreams) == 0 {
return nil, fs.ErrorPermissionDenied
}
u, err := p.lus(upstreams)
return []*upstream.Fs{u}, err
}

View File

@@ -0,0 +1,33 @@
package policy
import (
"context"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
func init() {
registerPolicy("mfs", &Mfs{})
}
// Mfs stands for most free space
// Search category: same as epmfs.
// Action category: same as epmfs.
// Create category: Pick the drive with the most free space.
type Mfs struct {
EpMfs
}
// Create category policy, governing the creation of files and directories
func (p *Mfs) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams = filterNC(upstreams)
if len(upstreams) == 0 {
return nil, fs.ErrorPermissionDenied
}
u, err := p.mfs(upstreams)
return []*upstream.Fs{u}, err
}

View File

@@ -0,0 +1,149 @@
package policy
import (
"context"
"path"
"sync"
"time"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
func init() {
registerPolicy("newest", &Newest{})
}
// Newest policy picks the file / directory with the largest mtime
// It implies the existence of a path
type Newest struct {
EpAll
}
func (p *Newest) newest(ctx context.Context, upstreams []*upstream.Fs, filePath string) (*upstream.Fs, error) {
var wg sync.WaitGroup
ufs := make([]*upstream.Fs, len(upstreams))
mtimes := make([]time.Time, len(upstreams))
for i, u := range upstreams {
wg.Add(1)
i, u := i, u // Closure
go func() {
defer wg.Done()
rfs := u.RootFs
remote := path.Join(u.RootPath, filePath)
if e := findEntry(ctx, rfs, remote); e != nil {
ufs[i] = u
mtimes[i] = e.ModTime(ctx)
}
}()
}
wg.Wait()
maxMtime := time.Time{}
var newestFs *upstream.Fs
for i, u := range ufs {
if u != nil && mtimes[i].After(maxMtime) {
maxMtime = mtimes[i]
newestFs = u
}
}
if newestFs == nil {
return nil, fs.ErrorObjectNotFound
}
return newestFs, nil
}
func (p *Newest) newestEntries(entries []upstream.Entry) (upstream.Entry, error) {
var wg sync.WaitGroup
mtimes := make([]time.Time, len(entries))
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
for i, e := range entries {
wg.Add(1)
i, e := i, e // Closure
go func() {
defer wg.Done()
mtimes[i] = e.ModTime(ctx)
}()
}
wg.Wait()
maxMtime := time.Time{}
var newestEntry upstream.Entry
for i, t := range mtimes {
if t.After(maxMtime) {
maxMtime = t
newestEntry = entries[i]
}
}
if newestEntry == nil {
return nil, fs.ErrorObjectNotFound
}
return newestEntry, nil
}
// Action category policy, governing the modification of files and directories
func (p *Newest) Action(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams = filterRO(upstreams)
if len(upstreams) == 0 {
return nil, fs.ErrorPermissionDenied
}
u, err := p.newest(ctx, upstreams, path)
return []*upstream.Fs{u}, err
}
// ActionEntries is ACTION category policy but receiving a set of candidate entries
func (p *Newest) ActionEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
entries = filterROEntries(entries)
if len(entries) == 0 {
return nil, fs.ErrorPermissionDenied
}
e, err := p.newestEntries(entries)
return []upstream.Entry{e}, err
}
// Create category policy, governing the creation of files and directories
func (p *Newest) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams = filterNC(upstreams)
if len(upstreams) == 0 {
return nil, fs.ErrorPermissionDenied
}
u, err := p.newest(ctx, upstreams, path+"/..")
return []*upstream.Fs{u}, err
}
// CreateEntries is CREATE category policy but receiving a set of candidate entries
func (p *Newest) CreateEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
entries = filterNCEntries(entries)
if len(entries) == 0 {
return nil, fs.ErrorPermissionDenied
}
e, err := p.newestEntries(entries)
return []upstream.Entry{e}, err
}
// Search category policy, governing the access to files and directories
func (p *Newest) Search(ctx context.Context, upstreams []*upstream.Fs, path string) (*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
return p.newest(ctx, upstreams, path)
}
// SearchEntries is SEARCH category policy but receiving a set of candidate entries
func (p *Newest) SearchEntries(entries ...upstream.Entry) (upstream.Entry, error) {
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
return p.newestEntries(entries)
}

View File

@@ -0,0 +1,129 @@
package policy
import (
"context"
"math/rand"
"path"
"strings"
"time"
"github.com/pkg/errors"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
var policies = make(map[string]Policy)
// Policy is the interface of a set of defined behavior choosing
// the upstream Fs to operate on
type Policy interface {
// Action category policy, governing the modification of files and directories
Action(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error)
// Create category policy, governing the creation of files and directories
Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error)
// Search category policy, governing the access to files and directories
Search(ctx context.Context, upstreams []*upstream.Fs, path string) (*upstream.Fs, error)
// ActionEntries is ACTION category policy but receiving a set of candidate entries
ActionEntries(entries ...upstream.Entry) ([]upstream.Entry, error)
// CreateEntries is CREATE category policy but receiving a set of candidate entries
CreateEntries(entries ...upstream.Entry) ([]upstream.Entry, error)
// SearchEntries is SEARCH category policy but receiving a set of candidate entries
SearchEntries(entries ...upstream.Entry) (upstream.Entry, error)
}
func registerPolicy(name string, p Policy) {
policies[strings.ToLower(name)] = p
}
// Get a Policy from the list
func Get(name string) (Policy, error) {
p, ok := policies[strings.ToLower(name)]
if !ok {
return nil, errors.Errorf("didn't find policy called %q", name)
}
return p, nil
}
func filterRO(ufs []*upstream.Fs) (wufs []*upstream.Fs) {
for _, u := range ufs {
if u.IsWritable() {
wufs = append(wufs, u)
}
}
return wufs
}
func filterROEntries(ue []upstream.Entry) (wue []upstream.Entry) {
for _, e := range ue {
if e.UpstreamFs().IsWritable() {
wue = append(wue, e)
}
}
return wue
}
func filterNC(ufs []*upstream.Fs) (wufs []*upstream.Fs) {
for _, u := range ufs {
if u.IsCreatable() {
wufs = append(wufs, u)
}
}
return wufs
}
func filterNCEntries(ue []upstream.Entry) (wue []upstream.Entry) {
for _, e := range ue {
if e.UpstreamFs().IsCreatable() {
wue = append(wue, e)
}
}
return wue
}
func parentDir(absPath string) string {
parent := path.Dir(strings.TrimRight(absPath, "/"))
if parent == "." {
parent = ""
}
return parent
}
func clean(absPath string) string {
cleanPath := path.Clean(absPath)
if cleanPath == "." {
cleanPath = ""
}
return cleanPath
}
func findEntry(ctx context.Context, f fs.Fs, remote string) fs.DirEntry {
remote = clean(remote)
dir := parentDir(remote)
entries, err := f.List(ctx, dir)
if remote == dir {
if err != nil {
return nil
}
// random modtime for root
randomNow := time.Unix(time.Now().Unix()-rand.Int63n(10000), 0)
return fs.NewDir("", randomNow)
}
found := false
for _, e := range entries {
eRemote := e.Remote()
if f.Features().CaseInsensitive {
found = strings.EqualFold(remote, eRemote)
} else {
found = (remote == eRemote)
}
if found {
return e
}
}
return nil
}
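The registry above makes it easy to add further policies. As an illustrative sketch only (the policy name "epfirst" is hypothetical and roughly mirrors what the existing epff policy already does), a new policy can embed an existing one and override just the categories it cares about:

package policy

import (
    "context"

    "github.com/rclone/rclone/backend/union/upstream"
)

func init() {
    registerPolicy("epfirst", &EpFirst{})
}

// EpFirst picks, of the upstreams on which the path exists, the first one.
type EpFirst struct {
    EpAll
}

// Create category policy, governing the creation of files and directories
func (p *EpFirst) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
    // Reuse the existing-path filtering from EpAll, then keep only the first hit.
    upstreams, err := p.EpAll.Create(ctx, upstreams, path)
    if err != nil {
        return nil, err
    }
    return upstreams[:1], nil
}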

View File

@@ -0,0 +1,83 @@
package policy
import (
"context"
"math/rand"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
)
func init() {
registerPolicy("rand", &Rand{})
}
// Rand stands for random
// Calls all and then randomizes. Returns one candidate.
type Rand struct {
All
}
func (p *Rand) rand(upstreams []*upstream.Fs) *upstream.Fs {
return upstreams[rand.Intn(len(upstreams))]
}
func (p *Rand) randEntries(entries []upstream.Entry) upstream.Entry {
return entries[rand.Intn(len(entries))]
}
// Action category policy, governing the modification of files and directories
func (p *Rand) Action(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
upstreams, err := p.All.Action(ctx, upstreams, path)
if err != nil {
return nil, err
}
return []*upstream.Fs{p.rand(upstreams)}, nil
}
// ActionEntries is ACTION category policy but receiving a set of candidate entries
func (p *Rand) ActionEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
entries, err := p.All.ActionEntries(entries...)
if err != nil {
return nil, err
}
return []upstream.Entry{p.randEntries(entries)}, nil
}
// Create category policy, governing the creation of files and directories
func (p *Rand) Create(ctx context.Context, upstreams []*upstream.Fs, path string) ([]*upstream.Fs, error) {
upstreams, err := p.All.Create(ctx, upstreams, path)
if err != nil {
return nil, err
}
return []*upstream.Fs{p.rand(upstreams)}, nil
}
// CreateEntries is CREATE category policy but receiving a set of candidate entries
func (p *Rand) CreateEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
entries, err := p.All.CreateEntries(entries...)
if err != nil {
return nil, err
}
return []upstream.Entry{p.randEntries(entries)}, nil
}
// Search category policy, governing the access to files and directories
func (p *Rand) Search(ctx context.Context, upstreams []*upstream.Fs, path string) (*upstream.Fs, error) {
if len(upstreams) == 0 {
return nil, fs.ErrorObjectNotFound
}
upstreams, err := p.epall(ctx, upstreams, path)
if err != nil {
return nil, err
}
return p.rand(upstreams), nil
}
// SearchEntries is SEARCH category policy but receiving a set of candidate entries
func (p *Rand) SearchEntries(entries ...upstream.Entry) (upstream.Entry, error) {
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
return p.randEntries(entries), nil
}

View File

@@ -1,17 +1,20 @@
package union
import (
"bufio"
"context"
"fmt"
"io"
"path"
"path/filepath"
"strings"
"sync"
"time"
"github.com/pkg/errors"
"github.com/rclone/rclone/backend/union/policy"
"github.com/rclone/rclone/backend/union/upstream"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/cache"
"github.com/rclone/rclone/fs/config/configmap"
"github.com/rclone/rclone/fs/config/configstruct"
"github.com/rclone/rclone/fs/hash"
@@ -21,12 +24,32 @@ import (
func init() {
fsi := &fs.RegInfo{
Name: "union",
Description: "Union merges the contents of several remotes",
Description: "Union merges the contents of several upstream fs",
NewFs: NewFs,
Options: []fs.Option{{
Name: "remotes",
Help: "List of space separated remotes.\nCan be 'remotea:test/dir remoteb:', '\"remotea:test/space dir\" remoteb:', etc.\nThe last remote is used to write to.",
Name: "upstreams",
Help: "List of space separated upstreams.\nCan be 'upstreama:test/dir upstreamb:', '\"upstreama:test/space:ro dir\" upstreamb:', etc.\n",
Required: true,
}, {
Name: "action_policy",
Help: "Policy to choose upstream on ACTION category.",
Required: true,
Default: "epall",
}, {
Name: "create_policy",
Help: "Policy to choose upstream on CREATE category.",
Required: true,
Default: "epmfs",
}, {
Name: "search_policy",
Help: "Policy to choose upstream on SEARCH category.",
Required: true,
Default: "ff",
}, {
Name: "cache_time",
Help: "Cache time of usage and free space (in seconds)",
Required: true,
Default: 120,
}},
}
fs.Register(fsi)
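For reference, an illustrative rclone.conf section using these options (the remote names and paths are made up, not part of this change) could look like:

[myunion]
type = union
upstreams = remote1:dir remote2:dir /local/dir:ro
action_policy = epall
create_policy = epmfs
search_policy = ff
cache_time = 120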
@@ -34,39 +57,48 @@ func init() {
// Options defines the configuration for this backend
type Options struct {
Remotes fs.SpaceSepList `config:"remotes"`
Upstreams fs.SpaceSepList `config:"upstreams"`
Remotes fs.SpaceSepList `config:"remotes"` // Deprecated
ActionPolicy string `config:"action_policy"`
CreatePolicy string `config:"create_policy"`
SearchPolicy string `config:"search_policy"`
CacheTime int `config:"cache_time"`
}
// Fs represents a union of remotes
// Fs represents a union of upstreams
type Fs struct {
name string // name of this remote
features *fs.Features // optional features
opt Options // options for this Fs
root string // the path we are working on
remotes []fs.Fs // slice of remotes
wr fs.Fs // writable remote
hashSet hash.Set // intersection of hash types
name string // name of this remote
features *fs.Features // optional features
opt Options // options for this Fs
root string // the path we are working on
upstreams []*upstream.Fs // slice of upstreams
hashSet hash.Set // intersection of hash types
actionPolicy policy.Policy // policy for ACTION
createPolicy policy.Policy // policy for CREATE
searchPolicy policy.Policy // policy for SEARCH
}
// Object describes a union Object
//
// This is a wrapped object which returns the Union Fs as its parent
type Object struct {
fs.Object
fs *Fs // what this object is part of
}
// Wrap an existing object in the union Object
func (f *Fs) wrapObject(o fs.Object) *Object {
return &Object{
Object: o,
fs: f,
// Wrap candidate entries into a union Object or Directory
func (f *Fs) wrapEntries(entries ...upstream.Entry) (entry, error) {
e, err := f.searchEntries(entries...)
if err != nil {
return nil, err
}
switch e.(type) {
case *upstream.Object:
return &Object{
Object: e.(*upstream.Object),
fs: f,
co: entries,
}, nil
case *upstream.Directory:
return &Directory{
Directory: e.(*upstream.Directory),
cd: entries,
}, nil
default:
return nil, errors.Errorf("unknown object type %T", e)
}
}
// Fs returns the union Fs as the parent
func (o *Object) Fs() fs.Info {
return o.fs
}
// Name of the remote (as passed into NewFs)
@@ -91,7 +123,16 @@ func (f *Fs) Features() *fs.Features {
// Rmdir removes the root directory of the Fs object
func (f *Fs) Rmdir(ctx context.Context, dir string) error {
return f.wr.Rmdir(ctx, dir)
upstreams, err := f.action(ctx, dir)
if err != nil {
return err
}
errs := Errors(make([]error, len(upstreams)))
multithread(len(upstreams), func(i int) {
err := upstreams[i].Rmdir(ctx, dir)
errs[i] = errors.Wrap(err, upstreams[i].Name())
})
return errs.Err()
}
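// The Errors/multithread pattern used here runs one goroutine per upstream,
// collects a per-upstream error wrapped with the upstream name, and Err()
// collapses the slice to nil when every slot is nil. The same pattern recurs
// in Mkdir, Purge, Move, List and put below.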
// Hashes returns hash.HashNone to indicate remote hashing is unavailable
@@ -101,7 +142,22 @@ func (f *Fs) Hashes() hash.Set {
// Mkdir makes the root directory of the Fs object
func (f *Fs) Mkdir(ctx context.Context, dir string) error {
return f.wr.Mkdir(ctx, dir)
upstreams, err := f.create(ctx, dir)
if err == fs.ErrorObjectNotFound && dir != parentDir(dir) {
if err := f.Mkdir(ctx, parentDir(dir)); err != nil {
return err
}
upstreams, err = f.create(ctx, dir)
}
if err != nil {
return err
}
errs := Errors(make([]error, len(upstreams)))
multithread(len(upstreams), func(i int) {
err := upstreams[i].Mkdir(ctx, dir)
errs[i] = errors.Wrap(err, upstreams[i].Name())
})
return errs.Err()
}
// Purge all files in the root and the root directory
@@ -111,7 +167,21 @@ func (f *Fs) Mkdir(ctx context.Context, dir string) error {
//
// Return an error if it doesn't exist
func (f *Fs) Purge(ctx context.Context) error {
return f.wr.Features().Purge(ctx)
for _, r := range f.upstreams {
if r.Features().Purge == nil {
return fs.ErrorCantPurge
}
}
upstreams, err := f.action(ctx, "")
if err != nil {
return err
}
errs := Errors(make([]error, len(upstreams)))
multithread(len(upstreams), func(i int) {
err := upstreams[i].Features().Purge(ctx)
errs[i] = errors.Wrap(err, upstreams[i].Name())
})
return errs.Err()
}
// Copy src to this remote using server side copy operations.
@@ -124,15 +194,26 @@ func (f *Fs) Purge(ctx context.Context) error {
//
// If it isn't possible then return fs.ErrorCantCopy
func (f *Fs) Copy(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
if src.Fs() != f.wr {
srcObj, ok := src.(*Object)
if !ok {
fs.Debugf(src, "Can't copy - not same remote type")
return nil, fs.ErrorCantCopy
}
o, err := f.wr.Features().Copy(ctx, src, remote)
if err != nil {
o := srcObj.UnWrap()
u := o.UpstreamFs()
do := u.Features().Copy
if do == nil {
return nil, fs.ErrorCantCopy
}
if !u.IsCreatable() {
return nil, fs.ErrorPermissionDenied
}
co, err := do(ctx, o, remote)
if err != nil || co == nil {
return nil, err
}
return f.wrapObject(o), nil
wo, err := f.wrapEntries(u.WrapObject(co))
return wo.(*Object), err
}
// Move src to this remote using server side move operations.
@@ -145,15 +226,47 @@ func (f *Fs) Copy(ctx context.Context, src fs.Object, remote string) (fs.Object,
//
// If it isn't possible then return fs.ErrorCantMove
func (f *Fs) Move(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
if src.Fs() != f.wr {
o, ok := src.(*Object)
if !ok {
fs.Debugf(src, "Can't move - not same remote type")
return nil, fs.ErrorCantMove
}
o, err := f.wr.Features().Move(ctx, src, remote)
entries, err := f.actionEntries(o.candidates()...)
if err != nil {
return nil, err
}
return f.wrapObject(o), err
for _, e := range entries {
if e.UpstreamFs().Features().Move == nil {
return nil, fs.ErrorCantMove
}
}
objs := make([]*upstream.Object, len(entries))
errs := Errors(make([]error, len(entries)))
multithread(len(entries), func(i int) {
u := entries[i].UpstreamFs()
o, ok := entries[i].(*upstream.Object)
if !ok {
errs[i] = errors.Wrap(fs.ErrorNotAFile, u.Name())
return
}
mo, err := u.Features().Move(ctx, o.UnWrap(), remote)
if err != nil || mo == nil {
errs[i] = errors.Wrap(err, u.Name())
return
}
objs[i] = u.WrapObject(mo)
})
var en []upstream.Entry
for _, o := range objs {
if o != nil {
en = append(en, o)
}
}
e, err := f.wrapEntries(en...)
if err != nil {
return nil, err
}
return e.(*Object), errs.Err()
}
// DirMove moves src, srcRemote to this remote at dstRemote
@@ -165,12 +278,46 @@ func (f *Fs) Move(ctx context.Context, src fs.Object, remote string) (fs.Object,
//
// If destination exists then return fs.ErrorDirExists
func (f *Fs) DirMove(ctx context.Context, src fs.Fs, srcRemote, dstRemote string) error {
srcFs, ok := src.(*Fs)
sfs, ok := src.(*Fs)
if !ok {
fs.Debugf(srcFs, "Can't move directory - not same remote type")
fs.Debugf(src, "Can't move directory - not same remote type")
return fs.ErrorCantDirMove
}
return f.wr.Features().DirMove(ctx, srcFs.wr, srcRemote, dstRemote)
upstreams, err := sfs.action(ctx, srcRemote)
if err != nil {
return err
}
for _, u := range upstreams {
if u.Features().DirMove == nil {
return fs.ErrorCantDirMove
}
}
errs := Errors(make([]error, len(upstreams)))
multithread(len(upstreams), func(i int) {
su := upstreams[i]
var du *upstream.Fs
for _, u := range f.upstreams {
if u.RootFs.Root() == su.RootFs.Root() {
du = u
}
}
if du == nil {
errs[i] = errors.Wrap(fs.ErrorCantDirMove, su.Name()+":"+su.Root())
return
}
err := du.Features().DirMove(ctx, su.Fs, srcRemote, dstRemote)
errs[i] = errors.Wrap(err, du.Name()+":"+du.Root())
})
errs = errs.FilterNil()
if len(errs) == 0 {
return nil
}
for _, e := range errs {
if errors.Cause(e) != fs.ErrorDirExists {
return errs
}
}
return fs.ErrorDirExists
}
// ChangeNotify calls the passed function with a path
@@ -183,23 +330,23 @@ func (f *Fs) DirMove(ctx context.Context, src fs.Fs, srcRemote, dstRemote string
// regularly. When the channel gets closed, the implementation
// should stop polling and release resources.
func (f *Fs) ChangeNotify(ctx context.Context, fn func(string, fs.EntryType), ch <-chan time.Duration) {
var remoteChans []chan time.Duration
var uChans []chan time.Duration
for _, remote := range f.remotes {
if ChangeNotify := remote.Features().ChangeNotify; ChangeNotify != nil {
for _, u := range f.upstreams {
if ChangeNotify := u.Features().ChangeNotify; ChangeNotify != nil {
ch := make(chan time.Duration)
remoteChans = append(remoteChans, ch)
uChans = append(uChans, ch)
ChangeNotify(ctx, fn, ch)
}
}
go func() {
for i := range ch {
for _, c := range remoteChans {
for _, c := range uChans {
c <- i
}
}
for _, c := range remoteChans {
for _, c := range uChans {
close(c)
}
}()
@@ -208,10 +355,103 @@ func (f *Fs) ChangeNotify(ctx context.Context, fn func(string, fs.EntryType), ch
// DirCacheFlush resets the directory cache - used in testing
// as an optional interface
func (f *Fs) DirCacheFlush() {
for _, remote := range f.remotes {
if DirCacheFlush := remote.Features().DirCacheFlush; DirCacheFlush != nil {
DirCacheFlush()
multithread(len(f.upstreams), func(i int) {
if do := f.upstreams[i].Features().DirCacheFlush; do != nil {
do()
}
})
}
func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, stream bool, options ...fs.OpenOption) (fs.Object, error) {
srcPath := src.Remote()
upstreams, err := f.create(ctx, srcPath)
if err == fs.ErrorObjectNotFound {
if err := f.Mkdir(ctx, parentDir(srcPath)); err != nil {
return nil, err
}
upstreams, err = f.create(ctx, srcPath)
}
if err != nil {
return nil, err
}
if len(upstreams) == 1 {
u := upstreams[0]
var o fs.Object
var err error
if stream {
o, err = u.Features().PutStream(ctx, in, src, options...)
} else {
o, err = u.Put(ctx, in, src, options...)
}
if err != nil {
return nil, err
}
e, err := f.wrapEntries(u.WrapObject(o))
return e.(*Object), err
}
errs := Errors(make([]error, len(upstreams)+1))
// Get multiple readers
readers := make([]io.Reader, len(upstreams))
writers := make([]io.Writer, len(upstreams))
for i := range writers {
r, w := io.Pipe()
bw := bufio.NewWriter(w)
readers[i], writers[i] = r, bw
defer func() {
err := w.Close()
if err != nil {
panic(err)
}
}()
}
go func() {
mw := io.MultiWriter(writers...)
es := make([]error, len(writers)+1)
_, es[len(es)-1] = io.Copy(mw, in)
for i, bw := range writers {
es[i] = bw.(*bufio.Writer).Flush()
}
errs[len(upstreams)] = Errors(es).Err()
}()
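// The single source reader is fanned out to every upstream: io.MultiWriter
// above copies into one buffered pipe writer per upstream, while each
// upstream's Put/PutStream below consumes its own pipe reader concurrently.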
// Multi-threading
objs := make([]upstream.Entry, len(upstreams))
multithread(len(upstreams), func(i int) {
u := upstreams[i]
var o fs.Object
var err error
if stream {
o, err = u.Features().PutStream(ctx, readers[i], src, options...)
} else {
o, err = u.Put(ctx, readers[i], src, options...)
}
if err != nil {
errs[i] = errors.Wrap(err, u.Name())
return
}
objs[i] = u.WrapObject(o)
})
err = errs.Err()
if err != nil {
return nil, err
}
e, err := f.wrapEntries(objs...)
return e.(*Object), err
}
// Put in to the remote path with the modTime given of the given size
//
// May create the object even if it returns an error - if so
// will return the object and the error, otherwise will return
// nil and the error
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
o, err := f.NewObject(ctx, src.Remote())
switch err {
case nil:
return o, o.Update(ctx, in, src, options...)
case fs.ErrorObjectNotFound:
return f.put(ctx, in, src, false, options...)
default:
return nil, err
}
}
@@ -221,29 +461,64 @@ func (f *Fs) DirCacheFlush() {
// will return the object and the error, otherwise will return
// nil and the error
func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
o, err := f.wr.Features().PutStream(ctx, in, src, options...)
if err != nil {
o, err := f.NewObject(ctx, src.Remote())
switch err {
case nil:
return o, o.Update(ctx, in, src, options...)
case fs.ErrorObjectNotFound:
return f.put(ctx, in, src, true, options...)
default:
return nil, err
}
return f.wrapObject(o), err
}
// About gets quota information from the Fs
func (f *Fs) About(ctx context.Context) (*fs.Usage, error) {
return f.wr.Features().About(ctx)
}
// Put in to the remote path with the modTime given of the given size
//
// May create the object even if it returns an error - if so
// will return the object and the error, otherwise will return
// nil and the error
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
o, err := f.wr.Put(ctx, in, src, options...)
if err != nil {
return nil, err
usage := &fs.Usage{
Total: new(int64),
Used: new(int64),
Trashed: new(int64),
Other: new(int64),
Free: new(int64),
Objects: new(int64),
}
return f.wrapObject(o), err
for _, u := range f.upstreams {
usg, err := u.About(ctx)
if err != nil {
return nil, err
}
if usg.Total != nil && usage.Total != nil {
*usage.Total += *usg.Total
} else {
usage.Total = nil
}
if usg.Used != nil && usage.Used != nil {
*usage.Used += *usg.Used
} else {
usage.Used = nil
}
if usg.Trashed != nil && usage.Trashed != nil {
*usage.Trashed += *usg.Trashed
} else {
usage.Trashed = nil
}
if usg.Other != nil && usage.Other != nil {
*usage.Other += *usg.Other
} else {
usage.Other = nil
}
if usg.Free != nil && usage.Free != nil {
*usage.Free += *usg.Free
} else {
usage.Free = nil
}
if usg.Objects != nil && usage.Objects != nil {
*usage.Objects += *usg.Objects
} else {
usage.Objects = nil
}
}
return usage, nil
}
// List the objects and directories in dir into entries. The
@@ -256,60 +531,125 @@ func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options .
// This should return ErrDirNotFound if the directory isn't
// found.
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
set := make(map[string]fs.DirEntry)
found := false
for _, remote := range f.remotes {
var remoteEntries, err = remote.List(ctx, dir)
if err == fs.ErrorDirNotFound {
continue
}
entriess := make([][]upstream.Entry, len(f.upstreams))
errs := Errors(make([]error, len(f.upstreams)))
multithread(len(f.upstreams), func(i int) {
u := f.upstreams[i]
entries, err := u.List(ctx, dir)
if err != nil {
return nil, errors.Wrapf(err, "List failed on %v", remote)
errs[i] = errors.Wrap(err, u.Name())
return
}
found = true
for _, remoteEntry := range remoteEntries {
set[remoteEntry.Remote()] = remoteEntry
uEntries := make([]upstream.Entry, len(entries))
for j, e := range entries {
uEntries[j], _ = u.WrapEntry(e)
}
}
if !found {
return nil, fs.ErrorDirNotFound
}
for _, entry := range set {
if o, ok := entry.(fs.Object); ok {
entry = f.wrapObject(o)
entriess[i] = uEntries
})
if len(errs) == len(errs.FilterNil()) {
errs = errs.Map(func(e error) error {
if errors.Cause(e) == fs.ErrorDirNotFound {
return nil
}
return e
})
if len(errs) == 0 {
return nil, fs.ErrorDirNotFound
}
entries = append(entries, entry)
return nil, errs.Err()
}
return entries, nil
return f.mergeDirEntries(entriess)
}
// NewObject creates a new remote union file object based on the first Object it finds (reverse remote order)
func (f *Fs) NewObject(ctx context.Context, path string) (fs.Object, error) {
for i := range f.remotes {
var remote = f.remotes[len(f.remotes)-i-1]
var obj, err = remote.NewObject(ctx, path)
if err == fs.ErrorObjectNotFound {
continue
// NewObject creates a new remote union file object
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
objs := make([]*upstream.Object, len(f.upstreams))
errs := Errors(make([]error, len(f.upstreams)))
multithread(len(f.upstreams), func(i int) {
u := f.upstreams[i]
o, err := u.NewObject(ctx, remote)
if err != nil && err != fs.ErrorObjectNotFound {
errs[i] = errors.Wrap(err, u.Name())
return
}
if err != nil {
return nil, errors.Wrapf(err, "NewObject failed on %v", remote)
objs[i] = u.WrapObject(o)
})
var entries []upstream.Entry
for _, o := range objs {
if o != nil {
entries = append(entries, o)
}
return f.wrapObject(obj), nil
}
return nil, fs.ErrorObjectNotFound
if len(entries) == 0 {
return nil, fs.ErrorObjectNotFound
}
e, err := f.wrapEntries(entries...)
if err != nil {
return nil, err
}
return e.(*Object), errs.Err()
}
// Precision is the greatest Precision of all remotes
// Precision is the greatest Precision of all upstreams
func (f *Fs) Precision() time.Duration {
var greatestPrecision time.Duration
for _, remote := range f.remotes {
if remote.Precision() > greatestPrecision {
greatestPrecision = remote.Precision()
for _, u := range f.upstreams {
if u.Precision() > greatestPrecision {
greatestPrecision = u.Precision()
}
}
return greatestPrecision
}
func (f *Fs) action(ctx context.Context, path string) ([]*upstream.Fs, error) {
return f.actionPolicy.Action(ctx, f.upstreams, path)
}
func (f *Fs) actionEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
return f.actionPolicy.ActionEntries(entries...)
}
func (f *Fs) create(ctx context.Context, path string) ([]*upstream.Fs, error) {
return f.createPolicy.Create(ctx, f.upstreams, path)
}
func (f *Fs) createEntries(entries ...upstream.Entry) ([]upstream.Entry, error) {
return f.createPolicy.CreateEntries(entries...)
}
func (f *Fs) search(ctx context.Context, path string) (*upstream.Fs, error) {
return f.searchPolicy.Search(ctx, f.upstreams, path)
}
func (f *Fs) searchEntries(entries ...upstream.Entry) (upstream.Entry, error) {
return f.searchPolicy.SearchEntries(entries...)
}
func (f *Fs) mergeDirEntries(entriess [][]upstream.Entry) (fs.DirEntries, error) {
entryMap := make(map[string]([]upstream.Entry))
for _, en := range entriess {
if en == nil {
continue
}
for _, entry := range en {
remote := entry.Remote()
if f.Features().CaseInsensitive {
remote = strings.ToLower(remote)
}
entryMap[remote] = append(entryMap[remote], entry)
}
}
var entries fs.DirEntries
for path := range entryMap {
e, err := f.wrapEntries(entryMap[path]...)
if err != nil {
return nil, err
}
entries = append(entries, e)
}
return entries, nil
}
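// Entries sharing the same remote path across upstreams are therefore
// collapsed into a single union entry; wrapEntries applies the search policy
// to decide which upstream's object backs the merged entry.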
// NewFs constructs an Fs from the path.
//
// The returned Fs is the actual Fs, referenced by remote in the config
@@ -320,51 +660,64 @@ func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) {
if err != nil {
return nil, err
}
if len(opt.Remotes) == 0 {
return nil, errors.New("union can't point to an empty remote - check the value of the remotes setting")
// Backward compatible to old config
if len(opt.Upstreams) == 0 && len(opt.Remotes) > 0 {
for i := 0; i < len(opt.Remotes)-1; i++ {
opt.Remotes[i] = opt.Remotes[i] + ":ro"
}
opt.Upstreams = opt.Remotes
}
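// Illustrative example (remote names made up): an old-style config
// "remotes = remoteA:path remoteB:path" is treated as
// "upstreams = remoteA:path:ro remoteB:path", i.e. every remote except the
// last becomes read-only, preserving the old behaviour where only the last
// remote was written to.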
if len(opt.Remotes) == 1 {
return nil, errors.New("union can't point to a single remote - check the value of the remotes setting")
if len(opt.Upstreams) == 0 {
return nil, errors.New("union can't point to an empty upstream - check the value of the upstreams setting")
}
for _, remote := range opt.Remotes {
if strings.HasPrefix(remote, name+":") {
return nil, errors.New("can't point union remote at itself - check the value of the remote setting")
if len(opt.Upstreams) == 1 {
return nil, errors.New("union can't point to a single upstream - check the value of the upstreams setting")
}
for _, u := range opt.Upstreams {
if strings.HasPrefix(u, name+":") {
return nil, errors.New("can't point union remote at itself - check the value of the upstreams setting")
}
}
var remotes []fs.Fs
for i := range opt.Remotes {
// Last remote first so we return the correct (last) matching fs in case of fs.ErrorIsFile
var remote = opt.Remotes[len(opt.Remotes)-i-1]
_, configName, fsPath, err := fs.ParseRemote(remote)
if err != nil {
upstreams := make([]*upstream.Fs, len(opt.Upstreams))
errs := Errors(make([]error, len(opt.Upstreams)))
multithread(len(opt.Upstreams), func(i int) {
u := opt.Upstreams[i]
upstreams[i], errs[i] = upstream.New(u, root, time.Duration(opt.CacheTime)*time.Second)
})
var usedUpstreams []*upstream.Fs
var fserr error
for i, err := range errs {
if err != nil && err != fs.ErrorIsFile {
return nil, err
}
var rootString = path.Join(fsPath, filepath.ToSlash(root))
if configName != "local" {
rootString = configName + ":" + rootString
// Only the upstreams that return ErrorIsFile will be used, if any
if err == fs.ErrorIsFile {
usedUpstreams = append(usedUpstreams, upstreams[i])
fserr = fs.ErrorIsFile
}
myFs, err := cache.Get(rootString)
if err != nil {
if err == fs.ErrorIsFile {
return myFs, err
}
return nil, err
}
remotes = append(remotes, myFs)
}
// Reverse the remotes again so they are in the order as before
for i, j := 0, len(remotes)-1; i < j; i, j = i+1, j-1 {
remotes[i], remotes[j] = remotes[j], remotes[i]
if fserr == nil {
usedUpstreams = upstreams
}
f := &Fs{
name: name,
root: root,
opt: *opt,
remotes: remotes,
wr: remotes[len(remotes)-1],
name: name,
root: root,
opt: *opt,
upstreams: usedUpstreams,
}
f.actionPolicy, err = policy.Get(opt.ActionPolicy)
if err != nil {
return nil, err
}
f.createPolicy, err = policy.Get(opt.CreatePolicy)
if err != nil {
return nil, err
}
f.searchPolicy, err = policy.Get(opt.SearchPolicy)
if err != nil {
return nil, err
}
var features = (&fs.Features{
CaseInsensitive: true,
@@ -376,9 +729,14 @@ func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) {
SetTier: true,
GetTier: true,
}).Fill(f)
features = features.Mask(f.wr) // mask the features just on the writable fs
for _, f := range upstreams {
if !f.IsWritable() {
continue
}
features = features.Mask(f) // Mask all writable upstream fs
}
// Really need the union of all remotes for these, so
// Really need the union of all upstreams for these, so
// re-instate and calculate separately.
features.ChangeNotify = f.ChangeNotify
features.DirCacheFlush = f.DirCacheFlush
@@ -388,12 +746,12 @@ func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) {
// Clear ChangeNotify and DirCacheFlush if all are nil
clearChangeNotify := true
clearDirCacheFlush := true
for _, remote := range f.remotes {
remoteFeatures := remote.Features()
if remoteFeatures.ChangeNotify != nil {
for _, u := range f.upstreams {
uFeatures := u.Features()
if uFeatures.ChangeNotify != nil {
clearChangeNotify = false
}
if remoteFeatures.DirCacheFlush != nil {
if uFeatures.DirCacheFlush != nil {
clearDirCacheFlush = false
}
}
@@ -407,13 +765,34 @@ func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) {
f.features = features
// Get common intersection of hashes
hashSet := f.remotes[0].Hashes()
for _, remote := range f.remotes[1:] {
hashSet = hashSet.Overlap(remote.Hashes())
hashSet := f.upstreams[0].Hashes()
for _, u := range f.upstreams[1:] {
hashSet = hashSet.Overlap(u.Hashes())
}
f.hashSet = hashSet
return f, nil
return f, fserr
}
func parentDir(absPath string) string {
parent := path.Dir(strings.TrimRight(filepath.ToSlash(absPath), "/"))
if parent == "." {
parent = ""
}
return parent
}
func multithread(num int, fn func(int)) {
var wg sync.WaitGroup
for i := 0; i < num; i++ {
wg.Add(1)
i := i
go func() {
defer wg.Done()
fn(i)
}()
}
wg.Wait()
}
// Check the interfaces are satisfied

View File

@@ -2,17 +2,154 @@
package union_test
import (
"os"
"path/filepath"
"testing"
_ "github.com/rclone/rclone/backend/local"
"github.com/rclone/rclone/fstest"
"github.com/rclone/rclone/fstest/fstests"
"github.com/stretchr/testify/require"
)
// TestIntegration runs integration tests against the remote
func TestIntegration(t *testing.T) {
if *fstest.RemoteName == "" {
t.Skip("Skipping as -remote not set")
}
fstests.Run(t, &fstests.Opt{
RemoteName: "TestUnion:",
NilObject: nil,
SkipFsMatch: true,
RemoteName: *fstest.RemoteName,
UnimplementableFsMethods: []string{"OpenWriterAt", "DuplicateFiles"},
UnimplementableObjectMethods: []string{"MimeType"},
})
}
func TestStandard(t *testing.T) {
if *fstest.RemoteName != "" {
t.Skip("Skipping as -remote set")
}
tempdir1 := filepath.Join(os.TempDir(), "rclone-union-test-standard1")
tempdir2 := filepath.Join(os.TempDir(), "rclone-union-test-standard2")
tempdir3 := filepath.Join(os.TempDir(), "rclone-union-test-standard3")
require.NoError(t, os.MkdirAll(tempdir1, 0744))
require.NoError(t, os.MkdirAll(tempdir2, 0744))
require.NoError(t, os.MkdirAll(tempdir3, 0744))
upstreams := tempdir1 + " " + tempdir2 + " " + tempdir3
name := "TestUnion"
fstests.Run(t, &fstests.Opt{
RemoteName: name + ":",
ExtraConfig: []fstests.ExtraConfigItem{
{Name: name, Key: "type", Value: "union"},
{Name: name, Key: "upstreams", Value: upstreams},
{Name: name, Key: "action_policy", Value: "epall"},
{Name: name, Key: "create_policy", Value: "epmfs"},
{Name: name, Key: "search_policy", Value: "ff"},
},
UnimplementableFsMethods: []string{"OpenWriterAt", "DuplicateFiles"},
UnimplementableObjectMethods: []string{"MimeType"},
})
}
func TestRO(t *testing.T) {
if *fstest.RemoteName != "" {
t.Skip("Skipping as -remote set")
}
tempdir1 := filepath.Join(os.TempDir(), "rclone-union-test-ro1")
tempdir2 := filepath.Join(os.TempDir(), "rclone-union-test-ro2")
tempdir3 := filepath.Join(os.TempDir(), "rclone-union-test-ro3")
require.NoError(t, os.MkdirAll(tempdir1, 0744))
require.NoError(t, os.MkdirAll(tempdir2, 0744))
require.NoError(t, os.MkdirAll(tempdir3, 0744))
upstreams := tempdir1 + " " + tempdir2 + ":ro " + tempdir3 + ":ro"
name := "TestUnionRO"
fstests.Run(t, &fstests.Opt{
RemoteName: name + ":",
ExtraConfig: []fstests.ExtraConfigItem{
{Name: name, Key: "type", Value: "union"},
{Name: name, Key: "upstreams", Value: upstreams},
{Name: name, Key: "action_policy", Value: "epall"},
{Name: name, Key: "create_policy", Value: "epmfs"},
{Name: name, Key: "search_policy", Value: "ff"},
},
UnimplementableFsMethods: []string{"OpenWriterAt", "DuplicateFiles"},
UnimplementableObjectMethods: []string{"MimeType"},
})
}
func TestNC(t *testing.T) {
if *fstest.RemoteName != "" {
t.Skip("Skipping as -remote set")
}
tempdir1 := filepath.Join(os.TempDir(), "rclone-union-test-nc1")
tempdir2 := filepath.Join(os.TempDir(), "rclone-union-test-nc2")
tempdir3 := filepath.Join(os.TempDir(), "rclone-union-test-nc3")
require.NoError(t, os.MkdirAll(tempdir1, 0744))
require.NoError(t, os.MkdirAll(tempdir2, 0744))
require.NoError(t, os.MkdirAll(tempdir3, 0744))
upstreams := tempdir1 + " " + tempdir2 + ":nc " + tempdir3 + ":nc"
name := "TestUnionNC"
fstests.Run(t, &fstests.Opt{
RemoteName: name + ":",
ExtraConfig: []fstests.ExtraConfigItem{
{Name: name, Key: "type", Value: "union"},
{Name: name, Key: "upstreams", Value: upstreams},
{Name: name, Key: "action_policy", Value: "epall"},
{Name: name, Key: "create_policy", Value: "epmfs"},
{Name: name, Key: "search_policy", Value: "ff"},
},
UnimplementableFsMethods: []string{"OpenWriterAt", "DuplicateFiles"},
UnimplementableObjectMethods: []string{"MimeType"},
})
}
func TestPolicy1(t *testing.T) {
if *fstest.RemoteName != "" {
t.Skip("Skipping as -remote set")
}
tempdir1 := filepath.Join(os.TempDir(), "rclone-union-test-policy11")
tempdir2 := filepath.Join(os.TempDir(), "rclone-union-test-policy12")
tempdir3 := filepath.Join(os.TempDir(), "rclone-union-test-policy13")
require.NoError(t, os.MkdirAll(tempdir1, 0744))
require.NoError(t, os.MkdirAll(tempdir2, 0744))
require.NoError(t, os.MkdirAll(tempdir3, 0744))
upstreams := tempdir1 + " " + tempdir2 + " " + tempdir3
name := "TestUnionPolicy1"
fstests.Run(t, &fstests.Opt{
RemoteName: name + ":",
ExtraConfig: []fstests.ExtraConfigItem{
{Name: name, Key: "type", Value: "union"},
{Name: name, Key: "upstreams", Value: upstreams},
{Name: name, Key: "action_policy", Value: "all"},
{Name: name, Key: "create_policy", Value: "lus"},
{Name: name, Key: "search_policy", Value: "all"},
},
UnimplementableFsMethods: []string{"OpenWriterAt", "DuplicateFiles"},
UnimplementableObjectMethods: []string{"MimeType"},
})
}
func TestPolicy2(t *testing.T) {
if *fstest.RemoteName != "" {
t.Skip("Skipping as -remote set")
}
tempdir1 := filepath.Join(os.TempDir(), "rclone-union-test-policy21")
tempdir2 := filepath.Join(os.TempDir(), "rclone-union-test-policy22")
tempdir3 := filepath.Join(os.TempDir(), "rclone-union-test-policy23")
require.NoError(t, os.MkdirAll(tempdir1, 0744))
require.NoError(t, os.MkdirAll(tempdir2, 0744))
require.NoError(t, os.MkdirAll(tempdir3, 0744))
upstreams := tempdir1 + " " + tempdir2 + " " + tempdir3
name := "TestUnionPolicy2"
fstests.Run(t, &fstests.Opt{
RemoteName: name + ":",
ExtraConfig: []fstests.ExtraConfigItem{
{Name: name, Key: "type", Value: "union"},
{Name: name, Key: "upstreams", Value: upstreams},
{Name: name, Key: "action_policy", Value: "all"},
{Name: name, Key: "create_policy", Value: "rand"},
{Name: name, Key: "search_policy", Value: "ff"},
},
UnimplementableFsMethods: []string{"OpenWriterAt", "DuplicateFiles"},
UnimplementableObjectMethods: []string{"MimeType"},
})
}

View File

@@ -0,0 +1,348 @@
package upstream
import (
"context"
"io"
"math"
"path"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/pkg/errors"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/cache"
)
var (
// ErrUsageFieldNotSupported states that the usage field is not supported by the backend
ErrUsageFieldNotSupported = errors.New("this usage field is not supported")
)
// Fs is a wrap of any fs and its configs
type Fs struct {
fs.Fs
RootFs fs.Fs
RootPath string
writable bool
creatable bool
usage *fs.Usage // Cache the usage
cacheTime time.Duration // cache duration
cacheExpiry int64 // usage cache expiry time
cacheMutex sync.RWMutex
cacheOnce sync.Once
cacheUpdate bool // if the cache is updating
}
// Directory describes a wrapped Directory
//
// This is a wrapped Directory which contains the upstream Fs
type Directory struct {
fs.Directory
f *Fs
}
// Object describes a wrapped Object
//
// This is a wrapped Object which contains the upstream Fs
type Object struct {
fs.Object
f *Fs
}
// Entry describes a wrapped fs.DirEntry interface with the
// information of upstream Fs
type Entry interface {
fs.DirEntry
UpstreamFs() *Fs
}
// New creates a new Fs based on the
// string formatted `type:root_path(:ro/:nc)`
func New(remote, root string, cacheTime time.Duration) (*Fs, error) {
_, configName, fsPath, err := fs.ParseRemote(remote)
if err != nil {
return nil, err
}
f := &Fs{
RootPath: root,
writable: true,
creatable: true,
cacheExpiry: time.Now().Unix(),
cacheTime: cacheTime,
usage: &fs.Usage{},
}
if strings.HasSuffix(fsPath, ":ro") {
f.writable = false
f.creatable = false
fsPath = fsPath[0 : len(fsPath)-3]
} else if strings.HasSuffix(fsPath, ":nc") {
f.writable = true
f.creatable = false
fsPath = fsPath[0 : len(fsPath)-3]
}
if configName != "local" {
fsPath = configName + ":" + fsPath
}
rFs, err := cache.Get(fsPath)
if err != nil && err != fs.ErrorIsFile {
return nil, err
}
f.RootFs = rFs
rootString := path.Join(fsPath, filepath.ToSlash(root))
myFs, err := cache.Get(rootString)
if err != nil && err != fs.ErrorIsFile {
return nil, err
}
f.Fs = myFs
return f, err
}
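// Illustrative usage only (the remote name below is made up, not part of this
// change): wrapping a read-only upstream rooted at "backup" with a two minute
// usage cache:
//
//	u, err := New("myremote:backup:ro", "", 2*time.Minute)
//	// on success u.IsWritable() and u.IsCreatable() both report false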
// WrapDirectory wraps a fs.Directory to include the info
// of the upstream Fs
func (f *Fs) WrapDirectory(e fs.Directory) *Directory {
if e == nil {
return nil
}
return &Directory{
Directory: e,
f: f,
}
}
// WrapObject wraps a fs.Object to include the info
// of the upstream Fs
func (f *Fs) WrapObject(o fs.Object) *Object {
if o == nil {
return nil
}
return &Object{
Object: o,
f: f,
}
}
// WrapEntry wraps a fs.DirEntry to include the info
// of the upstream Fs
func (f *Fs) WrapEntry(e fs.DirEntry) (Entry, error) {
switch e.(type) {
case fs.Object:
return f.WrapObject(e.(fs.Object)), nil
case fs.Directory:
return f.WrapDirectory(e.(fs.Directory)), nil
default:
return nil, errors.Errorf("unknown object type %T", e)
}
}
// UpstreamFs gets the upstream Fs the entry is stored in
func (e *Directory) UpstreamFs() *Fs {
return e.f
}
// UpstreamFs gets the upstream Fs the entry is stored in
func (o *Object) UpstreamFs() *Fs {
return o.f
}
// UnWrap returns the Object that this Object is wrapping or
// nil if it isn't wrapping anything
func (o *Object) UnWrap() fs.Object {
return o.Object
}
// IsCreatable returns whether the fs is allowed to create new objects
func (f *Fs) IsCreatable() bool {
return f.creatable
}
// IsWritable returns whether the fs is allowed to write
func (f *Fs) IsWritable() bool {
return f.writable
}
// Put in to the remote path with the modTime given of the given size
//
// May create the object even if it returns an error - if so
// will return the object and the error, otherwise will return
// nil and the error
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
o, err := f.Fs.Put(ctx, in, src, options...)
if err != nil {
return o, err
}
f.cacheMutex.Lock()
defer f.cacheMutex.Unlock()
size := src.Size()
if f.usage.Used != nil {
*f.usage.Used += size
}
if f.usage.Free != nil {
*f.usage.Free -= size
}
if f.usage.Objects != nil {
*f.usage.Objects++
}
return o, nil
}
// PutStream uploads to the remote path with the modTime given of indeterminate size
//
// May create the object even if it returns an error - if so
// will return the object and the error, otherwise will return
// nil and the error
func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
do := f.Features().PutStream
if do == nil {
return nil, fs.ErrorNotImplemented
}
o, err := do(ctx, in, src, options...)
if err != nil {
return o, err
}
f.cacheMutex.Lock()
defer f.cacheMutex.Unlock()
size := o.Size()
if f.usage.Used != nil {
*f.usage.Used += size
}
if f.usage.Free != nil {
*f.usage.Free -= size
}
if f.usage.Objects != nil {
*f.usage.Objects++
}
return o, nil
}
// Update in to the object with the modTime given of the given size
//
// When called from outside a Fs by rclone, src.Size() will always be >= 0.
// But for unknown-sized objects (indicated by src.Size() == -1), Upload should either
// return an error or update the object properly (rather than e.g. calling panic).
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
size := o.Size()
err := o.Object.Update(ctx, in, src, options...)
if err != nil {
return err
}
o.f.cacheMutex.Lock()
defer o.f.cacheMutex.Unlock()
delta := o.Size() - size
if delta <= 0 {
return nil
}
if o.f.usage.Used != nil {
*o.f.usage.Used += size
}
if o.f.usage.Free != nil {
*o.f.usage.Free -= size
}
return nil
}
// About gets quota information from the Fs
func (f *Fs) About(ctx context.Context) (*fs.Usage, error) {
if atomic.LoadInt64(&f.cacheExpiry) <= time.Now().Unix() {
err := f.updateUsage()
if err != nil {
return nil, ErrUsageFieldNotSupported
}
}
f.cacheMutex.RLock()
defer f.cacheMutex.RUnlock()
return f.usage, nil
}
// GetFreeSpace get the free space of the fs
func (f *Fs) GetFreeSpace() (int64, error) {
if atomic.LoadInt64(&f.cacheExpiry) <= time.Now().Unix() {
err := f.updateUsage()
if err != nil {
return math.MaxInt64, ErrUsageFieldNotSupported
}
}
f.cacheMutex.RLock()
defer f.cacheMutex.RUnlock()
if f.usage.Free == nil {
return math.MaxInt64, ErrUsageFieldNotSupported
}
return *f.usage.Free, nil
}
// GetUsedSpace get the used space of the fs
func (f *Fs) GetUsedSpace() (int64, error) {
if atomic.LoadInt64(&f.cacheExpiry) <= time.Now().Unix() {
err := f.updateUsage()
if err != nil {
return 0, ErrUsageFieldNotSupported
}
}
f.cacheMutex.RLock()
defer f.cacheMutex.RUnlock()
if f.usage.Used == nil {
return 0, ErrUsageFieldNotSupported
}
return *f.usage.Used, nil
}
// GetNumObjects get the number of objects of the fs
func (f *Fs) GetNumObjects() (int64, error) {
if atomic.LoadInt64(&f.cacheExpiry) <= time.Now().Unix() {
err := f.updateUsage()
if err != nil {
return 0, ErrUsageFieldNotSupported
}
}
f.cacheMutex.RLock()
defer f.cacheMutex.RUnlock()
if f.usage.Objects == nil {
return 0, ErrUsageFieldNotSupported
}
return *f.usage.Objects, nil
}
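// updateUsage refreshes the cached usage information. The first call
// (guarded by cacheOnce) fetches the usage synchronously; once the cache
// has expired, later calls trigger a single background refresh and return
// immediately with the cached values.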
func (f *Fs) updateUsage() (err error) {
if do := f.RootFs.Features().About; do == nil {
return ErrUsageFieldNotSupported
}
done := false
f.cacheOnce.Do(func() {
f.cacheMutex.Lock()
err = f.updateUsageCore(false)
f.cacheMutex.Unlock()
done = true
})
if done {
return err
}
if !f.cacheUpdate {
f.cacheUpdate = true
go func() {
_ = f.updateUsageCore(true)
f.cacheUpdate = false
}()
}
return nil
}
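// updateUsageCore fetches usage via About on the root Fs with a 15 second
// timeout detached from the caller's context, optionally taking the cache
// lock, then stores the result and pushes out the cache expiry.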
func (f *Fs) updateUsageCore(lock bool) error {
// Run in background, should not be cancelled by user
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
defer cancel()
usage, err := f.RootFs.Features().About(ctx)
if err != nil {
f.cacheUpdate = false
return err
}
if lock {
f.cacheMutex.Lock()
defer f.cacheMutex.Unlock()
}
// Store usage
atomic.StoreInt64(&f.cacheExpiry, time.Now().Add(f.cacheTime).Unix())
f.usage = usage
return nil
}

View File

@@ -989,13 +989,14 @@ func (f *Fs) About(ctx context.Context) (*fs.Usage, error) {
return nil, errors.Wrap(err, "about call failed")
}
usage := &fs.Usage{}
if q.Available != 0 || q.Used != 0 {
if q.Available >= 0 && q.Used >= 0 {
usage.Total = fs.NewUsageValue(q.Available + q.Used)
}
if q.Used >= 0 {
usage.Used = fs.NewUsageValue(q.Used)
}
if q.Used >= 0 {
usage.Used = fs.NewUsageValue(q.Used)
}
if q.Available >= 0 {
usage.Free = fs.NewUsageValue(q.Available)
}
if q.Available >= 0 && q.Used >= 0 {
usage.Total = fs.NewUsageValue(q.Available + q.Used)
}
return usage, nil
}

View File

@@ -322,6 +322,9 @@ be copied to the vfs cache before opening with --vfs-cache-mode full.
VolumeName = strings.Replace(VolumeName, ":", " ", -1)
VolumeName = strings.Replace(VolumeName, "/", " ", -1)
VolumeName = strings.TrimSpace(VolumeName)
if runtime.GOOS == "windows" && len(VolumeName) > 32 {
VolumeName = VolumeName[:32]
}
// Start background task if --background is specified
if Daemon {

View File

@@ -72,7 +72,6 @@ Features
* [Cache](/cache/) backend
* [Chunking](/chunker/) backend
* [Union](/union/) backend
* Experimental [Compression](/press/) backend
* Optional FUSE mount ([rclone mount](/commands/rclone_mount/))
* Multi-threaded downloads to local disk
* Can [serve](/commands/rclone_serve/) local or remote files over [HTTP](/commands/rclone_serve_http/)/[WebDav](/commands/rclone_serve_webdav/)/[FTP](/commands/rclone_serve_ftp/)/[SFTP](/commands/rclone_serve_sftp/)/[dlna](/commands/rclone_serve_dlna/)

View File

@@ -343,3 +343,4 @@ Contributors
* Shing Kit Chan <chanshingkit@gmail.com>
* Franklyn Tackitt <franklyn@tackitt.net>
* Robert-André Mauchin <zebob.m@gmail.com>
* evileye <48332831+ibiruai@users.noreply.github.com>

View File

@@ -113,29 +113,39 @@ Rclone has 3 ways of authenticating with Azure Blob Storage:
#### Account and Key
This is the most straight forward and least flexible way. Just fill in the `account` and `key` lines and leave the rest blank.
This is the most straight forward and least flexible way. Just fill
in the `account` and `key` lines and leave the rest blank.
#### SAS URL
This can be an account level SAS URL or container level SAS URL
This can be an account level SAS URL or container level SAS URL.
To use it leave `account`, `key` blank and fill in `sas_url`.
To use it leave `account`, `key` blank and fill in `sas_url`.
Account level SAS URL or container level SAS URL can be obtained from Azure portal or Azure Storage Explorer.
To get a container level SAS URL right click on a container in the Azure Blob explorer in the Azure portal.
An account level SAS URL or container level SAS URL can be obtained
from the Azure portal or the Azure Storage Explorer. To get a
container level SAS URL right click on a container in the Azure Blob
explorer in the Azure portal.
If You use container level SAS URL, rclone operations are permitted only on particular container, eg
If you use a container level SAS URL, rclone operations are permitted
only on a particular container, eg
rclone ls azureblob:container or rclone ls azureblob:
rclone ls azureblob:container
Since container name already exists in SAS URL, you can leave it empty as well.
You can also list the single container from the root. This will only
show the container specified by the SAS URL.
However these will not work
$ rclone lsd azureblob:
container/
Note that you can't see or access any other containers - this will
fail
rclone lsd azureblob:
rclone ls azureblob:othercontainer
This would be useful for temporarily allowing third parties access to a single container or putting credentials into an untrusted environment.
Container level SAS URLs are useful for temporarily allowing third
parties access to a single container or putting credentials into an
untrusted environment such as a CI build server.
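As an illustrative sketch (the remote name `azsas` and the token value are made up), such a remote only needs `sas_url` filled in:
```
[azsas]
type = azureblob
sas_url = https://myaccount.blob.core.windows.net/container?sv=...&sig=...
```
Running `rclone lsd azsas:` should then show just the one container that the SAS URL grants access to.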
### Multipart uploads ###

View File

@@ -1,81 +0,0 @@
---
title: "Press"
description: "Compression Remote"
date: "2019-05-12"
---
Press (Experimental)
-----------------------------------------
The `press` remote adds compression to another remote. It is best used with remotes containing
many large compressible files or on top of other remotes like crypt.
Please read the [warnings](#warnings) before using this remote.
To use this remote, all you need to do is specify another remote and a compression mode to use:
```
Current remotes:
Name Type
==== ====
remote_to_press sometype
e) Edit existing remote
$ rclone config
n) New remote
d) Delete remote
r) Rename remote
c) Copy remote
s) Set configuration password
q) Quit config
e/n/d/r/c/s/q> n
name> press
...
8 / Compress a remote
\ "press"
...
Storage> press
** See help for press backend at: https://rclone.org/press/ **
Remote to compress.
Enter a string value. Press Enter for the default ("")
remote> remote_to_press
Compression mode. XZ compression mode requires the xz binary to be in PATH.
Enter a string value. Press Enter for the default ("gzip-min").
Choose a number from below, or type in your own value
1 / Fast, real-time compression with reasonable compression ratios.
\ "lz4"
2 / Google's compression algorithm. Slightly faster and larger than LZ4.
\ "snappy"
3 / Standard gzip compression with fastest parameters.
\ "gzip-min"
4 / Standard gzip compression with default parameters.
\ "gzip-default"
5 / Slow but powerful compression with reasonable speed.
\ "xz-min"
6 / Slowest but best compression.
\ "xz-default"
compression_mode> gzip-min
```
### Compression Modes
Currently there are four compression algorithms supported: lz4, snappy, gzip, and xz.
Gzip and xz are further divided into two modes: "min" with less compression and "default" with more.
Currently, xz modes are only supported if there is an xz binary in your system's $PATH.
Depending on your operating system, the methods of installing this binary vary. This may be changed in
future updates.
### Warnings
#### Filetype
If you open a remote wrapped by press, you will see that there are many files with an extension corresponding to
the compression algorithm you chose. These files, with the exception of snappy files, are standard files that
can be opened by various archive programs, but they have some hidden metadata that allows them to be used by rclone.
While you may download and decompress these files at will, do **not** upload any compressed files to a wrapped remote
through any other means than rclone. This will upload files that do not contain metadata and **will** cause unexpected behavior.
#### Experimental
This remote is currently **experimental**. Things may break and data may be lost. Anything you do with this remote is
at your own risk. Please understand the risks associated with using experimental code and don't use this remote in
critical applications.

View File

@@ -1,7 +1,7 @@
---
title: "Union"
description: "Remote Unification"
date: "2018-08-29"
date: "2020-01-25"
---
<i class="fa fa-link"></i> Union
@@ -12,22 +12,90 @@ The `union` remote provides a unification similar to UnionFS using other remotes
Paths may be as deep as required or a local path,
eg `remote:directory/subdirectory` or `/directory/subdirectory`.
During the initial setup with `rclone config` you will specify the target
remotes as a space separated list. The target remotes can either be a local paths or other remotes.
During the initial setup with `rclone config` you will specify the upstream
remotes as a space separated list. The upstream remotes can either be local paths or other remotes.
The order of the remotes is important as it defines which remotes take precedence over others if there are files with the same name in the same logical path.
The last remote is the topmost remote and replaces files with the same name from previous remotes.
Attributes `:ro` and `:nc` can be attached to the end of a path to tag the remote as **read only** or **no create**,
eg `remote:directory/subdirectory:ro` or `remote:directory/subdirectory:nc`.
Only the last remote is used to write to and delete from, all other remotes are read-only.
Subfolders can be used in target remote. Assume a union remote named `backup`
with the remotes `mydrive:private/backup mydrive2:/backup`. Invoking `rclone mkdir backup:desktop`
Subfolders can be used in upstream remotes. Assume a union remote named `backup`
with the remotes `mydrive:private/backup`. Invoking `rclone mkdir backup:desktop`
is exactly the same as invoking `rclone mkdir mydrive2:/backup/desktop`.
There will be no special handling of paths containing `..` segments.
Invoking `rclone mkdir backup:../desktop` is exactly the same as invoking
`rclone mkdir mydrive2:/backup/../desktop`.
### Behavior / Policies
The behavior of the union backend is inspired by [trapexit/mergerfs](https://github.com/trapexit/mergerfs). All functions are grouped into 3 categories: **action**, **create** and **search**. These functions and categories can be assigned a policy which dictates what file or directory is chosen when performing that behavior. Any policy can be assigned to a function or category, though some may not be very useful in practice. For instance: **rand** (random) may be useful for file creation (create) but could lead to very odd behavior if used for `delete` when there is more than one copy of the file.
#### Function / Category classifications
| Category | Description | Functions |
|----------|--------------------------|-------------------------------------------------------------------------------------|
| action | Writing Existing file | move, rmdir, rmdirs, delete, purge and copy, sync (as destination when file exist) |
| create | Create non-existing file | copy, sync (as destination when file not exist) |
| search | Reading and listing file | ls, lsd, lsl, cat, md5sum, sha1sum and copy, sync (as source) |
| N/A | | size, about |
#### Path Preservation
Policies, as described below, are of two basic types: `path preserving` and `non-path preserving`.
All policies which start with `ep` (**epff**, **eplfs**, **eplno**, **eplus**, **epmfs**, **eprand**) are `path preserving`. `ep` stands for `existing path`.
A path preserving policy will only consider upstreams where the relative path being accessed already exists.
When using non-path preserving policies, paths will be created in target upstreams as necessary.
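As a small sketch (the upstream names `A:` and `B:` and the union remote `union:` are hypothetical), the difference shows up when the destination directory exists on only one upstream:
```
# dir exists on A: only
rclone mkdir A:dir

# with a path preserving create policy such as epmfs, the copy can only
# land on A:, because that is the only upstream where dir already exists;
# with a non-path preserving policy such as mfs, dir may instead be
# created on B: if B: has the most free space
rclone copy file.txt union:dir
```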
#### Quota Relevant Policies
Some policies rely on quota information. These policies should be used only if your upstreams support the respective quota fields.
| Policy | Required Field |
|------------|----------------|
| lfs, eplfs | Free |
| mfs, epmfs | Free |
| lus, eplus | Used |
| lno, eplno | Objects |
To check if your upstream supports the field, run `rclone about remote: [flags]` and see if the required field exists.
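For example, the `--json` output of `rclone about` lists exactly the fields the upstream reports (the values below are made up); a missing `free`, `used` or `objects` key means the corresponding policies should be avoided for that upstream:
```
$ rclone about remote: --json
{
    "total": 500107862016,
    "used": 104857600000,
    "free": 395250262016,
    "objects": 12345
}
```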
#### Filters
Policies basically search upstream remotes and create a list of files / paths for functions to work on. The policy is responsible for filtering and sorting. The policy type defines the sorting but filtering is mostly uniform as described below.
* No **search** policies filter.
* All **action** policies will filter out remotes which are tagged as **read-only**.
* All **create** policies will filter out remotes which are tagged **read-only** or **no-create**.
If all remotes are filtered an error will be returned.
#### Policy descriptions
The policy definitions are inspired by [trapexit/mergerfs](https://github.com/trapexit/mergerfs) but are not exactly the same. Some policy definitions may differ because of the much larger latency of remote file systems.
| Policy | Description |
|------------------|------------------------------------------------------------|
| all | Search category: same as **epall**. Action category: same as **epall**. Create category: act on all remotes. |
| epall (existing path, all) | Search category: Given this order configured, act on the first one found where the relative path exists. Action category: apply to all found. Create category: act on all remotes where the relative path exists. |
| epff (existing path, first found) | Act on the first one found, by the time upstreams reply, where the relative path exists. |
| eplfs (existing path, least free space) | Of all the remotes on which the relative path exists choose the one with the least free space. |
| eplus (existing path, least used space) | Of all the remotes on which the relative path exists choose the one with the least used space. |
| eplno (existing path, least number of objects) | Of all the remotes on which the relative path exists choose the one with the least number of objects. |
| epmfs (existing path, most free space) | Of all the remotes on which the relative path exists choose the one with the most free space. |
| eprand (existing path, random) | Calls **epall** and then randomizes. Returns only one remote. |
| ff (first found) | Search category: same as **epff**. Action category: same as **epff**. Create category: Act on the first one found by the time upstreams reply. |
| lfs (least free space) | Search category: same as **eplfs**. Action category: same as **eplfs**. Create category: Pick the remote with the least available free space. |
| lus (least used space) | Search category: same as **eplus**. Action category: same as **eplus**. Create category: Pick the remote with the least used space. |
| lno (least number of objects) | Search category: same as **eplno**. Action category: same as **eplno**. Create category: Pick the remote with the least number of objects. |
| mfs (most free space) | Search category: same as **epmfs**. Action category: same as **epmfs**. Create category: Pick the remote with the most available free space. |
| newest | Pick the file / directory with the largest mtime. |
| rand (random) | Calls **all** and then randomizes. Returns only one remote. |
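The chosen policies can also be overridden per invocation using the flags described further down, for example (a sketch, with `remote:` standing in for your union remote):
```
rclone copy /local/dir remote:dir --union-create-policy mfs --union-action-policy epall
```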
### Setup
Here is an example of how to make a union called `remote` for local folders.
First run:
@@ -49,16 +117,27 @@ XX / Union merges the contents of several remotes
\ "union"
[snip]
Storage> union
List of space separated remotes.
Can be 'remotea:test/dir remoteb:', '"remotea:test/space dir" remoteb:', etc.
The last remote is used to write to.
List of space separated upstreams.
Can be 'upstreama:test/dir upstreamb:', '\"upstreama:test/space:ro dir\" upstreamb:', etc.
Enter a string value. Press Enter for the default ("").
remotes>
upstreams>
Policy to choose upstream on ACTION class.
Enter a string value. Press Enter for the default ("epall").
action_policy>
Policy to choose upstream on CREATE class.
Enter a string value. Press Enter for the default ("epmfs").
create_policy>
Policy to choose upstream on SEARCH class.
Enter a string value. Press Enter for the default ("ff").
search_policy>
Cache time of usage and free space (in seconds)
Enter a signed integer. Press Enter for the default ("120").
cache_time>
Remote config
--------------------
[remote]
type = union
remotes = C:\dir1 C:\dir2 C:\dir3
upstreams = C:\dir1 C:\dir2 C:\dir3
--------------------
y) Yes this is OK
e) Edit this remote
@@ -97,17 +176,53 @@ Copy another local directory to the union directory called source, which will be
<!--- autogenerated options start - DO NOT EDIT, instead edit fs.RegInfo in backend/union/union.go then run make backenddocs -->
### Standard Options
Here are the standard options specific to union (Union merges the contents of several remotes).
Here are the standard options specific to union (Union merges the contents of several upstream fs).
#### --union-remotes
#### --union-upstreams
List of space separated remotes.
Can be 'remotea:test/dir remoteb:', '"remotea:test/space dir" remoteb:', etc.
The last remote is used to write to.
List of space separated upstreams.
Can be 'upstreama:test/dir upstreamb:', '"upstreama:test/space:ro dir" upstreamb:', etc.
- Config: remotes
- Env Var: RCLONE_UNION_REMOTES
- Config: upstreams
- Env Var: RCLONE_UNION_UPSTREAMS
- Type: string
- Default: ""
#### --union-action-policy
Policy to choose upstream on ACTION class.
- Config: action_policy
- Env Var: RCLONE_UNION_ACTION_POLICY
- Type: string
- Default: "epall"
#### --union-create-policy
Policy to choose upstream on CREATE class.
- Config: create_policy
- Env Var: RCLONE_UNION_CREATE_POLICY
- Type: string
- Default: "epmfs"
#### --union-search-policy
Policy to choose upstream on SEARCH class.
- Config: search_policy
- Env Var: RCLONE_UNION_SEARCH_POLICY
- Type: string
- Default: "ff"
#### --union-cache-time
Cache time of usage and free space (in seconds)
- Config: cache_time
- Env Var: RCLONE_UNION_CACHE_TIME
- Type: int
- Default: 120
<!--- autogenerated options stop -->
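Each of these options can equally be supplied through its environment variable, for instance (illustrative):
```
RCLONE_UNION_CACHE_TIME=300 rclone about remote:
```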

View File

@@ -262,4 +262,4 @@ backends:
- backend: "mailru"
remote: "TestMailru:"
subdir: false
fastlist: false
fastlist: false

9
go.mod
View File

@@ -13,17 +13,14 @@ require (
github.com/atotto/clipboard v0.1.2
github.com/aws/aws-sdk-go v1.29.9
github.com/billziss-gh/cgofuse v1.2.0
github.com/buengese/xxh32 v1.0.1
github.com/djherbis/times v1.2.0
github.com/dropbox/dropbox-sdk-go-unofficial v5.6.0+incompatible
github.com/gabriel-vasile/mimetype v1.0.2
github.com/google/go-querystring v1.0.0 // indirect
github.com/gopherjs/gopherjs v0.0.0-20190812055157-5d271430af9f // indirect
github.com/hanwen/go-fuse/v2 v2.0.3-0.20191108143333-152e6ac32d54
github.com/jlaffaye/ftp v0.0.0-20191218041957-e1b8fdd0dcc3
github.com/jzelinskie/whirlpool v0.0.0-20170603002051-c19460b8caa6
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect
github.com/klauspost/compress v1.10.1
github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect
github.com/koofr/go-httpclient v0.0.0-20190818202018-e0dc8fd921dc
github.com/koofr/go-koofrclient v0.0.0-20190724113126-8e5366da203a
@@ -36,10 +33,9 @@ require (
github.com/ncw/swift v1.0.50
github.com/nsf/termbox-go v0.0.0-20200204031403-4d2b513ad8be
github.com/okzk/sdnotify v0.0.0-20180710141335-d9becc38acbd
github.com/onsi/ginkgo v1.12.0 // indirect
github.com/onsi/gomega v1.9.0 // indirect
github.com/onsi/ginkgo v1.9.0 // indirect
github.com/onsi/gomega v1.6.0 // indirect
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/pierrec/lz4 v2.4.1+incompatible
github.com/pkg/errors v0.9.1
github.com/pkg/sftp v1.11.0
github.com/prometheus/client_golang v1.4.1
@@ -54,7 +50,6 @@ require (
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.5.1
github.com/t3rm1n4l/go-mega v0.0.0-20200117211730-79a813bb328d
github.com/ulikunitz/xz v0.5.7
github.com/xanzy/ssh-agent v0.2.1
github.com/youmark/pkcs8 v0.0.0-20191102193632-94c173a94d60
github.com/yunify/qingstor-sdk-go/v3 v3.2.0

27
go.sum
View File

@@ -76,8 +76,6 @@ github.com/billziss-gh/cgofuse v1.2.0 h1:FMdQSygSBpD4yEPENJcmvfCdmNWMVkPLlD7wWdl
github.com/billziss-gh/cgofuse v1.2.0/go.mod h1:LJjoaUojlVjgo5GQoEJTcJNqZJeRU0nCR84CyxKt2YM=
github.com/bradfitz/iter v0.0.0-20140124041915-454541ec3da2/go.mod h1:PyRFw1Lt2wKX4ZVSQ2mk+PeDa1rxyObEDlApuIsUKuo=
github.com/bradfitz/iter v0.0.0-20190303215204-33e6a9893b0c/go.mod h1:PyRFw1Lt2wKX4ZVSQ2mk+PeDa1rxyObEDlApuIsUKuo=
github.com/buengese/xxh32 v1.0.1 h1:aNZNg2XxotiTr6JD+R4bzmL1uzMZ2KEKvxyj4P1Z1Xw=
github.com/buengese/xxh32 v1.0.1/go.mod h1:Q5GTtu7m/GuqzCc8YZ0n+oetaGFwW7oy291HvqLTZFk=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
@@ -108,12 +106,8 @@ github.com/dropbox/dropbox-sdk-go-unofficial v5.6.0+incompatible/go.mod h1:lr+Lh
github.com/dustin/go-humanize v0.0.0-20180421182945-02af3965c54e/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/frankban/quicktest v1.7.3 h1:kV0lw0TH1j1hozahVmcpFCsbV5hcS4ZalH+U7UoeTow=
github.com/frankban/quicktest v1.7.3/go.mod h1:V1d2J5pfxYH6EjBAgSK7YNXcXlTWxUHdE1sVDXkjnig=
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/gabriel-vasile/mimetype v1.0.2 h1:GKCo1TUCg0pV0R4atTcaLv/9SI2W9xPgMySZxUxcJOE=
github.com/gabriel-vasile/mimetype v1.0.2/go.mod h1:6CDPel/o/3/s4+bp6kIbsWATq8pmgOisOPG40CJa6To=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
github.com/glycerine/goconvey v0.0.0-20180728074245-46e3a41ad493/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
@@ -146,7 +140,6 @@ github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3 h1:gyjaxf+svBWX08ZjK86iN9geUJF0H6gp2IRKX6Nf6/I=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db h1:woRePGFeVFfLKN/pOkfl+p/TAqKOfFu+7KPlMVpok/w=
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180124185431-e89373fe6b4a/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
@@ -211,8 +204,6 @@ github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 h1:iQTw/8FWTuc7uia
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8=
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.10.1 h1:a/QY0o9S6wCi0XhxaMX/QmusicNUqCqFugR6WKPOSoQ=
github.com/klauspost/compress v1.10.1/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2 h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s=
@@ -267,19 +258,16 @@ github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn
github.com/okzk/sdnotify v0.0.0-20180710141335-d9becc38acbd h1:+iAPaTbi1gZpcpDwe/BW1fx7Xoesv69hLNGPheoyhBs=
github.com/okzk/sdnotify v0.0.0-20180710141335-d9becc38acbd/go.mod h1:4soZNh0zW0LtYGdQ416i0jO0EIqMGcbtaspRS4BDvRQ=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.12.0 h1:Iw5WCbBcaAAd0fpRb1c9r5YCylv4XDoCSigm1zLevwU=
github.com/onsi/ginkgo v1.12.0/go.mod h1:oUhWkIvk5aDxtKvDDuw8gItl8pKl42LzjC9KZE0HfGg=
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
github.com/onsi/gomega v1.9.0 h1:R1uwffexN6Pr340GtYRIdZmAiN4J+iw6WG4wog1DUXg=
github.com/onsi/gomega v1.9.0/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA=
github.com/onsi/ginkgo v1.9.0 h1:SZjF721BByVj8QH636/8S2DnX4n0Re3SteMmw3N+tzc=
github.com/onsi/ginkgo v1.9.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/gomega v1.6.0 h1:8XTW0fcJZEq9q+Upcyws4JSGua2MFysCL5xkaSgHc+M=
github.com/onsi/gomega v1.6.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14 h1:XeOYlK9W1uCmhjJSsY78Mcuh7MVkNjTzmHx1yBzizSU=
github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14/go.mod h1:jVblp62SafmidSkvWrXyxAme3gaTfEtWwRPGz5cpvHg=
github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/pierrec/lz4 v2.4.1+incompatible h1:mFe7ttWaflA46Mhqh+jUfjp2qTbPYxLB2/OyBppH9dg=
github.com/pierrec/lz4 v2.4.1+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@@ -370,8 +358,6 @@ github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1
github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c h1:u6SKchux2yDvFQnDHS3lPnIRmfVJ5Sxy3ao2SIdysLQ=
github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c/go.mod h1:hzIxponao9Kjc7aWznkXaL4U4TWaDSs8zcsY4Ka08nM=
github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
github.com/ulikunitz/xz v0.5.7 h1:YvTNdFzX6+W5m9msiYg/zpkSURPPtOlzbqYjrFn7Yt4=
github.com/ulikunitz/xz v0.5.7/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/willf/bitset v1.1.9/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
github.com/xanzy/ssh-agent v0.2.1 h1:TCbipTQL2JiiCprBWx9frJ2eJlCYT00NmctrHxVAr70=
github.com/xanzy/ssh-agent v0.2.1/go.mod h1:mLlQY/MoOhWBj+gOGMQkOeiEvkx+8pJSI+0Bx9h2kr4=
@@ -489,7 +475,6 @@ golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191112214154-59a1497f0cea/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191210023423-ac6580df4449 h1:gSbV7h1NRL2G1xTg/owz62CST1oJBmxy4QpMMregXVQ=
golang.org/x/sys v0.0.0-20191210023423-ac6580df4449/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -516,7 +501,6 @@ golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190328211700-ab21143f2384 h1:TFlARGu6Czu1z7q93HTxcP1P+/ZFC/IKythI5RzrnRg=
golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
@@ -592,7 +576,6 @@ gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLks
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
@@ -605,8 +588,6 @@ gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.5 h1:ymVxjfMaHvXD8RqPRmzHHsB3VvucivSkIAvJFDI5O3c=
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

View File

@@ -1,5 +0,0 @@
module github.com/buengese/xxh32
go 1.13
require github.com/frankban/quicktest v1.7.3

View File

@@ -1,11 +0,0 @@
github.com/frankban/quicktest v1.7.3 h1:kV0lw0TH1j1hozahVmcpFCsbV5hcS4ZalH+U7UoeTow=
github.com/frankban/quicktest v1.7.3/go.mod h1:V1d2J5pfxYH6EjBAgSK7YNXcXlTWxUHdE1sVDXkjnig=
github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

View File

@@ -1,223 +0,0 @@
// Package xxh32 implements the very fast XXH hashing algorithm (32 bits version).
// (https://github.com/Cyan4973/XXH/)
package xxh32
import (
"encoding/binary"
)
const (
prime1 uint32 = 2654435761
prime2 uint32 = 2246822519
prime3 uint32 = 3266489917
prime4 uint32 = 668265263
prime5 uint32 = 374761393
primeMask = 0xFFFFFFFF
prime1plus2 = uint32((uint64(prime1) + uint64(prime2)) & primeMask) // 606290984
prime1minus = uint32((-int64(prime1)) & primeMask) // 1640531535
)
// XXHZero represents an xxhash32 object with seed 0.
type XXHZero struct {
v1 uint32
v2 uint32
v3 uint32
v4 uint32
totalLen uint64
buf [16]byte
bufused int
}
// Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state.
func (xxh XXHZero) Sum(b []byte) []byte {
h32 := xxh.Sum32()
return append(b, byte(h32), byte(h32>>8), byte(h32>>16), byte(h32>>24))
}
// Reset resets the Hash to its initial state.
func (xxh *XXHZero) Reset() {
xxh.v1 = prime1plus2
xxh.v2 = prime2
xxh.v3 = 0
xxh.v4 = prime1minus
xxh.totalLen = 0
xxh.bufused = 0
}
// Size returns the number of bytes returned by Sum().
func (xxh *XXHZero) Size() int {
return 4
}
// BlockSize gives the minimum number of bytes accepted by Write().
func (xxh *XXHZero) BlockSize() int {
return 1
}
// Write adds input bytes to the Hash.
// It never returns an error.
func (xxh *XXHZero) Write(input []byte) (int, error) {
if xxh.totalLen == 0 {
xxh.Reset()
}
n := len(input)
m := xxh.bufused
xxh.totalLen += uint64(n)
r := len(xxh.buf) - m
if n < r {
copy(xxh.buf[m:], input)
xxh.bufused += len(input)
return n, nil
}
p := 0
// Causes compiler to work directly from registers instead of stack:
v1, v2, v3, v4 := xxh.v1, xxh.v2, xxh.v3, xxh.v4
if m > 0 {
// some data left from previous update
copy(xxh.buf[xxh.bufused:], input[:r])
xxh.bufused += len(input) - r
// fast rotl(13)
buf := xxh.buf[:16] // BCE hint.
v1 = rol13(v1+binary.LittleEndian.Uint32(buf[:])*prime2) * prime1
v2 = rol13(v2+binary.LittleEndian.Uint32(buf[4:])*prime2) * prime1
v3 = rol13(v3+binary.LittleEndian.Uint32(buf[8:])*prime2) * prime1
v4 = rol13(v4+binary.LittleEndian.Uint32(buf[12:])*prime2) * prime1
p = r
xxh.bufused = 0
}
for n := n - 16; p <= n; p += 16 {
sub := input[p:][:16] //BCE hint for compiler
v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
}
xxh.v1, xxh.v2, xxh.v3, xxh.v4 = v1, v2, v3, v4
copy(xxh.buf[xxh.bufused:], input[p:])
xxh.bufused += len(input) - p
return n, nil
}
// Sum32 returns the 32 bits Hash value.
func (xxh *XXHZero) Sum32() uint32 {
h32 := uint32(xxh.totalLen)
if h32 >= 16 {
h32 += rol1(xxh.v1) + rol7(xxh.v2) + rol12(xxh.v3) + rol18(xxh.v4)
} else {
h32 += prime5
}
p := 0
n := xxh.bufused
buf := xxh.buf
for n := n - 4; p <= n; p += 4 {
h32 += binary.LittleEndian.Uint32(buf[p:p+4]) * prime3
h32 = rol17(h32) * prime4
}
for ; p < n; p++ {
h32 += uint32(buf[p]) * prime5
h32 = rol11(h32) * prime1
}
h32 ^= h32 >> 15
h32 *= prime2
h32 ^= h32 >> 13
h32 *= prime3
h32 ^= h32 >> 16
return h32
}
// ChecksumZero returns the 32bits Hash value.
func ChecksumZero(input []byte) uint32 {
n := len(input)
h32 := uint32(n)
if n < 16 {
h32 += prime5
} else {
v1 := prime1plus2
v2 := prime2
v3 := uint32(0)
v4 := prime1minus
p := 0
for n := n - 16; p <= n; p += 16 {
sub := input[p:][:16] //BCE hint for compiler
v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
}
input = input[p:]
n -= p
h32 += rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
}
p := 0
for n := n - 4; p <= n; p += 4 {
h32 += binary.LittleEndian.Uint32(input[p:p+4]) * prime3
h32 = rol17(h32) * prime4
}
for p < n {
h32 += uint32(input[p]) * prime5
h32 = rol11(h32) * prime1
p++
}
h32 ^= h32 >> 15
h32 *= prime2
h32 ^= h32 >> 13
h32 *= prime3
h32 ^= h32 >> 16
return h32
}
// Uint32Zero hashes x with seed 0.
func Uint32Zero(x uint32) uint32 {
h := prime5 + 4 + x*prime3
h = rol17(h) * prime4
h ^= h >> 15
h *= prime2
h ^= h >> 13
h *= prime3
h ^= h >> 16
return h
}
func rol1(u uint32) uint32 {
return u<<1 | u>>31
}
func rol7(u uint32) uint32 {
return u<<7 | u>>25
}
func rol11(u uint32) uint32 {
return u<<11 | u>>21
}
func rol12(u uint32) uint32 {
return u<<12 | u>>20
}
func rol13(u uint32) uint32 {
return u<<13 | u>>19
}
func rol17(u uint32) uint32 {
return u<<17 | u>>15
}
func rol18(u uint32) uint32 {
return u<<18 | u>>14
}

View File

@@ -1 +0,0 @@
testdata/* linguist-vendored

View File

@@ -1,14 +0,0 @@
language: go
go:
- "1.12"
- "master"
before_install:
- go get github.com/mattn/goveralls
- go get github.com/client9/misspell/cmd/misspell
before_script:
- go vet .
script:
- diff -u <(echo -n) <(gofmt -d ./)
- go test -v
- $GOPATH/bin/goveralls -service=travis-ci
- misspell -locale US -error *.md *.go

View File

@@ -1,76 +0,0 @@
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at vasile.gabriel@email.com. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq

View File

@@ -1,12 +0,0 @@
## Contribute
Contributions to **mimetype** are welcome. If you find an issue and you consider
contributing, you can use the [Github issues tracker](https://github.com/gabriel-vasile/mimetype/issues)
in order to report it, or better yet, open a pull request.
Code contributions must respect these rules:
- code must be test covered
- code must be formatted using gofmt tool
- exported names must be documented
**Important**: By submitting a pull request, you agree to allow the project
owner to license your work under the same license as that used by the project.

View File

@@ -1,79 +0,0 @@
## Examples
- [Detect MIME type](#detect)
- [Check against MIME type](#check)
- [Check base MIME type](#check-parent)
- [Binary file vs text file](#binary-file-vs-text-file)
### Detect
Get the MIME type from a slice of bytes, from a reader and from a file.
```go
// Detect the MIME type of a file stored as a byte slice.
file := "testdata/pdf.pdf"
// Detect the MIME type of a file.
mime, ferr := mimetype.DetectFile(file)
fmt.Println(mime, ferr)
// Output: application/pdf nil
// Detect the MIME type of a reader.
reader, _ := os.Open(file) // ignoring error for brevity's sake
mime, rerr := mimetype.DetectReader(reader)
fmt.Println(mime, rerr)
// Output: application/pdf nil
mime := mimetype.Detect(data)
fmt.Println(mime)
// Output: application/pdf
```
### Check
Test if a file has a specific MIME type. Also accepts MIME type aliases.
```go
mime, err := mimetype.DetectFile("testdata/zip.zip")
// application/x-zip is an alias of application/zip,
// therefore Is returns true both times.
fmt.Println(mime.Is("application/zip"), mime.Is("application/x-zip"), err)
// Output: true true <nil>
```
### Check parent
Test if a file has a specific base MIME type. First perform a detect on the
input and then navigate the parents until the base MIME type is found.
Considering JAR files are just ZIPs containing some metadata files,
if, for example, you need to tell if the input can be unzipped, go up the
MIME hierarchy until zip is found or the root is reached.
```go
detectedMIME, err := mimetype.DetectFile("testdata/jar.jar")
zip := false
for mime := detectedMIME; mime != nil; mime = mime.Parent() {
if mime.Is("application/zip") {
zip = true
}
}
// zip is true, even if the detected MIME was application/jar.
fmt.Println(zip, detectedMIME, err)
// Output: true application/jar <nil>
```
### Binary file vs text file
Considering the definition of a binary file as "a computer file that is not
a text file", they can be differentiated by searching for the text/plain MIME
in its MIME hierarchy.
```go
detectedMIME, err := mimetype.DetectFile("testdata/xml.xml")
isBinary := true
for mime := detectedMIME; mime != nil; mime = mime.Parent() {
if mime.Is("text/plain") {
isBinary = false
}
}
fmt.Println(isBinary, detectedMIME, err)
// Output: false text/xml; charset=utf-8 <nil>
```

View File

@@ -1,21 +0,0 @@
MIT License
Copyright (c) 2018, 2019 Gabriel Vasile
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,74 +0,0 @@
<h1 align="center">
mimetype
</h1>
<h4 align="center">
A package for detecting MIME types and extensions based on magic numbers
</h4>
<h6 align="center">
No C bindings, zero dependencies and thread safe
</h6>
<p align="center">
<a href="https://travis-ci.org/gabriel-vasile/mimetype">
<img alt="Build Status" src="https://travis-ci.org/gabriel-vasile/mimetype.svg?branch=master">
</a>
<a href="https://godoc.org/github.com/gabriel-vasile/mimetype">
<img alt="Documentation" src="https://godoc.org/github.com/gabriel-vasile/mimetype?status.svg">
</a>
<a href="https://goreportcard.com/report/github.com/gabriel-vasile/mimetype">
<img alt="Go report card" src="https://goreportcard.com/badge/github.com/gabriel-vasile/mimetype">
</a>
<a href="https://coveralls.io/github/gabriel-vasile/mimetype?branch=master">
<img alt="Go report card" src="https://coveralls.io/repos/github/gabriel-vasile/mimetype/badge.svg?branch=master">
</a>
<a href="LICENSE">
<img alt="License" src="https://img.shields.io/badge/License-MIT-green.svg">
</a>
</p>
## Install
```bash
go get github.com/gabriel-vasile/mimetype
```
## Usage
There are quick [examples](EXAMPLES.md) and
[GoDoc](https://godoc.org/github.com/gabriel-vasile/mimetype) for full reference.
## Upgrade from v0.3.x to v1.x
In v1.x the detect functions no longer return the MIME type and extension as
strings. Instead they return a [MIME](https://godoc.org/github.com/gabriel-vasile/mimetype#MIME)
struct. To get the string value of the MIME and the extension, call the
`String()` and the `Extension()` methods.
In order to play better with the stdlib `mime` package, v1.x file extensions
include the leading dot, as in ".html".
In v1.x the `text/plain` MIME type is `text/plain; charset=utf-8`.
## Supported MIME types
See [supported mimes](supported_mimes.md) for the list of detected MIME types.
If support is needed for a specific file format, please open an [issue](https://github.com/gabriel-vasile/mimetype/issues/new/choose).
## Structure
**mimetype** uses a hierarchical structure to keep the MIME type detection logic.
This reduces the number of calls needed for detecting the file type. The reason
behind this choice is that there are file formats used as containers for other
file formats. For example, Microsoft Office files are just zip archives,
containing specific metadata files. Once a file has been identified as a
zip, there is no need to check if it is a text file, but it is worth checking if
it is a Microsoft Office file.
To prevent loading entire files into memory, when detecting from a
[reader](https://godoc.org/github.com/gabriel-vasile/mimetype#DetectReader)
or from a [file](https://godoc.org/github.com/gabriel-vasile/mimetype#DetectFile)
**mimetype** limits itself to reading only the first
[3072](https://github.com/gabriel-vasile/mimetype/blob/master/internal/matchers/matchers.go#L6)
bytes from the input.
<div align="center">
<img alt="structure" src="mimetype.gif" width="88%">
</div>
## Contributing
See [CONTRIBUTING.md](CONTRIBUTING.md).

View File

@@ -1,3 +0,0 @@
module github.com/gabriel-vasile/mimetype
go 1.12

View File

@@ -1,536 +0,0 @@
// Copyright (c) 2009 The Go Authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Package json provides a JSON value parser state machine.
// This package is almost entirely copied from the Go stdlib.
// Changes made to it permit users of the package to tell
// if some slice of bytes is a valid beginning of a json string.
package json
import "fmt"
type (
context int
scanStatus int
)
const (
contextKey context = iota
contextObj
contextArr
scanContinue scanStatus = iota // uninteresting byte
scanBeginLiteral // end implied by next result != scanContinue
scanBeginObject // begin object
scanObjectKey // just finished object key (string)
scanObjectValue // just finished non-last object value
scanEndObject // end object (implies scanObjectValue if possible)
scanBeginArray // begin array
scanArrayValue // just finished array value
scanEndArray // end array (implies scanArrayValue if possible)
scanSkipSpace // space byte; can skip; known to be last "continue" result
scanEnd // top-level value ended *before* this byte; known to be first "stop" result
scanError // hit an error, scanner.err.
)
type (
scanner struct {
step func(*scanner, byte) scanStatus
contexts []context
endTop bool
err error
index int
}
)
// Scan returns the number of bytes scanned and if there was any error
// in trying to reach the end of data
func Scan(data []byte) (int, error) {
s := &scanner{}
_ = checkValid(data, s)
return s.index, s.err
}
// checkValid verifies that data is valid JSON-encoded data.
// scan is passed in for use by checkValid to avoid an allocation.
func checkValid(data []byte, scan *scanner) error {
scan.reset()
for _, c := range data {
scan.index++
if scan.step(scan, c) == scanError {
return scan.err
}
}
if scan.eof() == scanError {
return scan.err
}
return nil
}
func isSpace(c byte) bool {
return c == ' ' || c == '\t' || c == '\r' || c == '\n'
}
func (s *scanner) reset() {
s.step = stateBeginValue
s.contexts = s.contexts[0:0]
s.err = nil
}
// eof tells the scanner that the end of input has been reached.
// It returns a scan status just as s.step does.
func (s *scanner) eof() scanStatus {
if s.err != nil {
return scanError
}
if s.endTop {
return scanEnd
}
s.step(s, ' ')
if s.endTop {
return scanEnd
}
if s.err == nil {
s.err = fmt.Errorf("unexpected end of JSON input")
}
return scanError
}
// pushContext pushes a new parse state p onto the parse stack.
func (s *scanner) pushParseState(p context) {
s.contexts = append(s.contexts, p)
}
// popParseState pops a parse state (already obtained) off the stack
// and updates s.step accordingly.
func (s *scanner) popParseState() {
n := len(s.contexts) - 1
s.contexts = s.contexts[0:n]
if n == 0 {
s.step = stateEndTop
s.endTop = true
} else {
s.step = stateEndValue
}
}
// stateBeginValueOrEmpty is the state after reading `[`.
func stateBeginValueOrEmpty(s *scanner, c byte) scanStatus {
if c <= ' ' && isSpace(c) {
return scanSkipSpace
}
if c == ']' {
return stateEndValue(s, c)
}
return stateBeginValue(s, c)
}
// stateBeginValue is the state at the beginning of the input.
func stateBeginValue(s *scanner, c byte) scanStatus {
if c <= ' ' && isSpace(c) {
return scanSkipSpace
}
switch c {
case '{':
s.step = stateBeginStringOrEmpty
s.pushParseState(contextKey)
return scanBeginObject
case '[':
s.step = stateBeginValueOrEmpty
s.pushParseState(contextArr)
return scanBeginArray
case '"':
s.step = stateInString
return scanBeginLiteral
case '-':
s.step = stateNeg
return scanBeginLiteral
case '0': // beginning of 0.123
s.step = state0
return scanBeginLiteral
case 't': // beginning of true
s.step = stateT
return scanBeginLiteral
case 'f': // beginning of false
s.step = stateF
return scanBeginLiteral
case 'n': // beginning of null
s.step = stateN
return scanBeginLiteral
}
if '1' <= c && c <= '9' { // beginning of 1234.5
s.step = state1
return scanBeginLiteral
}
return s.error(c, "looking for beginning of value")
}
// stateBeginStringOrEmpty is the state after reading `{`.
func stateBeginStringOrEmpty(s *scanner, c byte) scanStatus {
if c <= ' ' && isSpace(c) {
return scanSkipSpace
}
if c == '}' {
n := len(s.contexts)
s.contexts[n-1] = contextObj
return stateEndValue(s, c)
}
return stateBeginString(s, c)
}
// stateBeginString is the state after reading `{"key": value,`.
func stateBeginString(s *scanner, c byte) scanStatus {
if c <= ' ' && isSpace(c) {
return scanSkipSpace
}
if c == '"' {
s.step = stateInString
return scanBeginLiteral
}
return s.error(c, "looking for beginning of object key string")
}
// stateEndValue is the state after completing a value,
// such as after reading `{}` or `true` or `["x"`.
func stateEndValue(s *scanner, c byte) scanStatus {
n := len(s.contexts)
if n == 0 {
// Completed top-level before the current byte.
s.step = stateEndTop
s.endTop = true
return stateEndTop(s, c)
}
if c <= ' ' && isSpace(c) {
s.step = stateEndValue
return scanSkipSpace
}
ps := s.contexts[n-1]
switch ps {
case contextKey:
if c == ':' {
s.contexts[n-1] = contextObj
s.step = stateBeginValue
return scanObjectKey
}
return s.error(c, "after object key")
case contextObj:
if c == ',' {
s.contexts[n-1] = contextKey
s.step = stateBeginString
return scanObjectValue
}
if c == '}' {
s.popParseState()
return scanEndObject
}
return s.error(c, "after object key:value pair")
case contextArr:
if c == ',' {
s.step = stateBeginValue
return scanArrayValue
}
if c == ']' {
s.popParseState()
return scanEndArray
}
return s.error(c, "after array element")
}
return s.error(c, "")
}
// stateEndTop is the state after finishing the top-level value,
// such as after reading `{}` or `[1,2,3]`.
// Only space characters should be seen now.
func stateEndTop(s *scanner, c byte) scanStatus {
if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
// Complain about non-space byte on next call.
s.error(c, "after top-level value")
}
return scanEnd
}
// stateInString is the state after reading `"`.
func stateInString(s *scanner, c byte) scanStatus {
if c == '"' {
s.step = stateEndValue
return scanContinue
}
if c == '\\' {
s.step = stateInStringEsc
return scanContinue
}
if c < 0x20 {
return s.error(c, "in string literal")
}
return scanContinue
}
// stateInStringEsc is the state after reading `"\` during a quoted string.
func stateInStringEsc(s *scanner, c byte) scanStatus {
switch c {
case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
s.step = stateInString
return scanContinue
case 'u':
s.step = stateInStringEscU
return scanContinue
}
return s.error(c, "in string escape code")
}
// stateInStringEscU is the state after reading `"\u` during a quoted string.
func stateInStringEscU(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
s.step = stateInStringEscU1
return scanContinue
}
// numbers
return s.error(c, "in \\u hexadecimal character escape")
}
// stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
func stateInStringEscU1(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
s.step = stateInStringEscU12
return scanContinue
}
// numbers
return s.error(c, "in \\u hexadecimal character escape")
}
// stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
func stateInStringEscU12(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
s.step = stateInStringEscU123
return scanContinue
}
// numbers
return s.error(c, "in \\u hexadecimal character escape")
}
// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
func stateInStringEscU123(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
s.step = stateInString
return scanContinue
}
// numbers
return s.error(c, "in \\u hexadecimal character escape")
}
// stateNeg is the state after reading `-` during a number.
func stateNeg(s *scanner, c byte) scanStatus {
if c == '0' {
s.step = state0
return scanContinue
}
if '1' <= c && c <= '9' {
s.step = state1
return scanContinue
}
return s.error(c, "in numeric literal")
}
// state1 is the state after reading a non-zero integer during a number,
// such as after reading `1` or `100` but not `0`.
func state1(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' {
s.step = state1
return scanContinue
}
return state0(s, c)
}
// state0 is the state after reading `0` during a number.
func state0(s *scanner, c byte) scanStatus {
if c == '.' {
s.step = stateDot
return scanContinue
}
if c == 'e' || c == 'E' {
s.step = stateE
return scanContinue
}
return stateEndValue(s, c)
}
// stateDot is the state after reading the integer and decimal point in a number,
// such as after reading `1.`.
func stateDot(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' {
s.step = stateDot0
return scanContinue
}
return s.error(c, "after decimal point in numeric literal")
}
// stateDot0 is the state after reading the integer, decimal point, and subsequent
// digits of a number, such as after reading `3.14`.
func stateDot0(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' {
return scanContinue
}
if c == 'e' || c == 'E' {
s.step = stateE
return scanContinue
}
return stateEndValue(s, c)
}
// stateE is the state after reading the mantissa and e in a number,
// such as after reading `314e` or `0.314e`.
func stateE(s *scanner, c byte) scanStatus {
if c == '+' || c == '-' {
s.step = stateESign
return scanContinue
}
return stateESign(s, c)
}
// stateESign is the state after reading the mantissa, e, and sign in a number,
// such as after reading `314e-` or `0.314e+`.
func stateESign(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' {
s.step = stateE0
return scanContinue
}
return s.error(c, "in exponent of numeric literal")
}
// stateE0 is the state after reading the mantissa, e, optional sign,
// and at least one digit of the exponent in a number,
// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
func stateE0(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' {
return scanContinue
}
return stateEndValue(s, c)
}
// stateT is the state after reading `t`.
func stateT(s *scanner, c byte) scanStatus {
if c == 'r' {
s.step = stateTr
return scanContinue
}
return s.error(c, "in literal true (expecting 'r')")
}
// stateTr is the state after reading `tr`.
func stateTr(s *scanner, c byte) scanStatus {
if c == 'u' {
s.step = stateTru
return scanContinue
}
return s.error(c, "in literal true (expecting 'u')")
}
// stateTru is the state after reading `tru`.
func stateTru(s *scanner, c byte) scanStatus {
if c == 'e' {
s.step = stateEndValue
return scanContinue
}
return s.error(c, "in literal true (expecting 'e')")
}
// stateF is the state after reading `f`.
func stateF(s *scanner, c byte) scanStatus {
if c == 'a' {
s.step = stateFa
return scanContinue
}
return s.error(c, "in literal false (expecting 'a')")
}
// stateFa is the state after reading `fa`.
func stateFa(s *scanner, c byte) scanStatus {
if c == 'l' {
s.step = stateFal
return scanContinue
}
return s.error(c, "in literal false (expecting 'l')")
}
// stateFal is the state after reading `fal`.
func stateFal(s *scanner, c byte) scanStatus {
if c == 's' {
s.step = stateFals
return scanContinue
}
return s.error(c, "in literal false (expecting 's')")
}
// stateFals is the state after reading `fals`.
func stateFals(s *scanner, c byte) scanStatus {
if c == 'e' {
s.step = stateEndValue
return scanContinue
}
return s.error(c, "in literal false (expecting 'e')")
}
// stateN is the state after reading `n`.
func stateN(s *scanner, c byte) scanStatus {
if c == 'u' {
s.step = stateNu
return scanContinue
}
return s.error(c, "in literal null (expecting 'u')")
}
// stateNu is the state after reading `nu`.
func stateNu(s *scanner, c byte) scanStatus {
if c == 'l' {
s.step = stateNul
return scanContinue
}
return s.error(c, "in literal null (expecting 'l')")
}
// stateNul is the state after reading `nul`.
func stateNul(s *scanner, c byte) scanStatus {
if c == 'l' {
s.step = stateEndValue
return scanContinue
}
return s.error(c, "in literal null (expecting 'l')")
}
// stateError is the state after reaching a syntax error,
// such as after reading `[1}` or `5.1.2`.
func stateError(s *scanner, c byte) scanStatus {
return scanError
}
// error records an error and switches to the error state.
func (s *scanner) error(c byte, context string) scanStatus {
s.step = stateError
s.err = fmt.Errorf("invalid character <<%c>> %s", c, context)
return scanError
}
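The Scan entry point that drives these state functions does not appear in this hunk, so the following is only a sketch of how a step-function scanner of this shape is usually driven. The scanSketch name, the reset of step/err/contexts, and the trailing-space flush are assumptions; scanner, stateBeginValue, scanError and the step/err/contexts fields come from the code above.
func scanSketch(s *scanner, data []byte) (parsed int, err error) {
	// Assumed reset: start at the begin-value state with an empty context
	// stack and no recorded error.
	s.step = stateBeginValue
	s.contexts = s.contexts[:0]
	s.err = nil
	for i, c := range data {
		// Each byte is handed to the current state function, which records
		// the next state in s.step and reports progress or an error.
		if s.step(s, c) == scanError {
			return i, s.err
		}
	}
	// Feed one trailing space so a top-level literal such as `42` or `tru`
	// is pushed through stateEndValue/stateEndTop (or rejected).
	if s.step(s, ' ') == scanError {
		return len(data), s.err
	}
	return len(data), nil
}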


@@ -1,98 +0,0 @@
package matchers
import "bytes"
// Zip matches a zip archive.
func Zip(in []byte) bool {
return len(in) > 3 &&
in[0] == 0x50 && in[1] == 0x4B &&
(in[2] == 0x3 || in[2] == 0x5 || in[2] == 0x7) &&
(in[3] == 0x4 || in[3] == 0x6 || in[3] == 0x8)
}
// SevenZ matches a 7z archive.
func SevenZ(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C})
}
// Epub matches an EPUB file.
func Epub(in []byte) bool {
return len(in) > 58 && bytes.Equal(in[30:58], []byte("mimetypeapplication/epub+zip"))
}
// Jar matches a Java archive file.
func Jar(in []byte) bool {
return bytes.Contains(in, []byte("META-INF/MANIFEST.MF"))
}
// Gzip matches gzip files based on http://www.zlib.org/rfc-gzip.html#header-trailer.
func Gzip(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x1f, 0x8b})
}
// Crx matches a Chrome extension file: a zip archive prepended by "Cr24".
func Crx(in []byte) bool {
return bytes.HasPrefix(in, []byte("Cr24"))
}
// Tar matches a (t)ape (ar)chive file.
func Tar(in []byte) bool {
return len(in) > 262 && bytes.Equal(in[257:262], []byte("ustar"))
}
// Fits matches a Flexible Image Transport System file.
func Fits(in []byte) bool {
return bytes.HasPrefix(in, []byte{
0x53, 0x49, 0x4D, 0x50, 0x4C, 0x45, 0x20, 0x20, 0x3D, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x54,
})
}
// Xar matches an eXtensible ARchive format file.
func Xar(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x78, 0x61, 0x72, 0x21})
}
// Bz2 matches a bzip2 file.
func Bz2(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x42, 0x5A, 0x68})
}
// Ar matches an ar (Unix) archive file.
func Ar(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E})
}
// Deb matches a Debian package file.
func Deb(in []byte) bool {
return len(in) > 8 && bytes.HasPrefix(in[8:], []byte{
0x64, 0x65, 0x62, 0x69, 0x61, 0x6E, 0x2D,
0x62, 0x69, 0x6E, 0x61, 0x72, 0x79,
})
}
// Rar matches a RAR archive file.
func Rar(in []byte) bool {
if !bytes.HasPrefix(in, []byte{0x52, 0x61, 0x72, 0x21, 0x1A, 0x07}) {
return false
}
return len(in) > 8 && (bytes.Equal(in[6:8], []byte{0x01, 0x00}) || in[6] == 0x00)
}
// Warc matches a Web ARChive file.
func Warc(in []byte) bool {
return bytes.HasPrefix(in, []byte("WARC/"))
}
// Zstd matches a Zstandard archive file.
func Zstd(in []byte) bool {
return len(in) >= 4 &&
(0x22 <= in[0] && in[0] <= 0x28 || in[0] == 0x1E) && // Different Zstandard versions.
bytes.HasPrefix(in[1:], []byte{0xB5, 0x2F, 0xFD})
}
// Cab matches a Cabinet archive file.
func Cab(in []byte) bool {
return bytes.HasPrefix(in, []byte("MSCF"))
}


@@ -1,95 +0,0 @@
package matchers
import (
"bytes"
"encoding/binary"
)
// Mp3 matches an mp3 file.
func Mp3(in []byte) bool {
if len(in) < 3 {
return false
}
if bytes.HasPrefix(in, []byte("ID3")) {
// MP3s with an ID3v2 tag will start with "ID3"
// ID3v1 tags, however, appear at the end of the file.
return true
}
// Match MP3 files without tags
switch binary.BigEndian.Uint16(in[:2]) & 0xFFFE {
case 0xFFFA:
// MPEG ADTS, layer III, v1
return true
case 0xFFF2:
// MPEG ADTS, layer III, v2
return true
case 0xFFE2:
// MPEG ADTS, layer III, v2.5
return true
}
return false
}
// Flac matches a Free Lossless Audio Codec file.
func Flac(in []byte) bool {
return bytes.HasPrefix(in, []byte("\x66\x4C\x61\x43\x00\x00\x00\x22"))
}
// Midi matches a Musical Instrument Digital Interface file.
func Midi(in []byte) bool {
return bytes.HasPrefix(in, []byte("\x4D\x54\x68\x64"))
}
// Ape matches a Monkey's Audio file.
func Ape(in []byte) bool {
return bytes.HasPrefix(in, []byte("\x4D\x41\x43\x20\x96\x0F\x00\x00\x34\x00\x00\x00\x18\x00\x00\x00\x90\xE3"))
}
// MusePack matches a Musepack file.
func MusePack(in []byte) bool {
return bytes.HasPrefix(in, []byte("MPCK"))
}
// Wav matches a Waveform Audio File Format file.
func Wav(in []byte) bool {
return len(in) > 12 &&
bytes.Equal(in[:4], []byte("RIFF")) &&
bytes.Equal(in[8:12], []byte("\x57\x41\x56\x45"))
}
// Aiff matches an Audio Interchange File Format file.
func Aiff(in []byte) bool {
return len(in) > 12 &&
bytes.Equal(in[:4], []byte("\x46\x4F\x52\x4D")) &&
bytes.Equal(in[8:12], []byte("\x41\x49\x46\x46"))
}
// Au matches a Sun Microsystems au file.
func Au(in []byte) bool {
return bytes.HasPrefix(in, []byte("\x2E\x73\x6E\x64"))
}
// Amr matches an Adaptive Multi-Rate file.
func Amr(in []byte) bool {
return bytes.HasPrefix(in, []byte("\x23\x21\x41\x4D\x52"))
}
// Aac matches an Advanced Audio Coding file.
func Aac(in []byte) bool {
return bytes.HasPrefix(in, []byte{0xFF, 0xF1}) || bytes.HasPrefix(in, []byte{0xFF, 0xF9})
}
// Voc matches a Creative Voice file.
func Voc(in []byte) bool {
return bytes.HasPrefix(in, []byte("Creative Voice File"))
}
// Qcp matches a Qualcomm Pure Voice file.
func Qcp(in []byte) bool {
return len(in) > 12 &&
bytes.Equal(in[:4], []byte("RIFF")) &&
bytes.Equal(in[8:12], []byte("QLCM"))
}


@@ -1,146 +0,0 @@
package matchers
import (
"bytes"
"debug/macho"
"encoding/binary"
)
// Java bytecode and Mach-O binaries share the same magic number.
// More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe
func classOrMachOFat(in []byte) bool {
// There should be at least 8 bytes for both of them because the only way to
// quickly distinguish them is by comparing the byte at position 7
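// (Bytes 6-7 of a Java class file hold the big-endian major version, at least
// 45 for Java 1.1, whereas bytes 4-7 of a Mach-O fat header hold the
// architecture count, which stays small; the in[7] thresholds in Class and
// MachO below rely on this.)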
if len(in) < 8 {
return false
}
return bytes.HasPrefix(in, []byte{0xCA, 0xFE, 0xBA, 0xBE})
}
// Class matches a java class file.
func Class(in []byte) bool {
return classOrMachOFat(in) && in[7] > 30
}
// MachO matches Mach-O binaries format.
func MachO(in []byte) bool {
if classOrMachOFat(in) && in[7] < 20 {
return true
}
if len(in) < 4 {
return false
}
be := binary.BigEndian.Uint32(in)
le := binary.LittleEndian.Uint32(in)
return be == macho.Magic32 || le == macho.Magic32 || be == macho.Magic64 || le == macho.Magic64
}
// Swf matches an Adobe Flash swf file.
func Swf(in []byte) bool {
return bytes.HasPrefix(in, []byte("CWS")) ||
bytes.HasPrefix(in, []byte("FWS")) ||
bytes.HasPrefix(in, []byte("ZWS"))
}
// Wasm matches a web assembly File Format file.
func Wasm(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x00, 0x61, 0x73, 0x6D})
}
// Dbf matches a dBase file.
// https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
func Dbf(in []byte) bool {
if len(in) < 4 {
return false
}
// 3rd and 4th bytes contain the last update month and day of month
if !(0 < in[2] && in[2] < 13 && 0 < in[3] && in[3] < 32) {
return false
}
// dbf type is dictated by the first byte
dbfTypes := []byte{
0x02, 0x03, 0x04, 0x05, 0x30, 0x31, 0x32, 0x42, 0x62, 0x7B, 0x82,
0x83, 0x87, 0x8A, 0x8B, 0x8E, 0xB3, 0xCB, 0xE5, 0xF5, 0xF4, 0xFB,
}
for _, b := range dbfTypes {
if in[0] == b {
return true
}
}
return false
}
// Exe matches a Windows/DOS executable file.
func Exe(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x4D, 0x5A})
}
// Elf matches an Executable and Linkable Format file.
func Elf(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x7F, 0x45, 0x4C, 0x46})
}
// ElfObj matches an object file.
func ElfObj(in []byte) bool {
return len(in) > 17 && ((in[16] == 0x01 && in[17] == 0x00) ||
(in[16] == 0x00 && in[17] == 0x01))
}
// ElfExe matches an executable file.
func ElfExe(in []byte) bool {
return len(in) > 17 && ((in[16] == 0x02 && in[17] == 0x00) ||
(in[16] == 0x00 && in[17] == 0x02))
}
// ElfLib matches a shared library file.
func ElfLib(in []byte) bool {
return len(in) > 17 && ((in[16] == 0x03 && in[17] == 0x00) ||
(in[16] == 0x00 && in[17] == 0x03))
}
// ElfDump matches a core dump file.
func ElfDump(in []byte) bool {
return len(in) > 17 && ((in[16] == 0x04 && in[17] == 0x00) ||
(in[16] == 0x00 && in[17] == 0x04))
}
// Dcm matches a DICOM medical format file.
func Dcm(in []byte) bool {
return len(in) > 131 &&
bytes.Equal(in[128:132], []byte{0x44, 0x49, 0x43, 0x4D})
}
// Nes matches a Nintendo Entertainment System ROM file.
func Nes(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x4E, 0x45, 0x53, 0x1A})
}
// Marc matches a MARC21 (MAchine-Readable Cataloging) file.
func Marc(in []byte) bool {
// File is at least 24 bytes ("leader" field size)
if len(in) < 24 {
return false
}
// Fixed bytes at offset 20
if !bytes.Equal(in[20:24], []byte("4500")) {
return false
}
// First 5 bytes are ASCII digits
for i := 0; i < 5; i++ {
if in[i] < '0' || in[i] > '9' {
return false
}
}
// Field terminator is present
return bytes.Contains(in, []byte{0x1E})
}


@@ -1,25 +0,0 @@
package matchers
import "bytes"
// Sqlite matches an SQLite database file.
func Sqlite(in []byte) bool {
return bytes.HasPrefix(in, []byte{
0x53, 0x51, 0x4c, 0x69, 0x74, 0x65, 0x20, 0x66,
0x6f, 0x72, 0x6d, 0x61, 0x74, 0x20, 0x33, 0x00,
})
}
// MsAccessAce matches a Microsoft Access database file.
func MsAccessAce(in []byte) bool {
return msAccess(in, []byte("Standard ACE DB"))
}
// MsAccessMdb matches a legacy Microsoft Access database file (JET, 2003 and earlier).
func MsAccessMdb(in []byte) bool {
return msAccess(in, []byte("Standard Jet DB"))
}
func msAccess(in []byte, magic []byte) bool {
return len(in) > 19 && bytes.Equal(in[4:19], magic)
}


@@ -1,32 +0,0 @@
package matchers
import "bytes"
// Pdf matches a Portable Document Format file.
func Pdf(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x25, 0x50, 0x44, 0x46})
}
// DjVu matches a DjVu file.
func DjVu(in []byte) bool {
if len(in) < 12 {
return false
}
if !bytes.HasPrefix(in, []byte{0x41, 0x54, 0x26, 0x54, 0x46, 0x4F, 0x52, 0x4D}) {
return false
}
return bytes.HasPrefix(in[12:], []byte("DJVM")) ||
bytes.HasPrefix(in[12:], []byte("DJVU")) ||
bytes.HasPrefix(in[12:], []byte("DJVI")) ||
bytes.HasPrefix(in[12:], []byte("THUM"))
}
// Mobi matches a Mobi file.
func Mobi(in []byte) bool {
return len(in) > 67 && bytes.Equal(in[60:68], []byte("BOOKMOBI"))
}
// Lit matches a Microsoft Lit file.
func Lit(in []byte) bool {
return bytes.HasPrefix(in, []byte("ITOLITLS"))
}


@@ -1,27 +0,0 @@
package matchers
import "bytes"
// Woff matches a Web Open Font Format file.
func Woff(in []byte) bool {
return bytes.HasPrefix(in, []byte("wOFF"))
}
// Woff2 matches a Web Open Font Format version 2 file.
func Woff2(in []byte) bool {
return bytes.HasPrefix(in, []byte("wOF2"))
}
// Otf matches an OpenType font file.
func Otf(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x4F, 0x54, 0x54, 0x4F, 0x00})
}
// Eot matches an Embedded OpenType font file.
func Eot(in []byte) bool {
return len(in) > 35 &&
bytes.Equal(in[34:36], []byte{0x4C, 0x50}) &&
(bytes.Equal(in[8:11], []byte{0x02, 0x00, 0x01}) ||
bytes.Equal(in[8:11], []byte{0x01, 0x00, 0x00}) ||
bytes.Equal(in[8:11], []byte{0x02, 0x00, 0x02}))
}


@@ -1,44 +0,0 @@
package matchers
import (
"bytes"
"encoding/binary"
)
// Shp matches a shape format file.
// https://www.esri.com/library/whitepapers/pdfs/shapefile.pdf
func Shp(in []byte) bool {
if len(in) < 112 {
return false
}
shapeTypes := []int{
0, // Null shape
1, // Point
3, // Polyline
5, // Polygon
8, // MultiPoint
11, // PointZ
13, // PolylineZ
15, // PolygonZ
18, // MultiPointZ
21, // PointM
23, // PolylineM
25, // PolygonM
28, // MultiPointM
31, // MultiPatch
}
for _, st := range shapeTypes {
if st == int(binary.LittleEndian.Uint32(in[108:112])) {
return true
}
}
return false
}
// Shx matches a shape index format file.
// https://www.esri.com/library/whitepapers/pdfs/shapefile.pdf
func Shx(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x00, 0x00, 0x27, 0x0A})
}


@@ -1,164 +0,0 @@
package matchers
import "bytes"
// Png matches a Portable Network Graphics file.
func Png(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A})
}
// Jpg matches a Joint Photographic Experts Group file.
func Jpg(in []byte) bool {
return bytes.HasPrefix(in, []byte{0xFF, 0xD8, 0xFF})
}
// isJpeg2k matches a generic JPEG2000 file.
func isJpeg2k(in []byte) bool {
if len(in) < 24 {
return false
}
signature := in[4:8]
return bytes.Equal(signature, []byte{0x6A, 0x50, 0x20, 0x20}) ||
bytes.Equal(signature, []byte{0x6A, 0x50, 0x32, 0x20})
}
// Jp2 matches a JPEG 2000 Image file (ISO 15444-1).
func Jp2(in []byte) bool {
return isJpeg2k(in) && bytes.Equal(in[20:24], []byte{0x6a, 0x70, 0x32, 0x20})
}
// Jpx matches a JPEG 2000 Image file (ISO 15444-2).
func Jpx(in []byte) bool {
return isJpeg2k(in) && bytes.Equal(in[20:24], []byte{0x6a, 0x70, 0x78, 0x20})
}
// Jpm matches a JPEG 2000 Image file (ISO 15444-6).
func Jpm(in []byte) bool {
return isJpeg2k(in) && bytes.Equal(in[20:24], []byte{0x6a, 0x70, 0x6D, 0x20})
}
// Gif matches a Graphics Interchange Format file.
func Gif(in []byte) bool {
return bytes.HasPrefix(in, []byte("GIF87a")) ||
bytes.HasPrefix(in, []byte("GIF89a"))
}
// Webp matches a WebP file.
func Webp(in []byte) bool {
return len(in) > 12 &&
bytes.Equal(in[0:4], []byte("RIFF")) &&
bytes.Equal(in[8:12], []byte{0x57, 0x45, 0x42, 0x50})
}
// Bmp matches a bitmap image file.
func Bmp(in []byte) bool {
return len(in) > 1 && in[0] == 0x42 && in[1] == 0x4D
}
// Ps matches a PostScript file.
func Ps(in []byte) bool {
return bytes.HasPrefix(in, []byte("%!PS-Adobe-"))
}
// Psd matches a Photoshop Document file.
func Psd(in []byte) bool {
return bytes.HasPrefix(in, []byte("8BPS"))
}
// Ico matches an ICO file.
func Ico(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x00, 0x00, 0x01, 0x00})
}
// Icns matches an ICNS (Apple Icon Image format) file.
func Icns(in []byte) bool {
return bytes.HasPrefix(in, []byte("icns"))
}
// Tiff matches a Tagged Image File Format file.
func Tiff(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x49, 0x49, 0x2A, 0x00}) ||
bytes.HasPrefix(in, []byte{0x4D, 0x4D, 0x00, 0x2A})
}
// Bpg matches a Better Portable Graphics file.
func Bpg(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x42, 0x50, 0x47, 0xFB})
}
// Dwg matches a CAD drawing file.
func Dwg(in []byte) bool {
if len(in) < 6 || in[0] != 0x41 || in[1] != 0x43 {
return false
}
dwgVersions := [][]byte{
{0x31, 0x2E, 0x34, 0x30},
{0x31, 0x2E, 0x35, 0x30},
{0x32, 0x2E, 0x31, 0x30},
{0x31, 0x30, 0x30, 0x32},
{0x31, 0x30, 0x30, 0x33},
{0x31, 0x30, 0x30, 0x34},
{0x31, 0x30, 0x30, 0x36},
{0x31, 0x30, 0x30, 0x39},
{0x31, 0x30, 0x31, 0x32},
{0x31, 0x30, 0x31, 0x34},
{0x31, 0x30, 0x31, 0x35},
{0x31, 0x30, 0x31, 0x38},
{0x31, 0x30, 0x32, 0x31},
{0x31, 0x30, 0x32, 0x34},
{0x31, 0x30, 0x33, 0x32},
}
for _, d := range dwgVersions {
if bytes.Equal(in[2:6], d) {
return true
}
}
return false
}
// Heic matches a High Efficiency Image Coding (HEIC) file.
func Heic(in []byte) bool {
if len(in) <= 12 {
return false
}
return bytes.Equal(in[4:12], []byte("ftypheic")) ||
bytes.Equal(in[4:12], []byte("ftypheix"))
}
// HeicSequence matches a High Efficiency Image Coding (HEIC) file sequence.
func HeicSequence(in []byte) bool {
if len(in) <= 12 {
return false
}
return bytes.Equal(in[4:12], []byte("ftyphevc")) ||
bytes.Equal(in[4:12], []byte("ftyphevx"))
}
// Heif matches a High Efficiency Image File Format (HEIF) file.
func Heif(in []byte) bool {
if len(in) <= 12 {
return false
}
return bytes.Equal(in[4:12], []byte("ftypmif1")) ||
bytes.Equal(in[4:12], []byte("ftypheim")) ||
bytes.Equal(in[4:12], []byte("ftypheis")) ||
bytes.Equal(in[4:12], []byte("ftypavic"))
}
// HeifSequence matches a High Efficiency Image File Format (HEIF) file sequence.
func HeifSequence(in []byte) bool {
if len(in) <= 12 {
return false
}
return bytes.Equal(in[4:12], []byte("ftypmsf1")) ||
bytes.Equal(in[4:12], []byte("ftyphevm")) ||
bytes.Equal(in[4:12], []byte("ftyphevs")) ||
bytes.Equal(in[4:12], []byte("ftypavcs"))
}


@@ -1,48 +0,0 @@
// Package matchers holds the matching functions used to find MIME types.
package matchers
// ReadLimit is the maximum number of bytes read
// from the input when detecting a reader.
const ReadLimit = 3072
// True is a dummy matching function used to match any input.
func True([]byte) bool {
return true
}
// trimLWS trims whitespace from the beginning of the input.
func trimLWS(in []byte) []byte {
firstNonWS := 0
for ; firstNonWS < len(in) && isWS(in[firstNonWS]); firstNonWS++ {
}
return in[firstNonWS:]
}
// trimRWS trims whitespace from the end of the input.
func trimRWS(in []byte) []byte {
lastNonWS := len(in) - 1
for ; lastNonWS > 0 && isWS(in[lastNonWS]); lastNonWS-- {
}
return in[:lastNonWS+1]
}
func firstLine(in []byte) []byte {
lineEnd := 0
for ; lineEnd < len(in) && in[lineEnd] != '\n'; lineEnd++ {
}
return in[:lineEnd]
}
func isWS(b byte) bool {
return b == '\t' || b == '\n' || b == '\x0c' || b == '\r' || b == ' '
}
func min(a, b int) int {
if a < b {
return a
}
return b
}


@@ -1,154 +0,0 @@
package matchers
import (
"bytes"
"encoding/binary"
"regexp"
)
var msoXMLreg = regexp.MustCompile("\\[Content_Types\\]\\.xml|_rels/\\.rels|docProps")
// msoXML walks through the first few zip local file headers (up to 6) and
// returns whether any of the headers contain a file whose name starts with sig.
func msoXML(in, sig []byte) bool {
pkSig := []byte("PK\003\004")
if !msoXMLreg.Match(in[:min(len(in), 8000)]) {
return false
}
// 30 is the offset where the file name is located in each zip header.
lastCheckedIndex := 0
check := func(in, sig []byte, offset int) bool {
return len(in) > offset && bytes.HasPrefix(in[offset:], sig)
}
// github.com/file/file looks for the msoXML signature in the first 4 local
// headers, but some xlsx files have their signature in later headers.
// testdata/xlsx.1.xlsx is such an example, with the signature in the 5th header.
for i := 0; i < 6 && lastCheckedIndex < len(in); i++ {
in = in[lastCheckedIndex:]
pkIndex := bytes.Index(in, pkSig)
if pkIndex == -1 {
return false
}
if check(in, sig, pkIndex+30) {
return true
}
lastCheckedIndex = pkIndex + 30
}
return false
}
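The +30 offsets above come from the fixed size of a zip local file header: 4 signature bytes plus 26 bytes of fixed fields, after which the stored file name begins. A small illustrative snippet (the header contents here are fabricated for the example):
package main

import (
	"bytes"
	"fmt"
)

func main() {
	// A zip local file header is 30 bytes: "PK\x03\x04" plus 26 bytes of fixed
	// fields; the file name starts at offset 30, which is what msoXML checks.
	header := append([]byte("PK\x03\x04"), make([]byte, 26)...) // fixed fields zeroed for the sketch
	header = append(header, "xl/workbook.xml"...)               // file name field
	fmt.Println(bytes.HasPrefix(header[30:], []byte("xl/")))    // true
}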
// Xlsx matches a Microsoft Excel 2007 file.
func Xlsx(in []byte) bool {
return msoXML(in, []byte("xl/"))
}
// Docx matches a Microsoft Office 2007 file.
func Docx(in []byte) bool {
return msoXML(in, []byte("word/"))
}
// Pptx matches a Microsoft PowerPoint 2007 file.
func Pptx(in []byte) bool {
return msoXML(in, []byte("ppt/"))
}
// Ole matches an Object Linking and Embedding file.
//
// https://en.wikipedia.org/wiki/Object_Linking_and_Embedding
func Ole(in []byte) bool {
return bytes.HasPrefix(in, []byte{0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1})
}
// Doc matches a Microsoft Office 97-2003 file.
//
// BUG(gabriel-vasile): Doc should look for subheaders like Ppt and Xls do.
//
// Ole is a container for Doc, Ppt, Pub and Xls.
// Right now, when an Ole file is detected, it is considered to be a Doc file
// if the checks for Ppt, Pub and Xls failed.
func Doc(in []byte) bool {
return true
}
// Ppt matches a Microsoft PowerPoint 97-2003 file.
func Ppt(in []byte) bool {
if len(in) < 520 {
return false
}
pptSubHeaders := [][]byte{
{0xA0, 0x46, 0x1D, 0xF0},
{0x00, 0x6E, 0x1E, 0xF0},
{0x0F, 0x00, 0xE8, 0x03},
}
for _, h := range pptSubHeaders {
if bytes.HasPrefix(in[512:], h) {
return true
}
}
if bytes.HasPrefix(in[512:], []byte{0xFD, 0xFF, 0xFF, 0xFF}) &&
in[518] == 0x00 && in[519] == 0x00 {
return true
}
return bytes.Contains(in, []byte("MS PowerPoint 97")) ||
bytes.Contains(in, []byte("P\x00o\x00w\x00e\x00r\x00P\x00o\x00i\x00n\x00t\x00 D\x00o\x00c\x00u\x00m\x00e\x00n\x00t"))
}
// Xls matches a Microsoft Excel 97-2003 file.
func Xls(in []byte) bool {
if len(in) <= 512 {
return false
}
xlsSubHeaders := [][]byte{
{0x09, 0x08, 0x10, 0x00, 0x00, 0x06, 0x05, 0x00},
{0xFD, 0xFF, 0xFF, 0xFF, 0x10},
{0xFD, 0xFF, 0xFF, 0xFF, 0x1F},
{0xFD, 0xFF, 0xFF, 0xFF, 0x22},
{0xFD, 0xFF, 0xFF, 0xFF, 0x23},
{0xFD, 0xFF, 0xFF, 0xFF, 0x28},
{0xFD, 0xFF, 0xFF, 0xFF, 0x29},
}
for _, h := range xlsSubHeaders {
if bytes.HasPrefix(in[512:], h) {
return true
}
}
return bytes.Contains(in, []byte("Microsoft Excel")) ||
bytes.Contains(in, []byte("W\x00o\x00r\x00k\x00b\x00o\x00o\x00k"))
}
// Pub matches a Microsoft Publisher file.
func Pub(in []byte) bool {
return matchOleClsid(in, []byte{
0x01, 0x12, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46,
})
}
// Helper to match by a specific CLSID of a compound file
//
// http://fileformats.archiveteam.org/wiki/Microsoft_Compound_File
func matchOleClsid(in []byte, clsid []byte) bool {
if len(in) <= 512 {
return false
}
// SecID of first sector of the directory stream
firstSecID := int(binary.LittleEndian.Uint32(in[48:52]))
// Expected offset of CLSID for root storage object
clsidOffset := 512*(1+firstSecID) + 80
if len(in) <= clsidOffset+16 {
return false
}
return bytes.HasPrefix(in[clsidOffset:], clsid)
}
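A quick worked example of the offset arithmetic in matchOleClsid: with 512-byte sectors, the firstSecID read from bytes 48:52 of the header locates the first directory sector at 512*(1+firstSecID), and the root entry's CLSID sits 80 bytes into it. The firstSecID value below is illustrative:
package main

import "fmt"

func main() {
	// Illustrative value; in a real file it is read from bytes 48:52 of the header.
	firstSecID := 0
	clsidOffset := 512*(1+firstSecID) + 80
	fmt.Println(clsidOffset) // 592: where Pub expects the root storage object's CLSID
}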


@@ -1,48 +0,0 @@
package matchers
import "bytes"
// Odt matches an OpenDocument Text file.
func Odt(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.text"))
}
// Ott matches an OpenDocument Text Template file.
func Ott(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.text-template"))
}
// Ods matches an OpenDocument Spreadsheet file.
func Ods(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet"))
}
// Ots matches an OpenDocument Spreadsheet Template file.
func Ots(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet-template"))
}
// Odp matches an OpenDocument Presentation file.
func Odp(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.presentation"))
}
// Otp matches an OpenDocument Presentation Template file.
func Otp(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.presentation-template"))
}
// Odg matches an OpenDocument Drawing file.
func Odg(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.graphics"))
}
// Otg matches an OpenDocument Drawing Template file.
func Otg(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.graphics-template"))
}
// Odf matches an OpenDocument Formula file.
func Odf(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.formula"))
}


@@ -1,42 +0,0 @@
package matchers
import (
"bytes"
)
/*
NOTE:
In May 2003, two Internet RFCs were published relating to the format.
The Ogg bitstream was defined in RFC 3533 (which is classified as
'informative') and its Internet content type (application/ogg) in RFC
3534 (which is, as of 2006, a proposed standard protocol). In
September 2008, RFC 3534 was obsoleted by RFC 5334, which added
content types video/ogg, audio/ogg and filename extensions .ogx, .ogv,
.oga, .spx.
See:
https://tools.ietf.org/html/rfc3533
https://developer.mozilla.org/en-US/docs/Web/HTTP/Configuring_servers_for_Ogg_media#Serve_media_with_the_correct_MIME_type
https://github.com/file/file/blob/master/magic/Magdir/vorbis
*/
// Ogg matches an Ogg file.
func Ogg(in []byte) bool {
return bytes.HasPrefix(in, []byte("\x4F\x67\x67\x53\x00"))
}
// OggAudio matches an audio ogg file.
func OggAudio(in []byte) bool {
return len(in) >= 37 && (bytes.HasPrefix(in[28:], []byte("\x7fFLAC")) ||
bytes.HasPrefix(in[28:], []byte("\x01vorbis")) ||
bytes.HasPrefix(in[28:], []byte("OpusHead")) ||
bytes.HasPrefix(in[28:], []byte("Speex\x20\x20\x20")))
}
// OggVideo matches a video ogg file.
func OggVideo(in []byte) bool {
return len(in) >= 37 && (bytes.HasPrefix(in[28:], []byte("\x80theora")) ||
bytes.HasPrefix(in[28:], []byte("fishead\x00")) ||
bytes.HasPrefix(in[28:], []byte("\x01video\x00\x00\x00"))) // OGM video
}


@@ -1,130 +0,0 @@
package matchers
import (
"bytes"
"fmt"
)
type (
markupSig []byte
ciSig []byte // case insensitive signature
shebangSig []byte // matches #! followed by the signature
ftypSig []byte // matches audio/video files. www.ftyps.com
xmlSig struct {
// the local name of the root tag
localName []byte
// the namespace of the XML document
xmlns []byte
}
sig interface {
detect([]byte) bool
}
)
func newXmlSig(localName, xmlns string) xmlSig {
ret := xmlSig{xmlns: []byte(xmlns)}
if localName != "" {
ret.localName = []byte(fmt.Sprintf("<%s", localName))
}
return ret
}
// Implement sig interface.
func (hSig markupSig) detect(in []byte) bool {
if len(in) < len(hSig)+1 {
return false
}
// perform case insensitive check
for i, b := range hSig {
db := in[i]
if 'A' <= b && b <= 'Z' {
db &= 0xDF
}
if b != db {
return false
}
}
// Next byte must be space or right angle bracket.
if db := in[len(hSig)]; db != ' ' && db != '>' {
return false
}
return true
}
// Implement sig interface.
func (tSig ciSig) detect(in []byte) bool {
if len(in) < len(tSig)+1 {
return false
}
// perform case insensitive check
for i, b := range tSig {
db := in[i]
if 'A' <= b && b <= 'Z' {
db &= 0xDF
}
if b != db {
return false
}
}
return true
}
// a valid shebang starts with the "#!" characters
// followed by any number of spaces
// followed by the path to the interpreter and optionally, the args for the interpreter
func (sSig shebangSig) detect(in []byte) bool {
in = firstLine(in)
if len(in) < len(sSig)+2 {
return false
}
if in[0] != '#' || in[1] != '!' {
return false
}
in = trimLWS(trimRWS(in[2:]))
return bytes.Equal(in, sSig)
}
// Implement sig interface.
func (fSig ftypSig) detect(in []byte) bool {
return len(in) > 12 &&
bytes.Equal(in[4:8], []byte("ftyp")) &&
bytes.Equal(in[8:12], fSig)
}
// Implement sig interface.
func (xSig xmlSig) detect(in []byte) bool {
l := 512
if len(in) < l {
l = len(in)
}
in = in[:l]
if len(xSig.localName) == 0 {
return bytes.Index(in, xSig.xmlns) > 0
}
if len(xSig.xmlns) == 0 {
return bytes.Index(in, xSig.localName) > 0
}
localNameIndex := bytes.Index(in, xSig.localName)
return localNameIndex != -1 && localNameIndex < bytes.Index(in, xSig.xmlns)
}
// detect returns true if any of the provided signatures pass for the input.
func detect(in []byte, sigs []sig) bool {
for _, sig := range sigs {
if sig.detect(in) {
return true
}
}
return false
}
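The case-insensitive loop in markupSig and ciSig leans on the fact that ASCII upper- and lower-case letters differ only in bit 0x20, so clearing that bit in the input byte (db &= 0xDF) folds it onto the upper-case signature byte. A tiny check of that identity:
package main

import "fmt"

func main() {
	// 'a' is 0x61; clearing bit 0x20 gives 0x41, i.e. 'A'. markupSig and ciSig
	// apply this fold to the input byte whenever the signature byte is an
	// upper-case letter, which is how "<html" matches the "<HTML" signature.
	for _, b := range []byte("<html") {
		if 'a' <= b && b <= 'z' {
			b &= 0xDF
		}
		fmt.Printf("%c", b)
	}
	fmt.Println() // prints <HTML
}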


@@ -1,389 +0,0 @@
package matchers
import (
"bytes"
"github.com/gabriel-vasile/mimetype/internal/json"
)
var (
htmlSigs = []sig{
markupSig("<!DOCTYPE HTML"),
markupSig("<HTML"),
markupSig("<HEAD"),
markupSig("<SCRIPT"),
markupSig("<IFRAME"),
markupSig("<H1"),
markupSig("<DIV"),
markupSig("<FONT"),
markupSig("<TABLE"),
markupSig("<A"),
markupSig("<STYLE"),
markupSig("<TITLE"),
markupSig("<B"),
markupSig("<BODY"),
markupSig("<BR"),
markupSig("<P"),
markupSig("<!--"),
}
xmlSigs = []sig{
markupSig("<?XML"),
}
rssSigs = []sig{
newXmlSig("rss", ""),
}
atomSigs = []sig{
newXmlSig("feed", `xmlns="http://www.w3.org/2005/Atom"`),
}
kmlSigs = []sig{
newXmlSig("kml", `xmlns="http://www.opengis.net/kml/2.2"`),
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.0"`),
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.1"`),
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.2"`),
}
xliffSigs = []sig{
newXmlSig("xliff", `xmlns="urn:oasis:names:tc:xliff:document:1.2"`),
}
colladaSigs = []sig{
newXmlSig("COLLADA", `xmlns="http://www.collada.org/2005/11/COLLADASchema"`),
}
gmlSigs = []sig{
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml"`),
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml/3.2"`),
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml/3.3/exr"`),
}
gpxSigs = []sig{
newXmlSig("gpx", `xmlns="http://www.topografix.com/GPX/1/1"`),
}
tcxSigs = []sig{
newXmlSig("TrainingCenterDatabase", `xmlns="http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2"`),
}
x3dSigs = []sig{
newXmlSig("X3D", `xmlns:xsd="http://www.w3.org/2001/XMLSchema-instance"`),
}
amfSigs = []sig{
newXmlSig("amf", ""),
}
threemfSigs = []sig{
newXmlSig("model", `xmlns="http://schemas.microsoft.com/3dmanufacturing/core/2015/02"`),
}
vCardSigs = []sig{
ciSig("BEGIN:VCARD\n"),
ciSig("BEGIN:VCARD\r\n"),
}
iCalSigs = []sig{
ciSig("BEGIN:VCALENDAR\n"),
ciSig("BEGIN:VCALENDAR\r\n"),
}
phpSigs = []sig{
ciSig("<?PHP"),
ciSig("<?\n"),
ciSig("<?\r"),
ciSig("<? "),
shebangSig("/usr/local/bin/php"),
shebangSig("/usr/bin/php"),
shebangSig("/usr/bin/env php"),
}
jsSigs = []sig{
shebangSig("/bin/node"),
shebangSig("/usr/bin/node"),
shebangSig("/bin/nodejs"),
shebangSig("/usr/bin/nodejs"),
shebangSig("/usr/bin/env node"),
shebangSig("/usr/bin/env nodejs"),
}
luaSigs = []sig{
shebangSig("/usr/bin/lua"),
shebangSig("/usr/local/bin/lua"),
shebangSig("/usr/bin/env lua"),
}
perlSigs = []sig{
shebangSig("/usr/bin/perl"),
shebangSig("/usr/bin/env perl"),
}
pythonSigs = []sig{
shebangSig("/usr/bin/python"),
shebangSig("/usr/local/bin/python"),
shebangSig("/usr/bin/env python"),
}
tclSigs = []sig{
shebangSig("/usr/bin/tcl"),
shebangSig("/usr/local/bin/tcl"),
shebangSig("/usr/bin/env tcl"),
shebangSig("/usr/bin/tclsh"),
shebangSig("/usr/local/bin/tclsh"),
shebangSig("/usr/bin/env tclsh"),
shebangSig("/usr/bin/wish"),
shebangSig("/usr/local/bin/wish"),
shebangSig("/usr/bin/env wish"),
}
)
// Utf32be matches a text file encoded with UTF-32 and with the characters
// represented in big endian.
func Utf32be(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x00, 0x00, 0xFE, 0xFF})
}
// Utf32le matches a text file encoded with UTF-32 and with the characters
// represented in little endian.
func Utf32le(in []byte) bool {
return bytes.HasPrefix(in, []byte{0xFF, 0xFE, 0x00, 0x00})
}
// Utf16be matches a text file encoded with UTF-16 and with the characters
// represented in big endian.
func Utf16be(in []byte) bool {
return bytes.HasPrefix(in, []byte{0xFE, 0xFF})
}
// Utf16le matches a text file encoded with UTF-16 and with the characters
// represented in little endian.
func Utf16le(in []byte) bool {
return bytes.HasPrefix(in, []byte{0xFF, 0xFE})
}
// Utf8 matches a UTF-8 text file.
func Utf8(in []byte) bool {
in = trimLWS(in)
for _, b := range in {
if b <= 0x08 ||
b == 0x0B ||
0x0E <= b && b <= 0x1A ||
0x1C <= b && b <= 0x1F {
return false
}
}
return true
}
// Html matches a Hypertext Markup Language file.
func Html(in []byte) bool {
in = trimLWS(in)
if len(in) == 0 {
return false
}
return detect(in, htmlSigs)
}
// Xml matches an Extensible Markup Language file.
func Xml(in []byte) bool {
in = trimLWS(in)
if len(in) == 0 {
return false
}
return detect(in, xmlSigs)
}
// Php matches a PHP: Hypertext Preprocessor file.
func Php(in []byte) bool {
return detect(in, phpSigs)
}
// Json matches a JavaScript Object Notation file.
func Json(in []byte) bool {
parsed, err := json.Scan(in)
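// The detection buffer holds at most ReadLimit bytes, so an input of full
// length may have been truncated mid-value; in that case only require that the
// scanner consumed every byte it was given, otherwise require an error-free scan.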
if len(in) < ReadLimit {
return err == nil
}
return parsed == len(in)
}
// GeoJson matches an RFC 7946 GeoJSON file.
//
// BUG(gabriel-vasile): The "type" key should be searched for in the root object.
func GeoJson(in []byte) bool {
in = trimLWS(in)
if len(in) == 0 {
return false
}
// geojson is always an object
if in[0] != '{' {
return false
}
s := []byte(`"type"`)
si := bytes.Index(in, s)
sl := len(s)
if si == -1 {
return false
}
// if the "type" string is the suffix of the input
// there is no need to search for the value of the key
if si+sl == len(in) {
return false
}
// skip the "type" part
in = in[si+sl:]
// skip any whitespace before the colon
in = trimLWS(in)
// skip any whitespace after the colon
// not checking if char is colon because json matcher already did check
in = trimLWS(in[1:])
geoJsonTypes := [][]byte{
[]byte(`"Feature"`),
[]byte(`"FeatureCollection"`),
[]byte(`"Point"`),
[]byte(`"LineString"`),
[]byte(`"Polygon"`),
[]byte(`"MultiPoint"`),
[]byte(`"MultiLineString"`),
[]byte(`"MultiPolygon"`),
[]byte(`"GeometryCollection"`),
}
for _, t := range geoJsonTypes {
if bytes.HasPrefix(in, t) {
return true
}
}
return false
}
// NdJson matches a Newline delimited JSON file.
func NdJson(in []byte) bool {
// Separator with carriage return and new line `\r\n`
srn := []byte{0x0D, 0x0A}
// Separator with only new line `\n`
sn := []byte{0x0A}
// total bytes scanned
parsed := 0
// Split by `srn`
for rni, insrn := range bytes.Split(in, srn) {
// separator byte count should be added only after the first split
if rni != 0 {
// Add two as `\r\n` is used for split
parsed += 2
}
// Return false if there is a carriage return `\r`
if bytes.Contains(insrn, []byte{0x0D}) {
return false
}
// Split again by `sn`
for ni, insn := range bytes.Split(insrn, sn) {
// separator byte count should be added only after the first split
if ni != 0 {
// Add one as `\n` is used for split
parsed++
}
// Empty line is valid
if len(insn) == 0 {
continue
}
p, err := json.Scan(insn)
parsed += p
if parsed < ReadLimit && err != nil {
return false
}
}
}
return parsed == len(in)
}
// Js matches a Javascript file.
func Js(in []byte) bool {
return detect(in, jsSigs)
}
// Lua matches a Lua programming language file.
func Lua(in []byte) bool {
return detect(in, luaSigs)
}
// Perl matches a Perl programming language file.
func Perl(in []byte) bool {
return detect(in, perlSigs)
}
// Python matches a Python programming language file.
func Python(in []byte) bool {
return detect(in, pythonSigs)
}
// Tcl matches a Tcl programming language file.
func Tcl(in []byte) bool {
return detect(in, tclSigs)
}
// Rtf matches a Rich Text Format file.
func Rtf(in []byte) bool {
return bytes.HasPrefix(in, []byte("{\\rtf1"))
}
// Svg matches an SVG file.
func Svg(in []byte) bool {
return bytes.Contains(in, []byte("<svg"))
}
// Rss matches a Rich Site Summary file.
func Rss(in []byte) bool {
return detect(in, rssSigs)
}
// Atom matches an Atom Syndication Format file.
func Atom(in []byte) bool {
return detect(in, atomSigs)
}
// Kml matches a Keyhole Markup Language file.
func Kml(in []byte) bool {
return detect(in, kmlSigs)
}
// Xliff matches an XML Localization Interchange File Format file.
func Xliff(in []byte) bool {
return detect(in, xliffSigs)
}
// Collada matches a COLLAborative Design Activity file.
func Collada(in []byte) bool {
return detect(in, colladaSigs)
}
// Gml matches a Geography Markup Language file.
func Gml(in []byte) bool {
return detect(in, gmlSigs)
}
// Gpx matches a GPS Exchange Format file.
func Gpx(in []byte) bool {
return detect(in, gpxSigs)
}
// Tcx matches a Training Center XML file.
func Tcx(in []byte) bool {
return detect(in, tcxSigs)
}
// Amf matches an Additive Manufacturing XML file.
func Amf(in []byte) bool {
return detect(in, amfSigs)
}
// Threemf matches a 3D Manufacturing Format file.
func Threemf(in []byte) bool {
return detect(in, threemfSigs)
}
// X3d matches an Extensible 3D Graphics file.
func X3d(in []byte) bool {
return detect(in, x3dSigs)
}
// VCard matches a Virtual Contact File.
func VCard(in []byte) bool {
return detect(in, vCardSigs)
}
// ICalendar matches an iCalendar file.
func ICalendar(in []byte) bool {
return detect(in, iCalSigs)
}


@@ -1,46 +0,0 @@
package matchers
import (
"bytes"
"encoding/csv"
"io"
)
// Csv matches a comma-separated values file.
func Csv(in []byte) bool {
return sv(in, ',')
}
// Tsv matches a tab-separated values file.
func Tsv(in []byte) bool {
return sv(in, '\t')
}
func sv(in []byte, comma rune) bool {
r := csv.NewReader(butLastLineReader(in, ReadLimit))
r.Comma = comma
r.TrimLeadingSpace = true
r.LazyQuotes = true
r.Comment = '#'
lines, err := r.ReadAll()
return err == nil && r.FieldsPerRecord > 1 && len(lines) > 1
}
// butLastLineReader returns a reader to the provided byte slice.
// the reader is guaranteed to reach EOF before it reads `cutAt` bytes.
// bytes after the last newline are dropped from the input.
func butLastLineReader(in []byte, cutAt int) io.Reader {
if len(in) >= cutAt {
for i := cutAt - 1; i > 0; i-- {
if in[i] == '\n' {
return bytes.NewReader(in[:i])
}
}
// no newline was found between the 0 index and cutAt
return bytes.NewReader(in[:cutAt])
}
return bytes.NewReader(in)
}


@@ -1,71 +0,0 @@
package matchers
import (
"bytes"
)
// WebM matches a WebM file.
func WebM(in []byte) bool {
return isMatroskaFileTypeMatched(in, "webm")
}
// Mkv matches a mkv file.
func Mkv(in []byte) bool {
return isMatroskaFileTypeMatched(in, "matroska")
}
// isMatroskaFileTypeMatched is used for webm and mkv file matching.
// It checks for the .Eߣ sequence. If the sequence is found,
// the input is a Matroska media container, which includes WebM.
// It then verifies which file type it represents by matching the
// file-specific string.
func isMatroskaFileTypeMatched(in []byte, flType string) bool {
if bytes.HasPrefix(in, []byte("\x1A\x45\xDF\xA3")) {
return isFileTypeNamePresent(in, flType)
}
return false
}
// isFileTypeNamePresent accepts the matroska input data stream and searches
// for the given file type in the stream, returning whether a match is found.
// The search logic is: find the first instance of \x42\x82 and then
// look for the given string one byte after that two-byte sequence.
func isFileTypeNamePresent(in []byte, flType string) bool {
ind, maxInd, lenIn := 0, 4096, len(in)
if lenIn < maxInd { // restricting length to 4096
maxInd = lenIn
}
ind = bytes.Index(in[:maxInd], []byte("\x42\x82"))
if ind > 0 && lenIn > ind+3 {
// filetype name will be present exactly
// one byte after the match of the two bytes "\x42\x82"
return bytes.HasPrefix(in[ind+3:], []byte(flType))
}
return false
}
// Flv matches a Flash video file.
func Flv(in []byte) bool {
return bytes.HasPrefix(in, []byte("\x46\x4C\x56\x01"))
}
// Mpeg matches a Moving Picture Experts Group file.
func Mpeg(in []byte) bool {
return len(in) > 3 && bytes.HasPrefix(in, []byte{0x00, 0x00, 0x01}) &&
in[3] >= 0xB0 && in[3] <= 0xBF
}
// Avi matches an Audio Video Interleaved file.
func Avi(in []byte) bool {
return len(in) > 16 &&
bytes.Equal(in[:4], []byte("RIFF")) &&
bytes.Equal(in[8:16], []byte("AVI LIST"))
}
// Asf matches an Advanced Systems Format file.
func Asf(in []byte) bool {
return bytes.HasPrefix(in, []byte{
0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11,
0xA6, 0xD9, 0x00, 0xAA, 0x00, 0x62, 0xCE, 0x6C,
})
}


@@ -1,71 +0,0 @@
package matchers
var (
mp4Sigs = []sig{
ftypSig("avc1"), ftypSig("dash"), ftypSig("iso2"), ftypSig("iso3"),
ftypSig("iso4"), ftypSig("iso5"), ftypSig("iso6"), ftypSig("isom"),
ftypSig("mmp4"), ftypSig("mp41"), ftypSig("mp42"), ftypSig("mp4v"),
ftypSig("mp71"), ftypSig("MSNV"), ftypSig("NDAS"), ftypSig("NDSC"),
ftypSig("NSDC"), ftypSig("NSDH"), ftypSig("NDSM"), ftypSig("NDSP"),
ftypSig("NDSS"), ftypSig("NDXC"), ftypSig("NDXH"), ftypSig("NDXM"),
ftypSig("NDXP"), ftypSig("NDXS"), ftypSig("F4V "), ftypSig("F4P "),
}
threeGPSigs = []sig{
ftypSig("3gp1"), ftypSig("3gp2"), ftypSig("3gp3"), ftypSig("3gp4"),
ftypSig("3gp5"), ftypSig("3gp6"), ftypSig("3gp7"), ftypSig("3gs7"),
ftypSig("3ge6"), ftypSig("3ge7"), ftypSig("3gg6"),
}
threeG2Sigs = []sig{
ftypSig("3g24"), ftypSig("3g25"), ftypSig("3g26"), ftypSig("3g2a"),
ftypSig("3g2b"), ftypSig("3g2c"), ftypSig("KDDI"),
}
amp4Sigs = []sig{
// audio for Adobe Flash Player 9+
ftypSig("F4A "), ftypSig("F4B "),
// Apple iTunes AAC-LC (.M4A) Audio
ftypSig("M4B "), ftypSig("M4P "),
// MPEG-4 (.MP4) for SonyPSP
ftypSig("MSNV"),
// Nero Digital AAC Audio
ftypSig("NDAS"),
}
qtSigs = []sig{ftypSig("qt "), ftypSig("moov")}
mqvSigs = []sig{ftypSig("mqt ")}
m4aSigs = []sig{ftypSig("M4A ")}
// TODO: add support for remaining video formats at ftyps.com.
)
// Mp4 matches an MP4 file.
func Mp4(in []byte) bool {
return detect(in, mp4Sigs)
}
// ThreeGP matches a 3GPP file.
func ThreeGP(in []byte) bool {
return detect(in, threeGPSigs)
}
// ThreeG2 matches a 3GPP2 file.
func ThreeG2(in []byte) bool {
return detect(in, threeG2Sigs)
}
// AMp4 matches an audio MP4 file.
func AMp4(in []byte) bool {
return detect(in, amp4Sigs)
}
// QuickTime matches a QuickTime File Format file.
func QuickTime(in []byte) bool {
return detect(in, qtSigs)
}
// Mqv matches a Sony / Mobile QuickTime file.
func Mqv(in []byte) bool {
return detect(in, mqvSigs)
}
// M4a matches an audio M4A file.
func M4a(in []byte) bool {
return detect(in, m4aSigs)
}


@@ -1,94 +0,0 @@
package mimetype
import "mime"
// MIME represents a file format in the tree structure of formats.
type MIME struct {
mime string
aliases []string
extension string
matchFunc func([]byte) bool
children []*MIME
parent *MIME
}
// String returns the string representation of the MIME type, e.g., "application/zip".
func (n *MIME) String() string {
return n.mime
}
// Extension returns the file extension associated with the MIME type.
// It includes the leading dot, as in ".html". When the file format does not
// have an extension, the empty string is returned.
func (n *MIME) Extension() string {
return n.extension
}
// Parent returns the parent MIME type from the tree structure.
// Each MIME type has a non-nil parent, except for the root MIME type.
func (n *MIME) Parent() *MIME {
return n.parent
}
// Is checks whether this MIME type, or any of its aliases, is equal to the
// expected MIME type. MIME type equality test is done on the "type/subtype"
// sections, ignores any optional MIME parameters, ignores any leading and
// trailing whitespace, and is case insensitive.
func (n *MIME) Is(expectedMIME string) bool {
// Parsing is needed because some detected MIME types contain parameters
// that need to be stripped for the comparison.
expectedMIME, _, _ = mime.ParseMediaType(expectedMIME)
found, _, _ := mime.ParseMediaType(n.mime)
if expectedMIME == found {
return true
}
for _, alias := range n.aliases {
if alias == expectedMIME {
return true
}
}
return false
}
func newMIME(mime, extension string, matchFunc func([]byte) bool, children ...*MIME) *MIME {
n := &MIME{
mime: mime,
extension: extension,
matchFunc: matchFunc,
children: children,
}
for _, c := range children {
c.parent = n
}
return n
}
func (n *MIME) alias(aliases ...string) *MIME {
n.aliases = aliases
return n
}
// match does a depth-first search on the matchers tree.
// It returns the deepest successful matcher for which all the children fail.
func (n *MIME) match(in []byte, deepestMatch *MIME) *MIME {
for _, c := range n.children {
if c.matchFunc(in) {
return c.match(in, c)
}
}
return deepestMatch
}
func (n *MIME) flatten() []*MIME {
out := []*MIME{n}
for _, c := range n.children {
out = append(out, c.flatten()...)
}
return out
}
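The normalization that Is applies to the expected type comes from the standard library's mime.ParseMediaType, which lower-cases the media type and drops parameters; a small sketch of what that buys:
package main

import (
	"fmt"
	"mime"
)

func main() {
	// ParseMediaType trims and lower-cases the type and strips parameters, so
	// Is("TEXT/HTML; charset=utf-8") ends up comparing "text/html" against
	// n.mime (after the same parsing) and against each alias string.
	mt, params, err := mime.ParseMediaType("TEXT/HTML; charset=utf-8")
	fmt.Println(mt, params, err) // text/html map[charset:utf-8] <nil>
}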

Binary file not shown (image removed; was 744 KiB).


@@ -1,68 +0,0 @@
// Package mimetype uses magic number signatures to detect the MIME type of a file.
//
// mimetype stores the list of MIME types in a tree structure with
// "application/octet-stream" at the root of the hierarchy. The hierarchy
// approach minimizes the number of checks that need to be done on the input
// and allows for more precise results once the base type of file has been
// identified.
package mimetype
import (
"io"
"os"
"github.com/gabriel-vasile/mimetype/internal/matchers"
)
// Detect returns the MIME type found from the provided byte slice.
//
// The result is always a valid MIME type, with application/octet-stream
// returned when identification failed.
func Detect(in []byte) (mime *MIME) {
if len(in) == 0 {
return newMIME("inode/x-empty", "", matchers.True)
}
return root.match(in, root)
}
// DetectReader returns the MIME type of the provided reader.
//
// The result is always a valid MIME type, with application/octet-stream
// returned when identification failed with or without an error.
// Any error returned is related to the reading from the input reader.
//
// DetectReader assumes the reader offset is at the start. If the input
// is a ReadSeeker you read from before, it should be rewound before detection:
// reader.Seek(0, io.SeekStart)
//
// To prevent loading entire files into memory, DetectReader reads at most
// matchers.ReadLimit bytes from the reader.
func DetectReader(r io.Reader) (mime *MIME, err error) {
in := make([]byte, matchers.ReadLimit)
n, err := r.Read(in)
if err != nil && err != io.EOF {
return root, err
}
in = in[:n]
return Detect(in), nil
}
// DetectFile returns the MIME type of the provided file.
//
// The result is always a valid MIME type, with application/octet-stream
// returned when identification failed with or without an error.
// Any error returned is related to the opening and reading from the input file.
//
// To prevent loading entire files into memory, DetectFile reads at most
// matchers.ReadLimit bytes from the reader.
func DetectFile(file string) (mime *MIME, err error) {
f, err := os.Open(file)
if err != nil {
return root, err
}
defer f.Close()
return DetectReader(f)
}
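A minimal usage sketch built only from the signatures above (Detect, DetectReader, DetectFile and the MIME methods); the file path is illustrative, not a file from this repository:
package main

import (
	"fmt"

	"github.com/gabriel-vasile/mimetype"
)

func main() {
	// DetectFile opens the file and reads at most matchers.ReadLimit bytes.
	m, err := mimetype.DetectFile("report.pdf") // illustrative path
	if err != nil {
		fmt.Println("detection error:", err)
		return
	}
	fmt.Println(m.String(), m.Extension()) // e.g. "application/pdf" ".pdf"
	if m.Is("application/pdf") {
		fmt.Println("looks like a PDF")
	}
}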


@@ -1,146 +0,0 @@
## 141 Supported MIME types
This file is automatically generated when running tests. Do not edit manually.
Extension | MIME type | Aliases
--------- | --------- | -------
**n/a** | application/octet-stream | -
**.7z** | application/x-7z-compressed | -
**.zip** | application/zip | application/x-zip, application/x-zip-compressed
**.xlsx** | application/vnd.openxmlformats-officedocument.spreadsheetml.sheet | -
**.docx** | application/vnd.openxmlformats-officedocument.wordprocessingml.document | -
**.pptx** | application/vnd.openxmlformats-officedocument.presentationml.presentation | -
**.epub** | application/epub+zip | -
**.jar** | application/jar | -
**.odt** | application/vnd.oasis.opendocument.text | application/x-vnd.oasis.opendocument.text
**.ott** | application/vnd.oasis.opendocument.text-template | application/x-vnd.oasis.opendocument.text-template
**.ods** | application/vnd.oasis.opendocument.spreadsheet | application/x-vnd.oasis.opendocument.spreadsheet
**.ots** | application/vnd.oasis.opendocument.spreadsheet-template | application/x-vnd.oasis.opendocument.spreadsheet-template
**.odp** | application/vnd.oasis.opendocument.presentation | application/x-vnd.oasis.opendocument.presentation
**.otp** | application/vnd.oasis.opendocument.presentation-template | application/x-vnd.oasis.opendocument.presentation-template
**.odg** | application/vnd.oasis.opendocument.graphics | application/x-vnd.oasis.opendocument.graphics
**.otg** | application/vnd.oasis.opendocument.graphics-template | application/x-vnd.oasis.opendocument.graphics-template
**.odf** | application/vnd.oasis.opendocument.formula | application/x-vnd.oasis.opendocument.formula
**.pdf** | application/pdf | application/x-pdf
**n/a** | application/x-ole-storage | -
**.xls** | application/vnd.ms-excel | application/msexcel
**.pub** | application/vnd.ms-publisher | -
**.ppt** | application/vnd.ms-powerpoint | application/mspowerpoint
**.doc** | application/msword | application/vnd.ms-word
**.ps** | application/postscript | -
**.psd** | image/vnd.adobe.photoshop | image/x-psd, application/photoshop
**.ogg** | application/ogg | application/x-ogg
**.oga** | audio/ogg | -
**.ogv** | video/ogg | -
**.png** | image/png | -
**.jpg** | image/jpeg | -
**.jp2** | image/jp2 | -
**.jpf** | image/jpx | -
**.jpm** | image/jpm | video/jpm
**.gif** | image/gif | -
**.webp** | image/webp | -
**.exe** | application/vnd.microsoft.portable-executable | -
**n/a** | application/x-elf | -
**n/a** | application/x-object | -
**n/a** | application/x-executable | -
**.so** | application/x-sharedlib | -
**n/a** | application/x-coredump | -
**.a** | application/x-archive | application/x-unix-archive
**.deb** | application/vnd.debian.binary-package | -
**.tar** | application/x-tar | -
**.xar** | application/x-xar | -
**.bz2** | application/x-bzip2 | -
**.fits** | application/fits | -
**.tiff** | image/tiff | -
**.bmp** | image/bmp | image/x-bmp, image/x-ms-bmp
**.ico** | image/x-icon | -
**.mp3** | audio/mpeg | audio/x-mpeg, audio/mp3
**.flac** | audio/flac | -
**.midi** | audio/midi | audio/mid, audio/sp-midi, audio/x-mid, audio/x-midi
**.ape** | audio/ape | -
**.mpc** | audio/musepack | -
**.amr** | audio/amr | audio/amr-nb
**.wav** | audio/wav | audio/x-wav, audio/vnd.wave, audio/wave
**.aiff** | audio/aiff | -
**.au** | audio/basic | -
**.mpeg** | video/mpeg | -
**.mov** | video/quicktime | -
**.mqv** | video/quicktime | -
**.mp4** | video/mp4 | -
**.webm** | video/webm | audio/webm
**.3gp** | video/3gpp | video/3gp, audio/3gpp
**.3g2** | video/3gpp2 | video/3g2, audio/3gpp2
**.avi** | video/x-msvideo | video/avi, video/msvideo
**.flv** | video/x-flv | -
**.mkv** | video/x-matroska | -
**.asf** | video/x-ms-asf | video/asf, video/x-ms-wmv
**.aac** | audio/aac | -
**.voc** | audio/x-unknown | -
**.mp4** | audio/mp4 | audio/x-m4a, audio/x-mp4a
**.m4a** | audio/x-m4a | -
**.txt** | text/plain; charset=utf-32le | -
**.txt** | text/plain; charset=utf-32be | -
**.txt** | text/plain; charset=utf-16le | -
**.txt** | text/plain; charset=utf-16be | -
**.gz** | application/gzip | application/x-gzip, application/x-gunzip, application/gzipped, application/gzip-compressed, application/x-gzip-compressed, gzip/document
**.class** | application/x-java-applet; charset=binary | -
**.swf** | application/x-shockwave-flash | -
**.crx** | application/x-chrome-extension | -
**.woff** | font/woff | -
**.woff2** | font/woff2 | -
**.otf** | font/otf | -
**.eot** | application/vnd.ms-fontobject | -
**.wasm** | application/wasm | -
**.shx** | application/octet-stream | -
**.shp** | application/octet-stream | -
**.dbf** | application/x-dbf | -
**.dcm** | application/dicom | -
**.rar** | application/x-rar-compressed | application/x-rar
**.djvu** | image/vnd.djvu | -
**.mobi** | application/x-mobipocket-ebook | -
**.lit** | application/x-ms-reader | -
**.bpg** | image/bpg | -
**.sqlite** | application/x-sqlite3 | -
**.dwg** | image/vnd.dwg | image/x-dwg, application/acad, application/x-acad, application/autocad_dwg, application/dwg, application/x-dwg, application/x-autocad, drawing/dwg
**.nes** | application/vnd.nintendo.snes.rom | -
**.macho** | application/x-mach-binary | -
**.qcp** | audio/qcelp | -
**.icns** | image/x-icns | -
**.heic** | image/heic | -
**.heic** | image/heic-sequence | -
**.heif** | image/heif | -
**.heif** | image/heif-sequence | -
**.mrc** | application/marc | -
**.mdb** | application/x-msaccess | -
**.accdb** | application/x-msaccess | -
**.zst** | application/zstd | -
**.cab** | application/vnd.ms-cab-compressed | -
**.txt** | text/plain; charset=utf-8 | -
**.html** | text/html; charset=utf-8 | -
**.svg** | image/svg+xml | -
**.xml** | text/xml; charset=utf-8 | -
**.rss** | application/rss+xml | text/rss
**.atom** | application/atom+xml | -
**.x3d** | model/x3d+xml | -
**.kml** | application/vnd.google-earth.kml+xml | -
**.xlf** | application/x-xliff+xml | -
**.dae** | model/vnd.collada+xml | -
**.gml** | application/gml+xml | -
**.gpx** | application/gpx+xml | -
**.tcx** | application/vnd.garmin.tcx+xml | -
**.amf** | application/x-amf | -
**.3mf** | application/vnd.ms-package.3dmanufacturing-3dmodel+xml | -
**.php** | text/x-php; charset=utf-8 | -
**.js** | application/javascript | application/x-javascript, text/javascript
**.lua** | text/x-lua | -
**.pl** | text/x-perl | -
**.py** | application/x-python | -
**.json** | application/json | -
**.geojson** | application/geo+json | -
**.ndjson** | application/x-ndjson | -
**.rtf** | text/rtf | -
**.tcl** | text/x-tcl | application/x-tcl
**.csv** | text/csv | -
**.tsv** | text/tab-separated-values | -
**.vcf** | text/vcard | -
**.ics** | text/calendar | -
**.warc** | application/warc | -


@@ -1,195 +0,0 @@
package mimetype
import "github.com/gabriel-vasile/mimetype/internal/matchers"
// root is a matcher which passes for any slice of bytes.
// When a matcher passes the check, the children matchers
// are tried in order to find a more accurate MIME type.
var root = newMIME("application/octet-stream", "", matchers.True,
sevenZ, zip, pdf, ole, ps, psd, ogg, png, jpg, jp2, jpx, jpm, gif, webp,
exe, elf, ar, tar, xar, bz2, fits, tiff, bmp, ico, mp3, flac, midi, ape,
musePack, amr, wav, aiff, au, mpeg, quickTime, mqv, mp4, webM, threeGP,
threeG2, avi, flv, mkv, asf, aac, voc, aMp4, m4a, utf32le, utf32be, utf16le,
utf16be, gzip, class, swf, crx, woff, woff2, otf, eot, wasm, shx, dbf, dcm,
rar, djvu, mobi, lit, bpg, sqlite3, dwg, nes, macho, qcp, icns, heic,
heicSeq, heif, heifSeq, mrc, mdb, accdb, zstd, cab, utf8,
)
// The list of nodes appended to the root node
var (
gzip = newMIME("application/gzip", ".gz", matchers.Gzip).
alias("application/x-gzip", "application/x-gunzip", "application/gzipped", "application/gzip-compressed", "application/x-gzip-compressed", "gzip/document")
sevenZ = newMIME("application/x-7z-compressed", ".7z", matchers.SevenZ)
zip = newMIME("application/zip", ".zip", matchers.Zip, xlsx, docx, pptx, epub, jar, odt, ods, odp, odg, odf).
alias("application/x-zip", "application/x-zip-compressed")
tar = newMIME("application/x-tar", ".tar", matchers.Tar)
xar = newMIME("application/x-xar", ".xar", matchers.Xar)
bz2 = newMIME("application/x-bzip2", ".bz2", matchers.Bz2)
pdf = newMIME("application/pdf", ".pdf", matchers.Pdf).
alias("application/x-pdf")
xlsx = newMIME("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx", matchers.Xlsx)
docx = newMIME("application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx", matchers.Docx)
pptx = newMIME("application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx", matchers.Pptx)
epub = newMIME("application/epub+zip", ".epub", matchers.Epub)
jar = newMIME("application/jar", ".jar", matchers.Jar)
ole = newMIME("application/x-ole-storage", "", matchers.Ole, xls, pub, ppt, doc)
doc = newMIME("application/msword", ".doc", matchers.Doc).
alias("application/vnd.ms-word")
ppt = newMIME("application/vnd.ms-powerpoint", ".ppt", matchers.Ppt).
alias("application/mspowerpoint")
pub = newMIME("application/vnd.ms-publisher", ".pub", matchers.Pub)
xls = newMIME("application/vnd.ms-excel", ".xls", matchers.Xls).
alias("application/msexcel")
ps = newMIME("application/postscript", ".ps", matchers.Ps)
fits = newMIME("application/fits", ".fits", matchers.Fits)
ogg = newMIME("application/ogg", ".ogg", matchers.Ogg, oggAudio, oggVideo).
alias("application/x-ogg")
oggAudio = newMIME("audio/ogg", ".oga", matchers.OggAudio)
oggVideo = newMIME("video/ogg", ".ogv", matchers.OggVideo)
utf32le = newMIME("text/plain; charset=utf-32le", ".txt", matchers.Utf32le)
utf32be = newMIME("text/plain; charset=utf-32be", ".txt", matchers.Utf32be)
utf16le = newMIME("text/plain; charset=utf-16le", ".txt", matchers.Utf16le)
utf16be = newMIME("text/plain; charset=utf-16be", ".txt", matchers.Utf16be)
utf8 = newMIME("text/plain; charset=utf-8", ".txt", matchers.Utf8, html, svg, xml, php, js, lua, perl, python, json, ndJson, rtf, tcl, csv, tsv, vCard, iCalendar, warc)
xml = newMIME("text/xml; charset=utf-8", ".xml", matchers.Xml, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf)
json = newMIME("application/json", ".json", matchers.Json, geoJson)
csv = newMIME("text/csv", ".csv", matchers.Csv)
tsv = newMIME("text/tab-separated-values", ".tsv", matchers.Tsv)
geoJson = newMIME("application/geo+json", ".geojson", matchers.GeoJson)
ndJson = newMIME("application/x-ndjson", ".ndjson", matchers.NdJson)
html = newMIME("text/html; charset=utf-8", ".html", matchers.Html)
php = newMIME("text/x-php; charset=utf-8", ".php", matchers.Php)
rtf = newMIME("text/rtf", ".rtf", matchers.Rtf)
js = newMIME("application/javascript", ".js", matchers.Js).
alias("application/x-javascript", "text/javascript")
lua = newMIME("text/x-lua", ".lua", matchers.Lua)
perl = newMIME("text/x-perl", ".pl", matchers.Perl)
python = newMIME("application/x-python", ".py", matchers.Python)
tcl = newMIME("text/x-tcl", ".tcl", matchers.Tcl).
alias("application/x-tcl")
vCard = newMIME("text/vcard", ".vcf", matchers.VCard)
iCalendar = newMIME("text/calendar", ".ics", matchers.ICalendar)
svg = newMIME("image/svg+xml", ".svg", matchers.Svg)
rss = newMIME("application/rss+xml", ".rss", matchers.Rss).
alias("text/rss")
atom = newMIME("application/atom+xml", ".atom", matchers.Atom)
x3d = newMIME("model/x3d+xml", ".x3d", matchers.X3d)
kml = newMIME("application/vnd.google-earth.kml+xml", ".kml", matchers.Kml)
xliff = newMIME("application/x-xliff+xml", ".xlf", matchers.Xliff)
collada = newMIME("model/vnd.collada+xml", ".dae", matchers.Collada)
gml = newMIME("application/gml+xml", ".gml", matchers.Gml)
gpx = newMIME("application/gpx+xml", ".gpx", matchers.Gpx)
tcx = newMIME("application/vnd.garmin.tcx+xml", ".tcx", matchers.Tcx)
amf = newMIME("application/x-amf", ".amf", matchers.Amf)
threemf = newMIME("application/vnd.ms-package.3dmanufacturing-3dmodel+xml", ".3mf", matchers.Threemf)
png = newMIME("image/png", ".png", matchers.Png)
jpg = newMIME("image/jpeg", ".jpg", matchers.Jpg)
jp2 = newMIME("image/jp2", ".jp2", matchers.Jp2)
jpx = newMIME("image/jpx", ".jpf", matchers.Jpx)
jpm = newMIME("image/jpm", ".jpm", matchers.Jpm).
alias("video/jpm")
bpg = newMIME("image/bpg", ".bpg", matchers.Bpg)
gif = newMIME("image/gif", ".gif", matchers.Gif)
webp = newMIME("image/webp", ".webp", matchers.Webp)
tiff = newMIME("image/tiff", ".tiff", matchers.Tiff)
bmp = newMIME("image/bmp", ".bmp", matchers.Bmp).
alias("image/x-bmp", "image/x-ms-bmp")
ico = newMIME("image/x-icon", ".ico", matchers.Ico)
icns = newMIME("image/x-icns", ".icns", matchers.Icns)
psd = newMIME("image/vnd.adobe.photoshop", ".psd", matchers.Psd).
alias("image/x-psd", "application/photoshop")
heic = newMIME("image/heic", ".heic", matchers.Heic)
heicSeq = newMIME("image/heic-sequence", ".heic", matchers.HeicSequence)
heif = newMIME("image/heif", ".heif", matchers.Heif)
heifSeq = newMIME("image/heif-sequence", ".heif", matchers.HeifSequence)
mp3 = newMIME("audio/mpeg", ".mp3", matchers.Mp3).
alias("audio/x-mpeg", "audio/mp3")
flac = newMIME("audio/flac", ".flac", matchers.Flac)
midi = newMIME("audio/midi", ".midi", matchers.Midi).
alias("audio/mid", "audio/sp-midi", "audio/x-mid", "audio/x-midi")
ape = newMIME("audio/ape", ".ape", matchers.Ape)
musePack = newMIME("audio/musepack", ".mpc", matchers.MusePack)
wav = newMIME("audio/wav", ".wav", matchers.Wav).
alias("audio/x-wav", "audio/vnd.wave", "audio/wave")
aiff = newMIME("audio/aiff", ".aiff", matchers.Aiff)
au = newMIME("audio/basic", ".au", matchers.Au)
amr = newMIME("audio/amr", ".amr", matchers.Amr).
alias("audio/amr-nb")
aac = newMIME("audio/aac", ".aac", matchers.Aac)
voc = newMIME("audio/x-unknown", ".voc", matchers.Voc)
aMp4 = newMIME("audio/mp4", ".mp4", matchers.AMp4).
alias("audio/x-m4a", "audio/x-mp4a")
m4a = newMIME("audio/x-m4a", ".m4a", matchers.M4a)
mp4 = newMIME("video/mp4", ".mp4", matchers.Mp4)
webM = newMIME("video/webm", ".webm", matchers.WebM).
alias("audio/webm")
mpeg = newMIME("video/mpeg", ".mpeg", matchers.Mpeg)
quickTime = newMIME("video/quicktime", ".mov", matchers.QuickTime)
mqv = newMIME("video/quicktime", ".mqv", matchers.Mqv)
threeGP = newMIME("video/3gpp", ".3gp", matchers.ThreeGP).
alias("video/3gp", "audio/3gpp")
threeG2 = newMIME("video/3gpp2", ".3g2", matchers.ThreeG2).
alias("video/3g2", "audio/3gpp2")
avi = newMIME("video/x-msvideo", ".avi", matchers.Avi).
alias("video/avi", "video/msvideo")
flv = newMIME("video/x-flv", ".flv", matchers.Flv)
mkv = newMIME("video/x-matroska", ".mkv", matchers.Mkv)
asf = newMIME("video/x-ms-asf", ".asf", matchers.Asf).
alias("video/asf", "video/x-ms-wmv")
class = newMIME("application/x-java-applet; charset=binary", ".class", matchers.Class)
swf = newMIME("application/x-shockwave-flash", ".swf", matchers.Swf)
crx = newMIME("application/x-chrome-extension", ".crx", matchers.Crx)
woff = newMIME("font/woff", ".woff", matchers.Woff)
woff2 = newMIME("font/woff2", ".woff2", matchers.Woff2)
otf = newMIME("font/otf", ".otf", matchers.Otf)
eot = newMIME("application/vnd.ms-fontobject", ".eot", matchers.Eot)
wasm = newMIME("application/wasm", ".wasm", matchers.Wasm)
shp = newMIME("application/octet-stream", ".shp", matchers.Shp)
shx = newMIME("application/octet-stream", ".shx", matchers.Shx, shp)
dbf = newMIME("application/x-dbf", ".dbf", matchers.Dbf)
exe = newMIME("application/vnd.microsoft.portable-executable", ".exe", matchers.Exe)
elf = newMIME("application/x-elf", "", matchers.Elf, elfObj, elfExe, elfLib, elfDump)
elfObj = newMIME("application/x-object", "", matchers.ElfObj)
elfExe = newMIME("application/x-executable", "", matchers.ElfExe)
elfLib = newMIME("application/x-sharedlib", ".so", matchers.ElfLib)
elfDump = newMIME("application/x-coredump", "", matchers.ElfDump)
ar = newMIME("application/x-archive", ".a", matchers.Ar, deb).
alias("application/x-unix-archive")
deb = newMIME("application/vnd.debian.binary-package", ".deb", matchers.Deb)
dcm = newMIME("application/dicom", ".dcm", matchers.Dcm)
odt = newMIME("application/vnd.oasis.opendocument.text", ".odt", matchers.Odt, ott).
alias("application/x-vnd.oasis.opendocument.text")
ott = newMIME("application/vnd.oasis.opendocument.text-template", ".ott", matchers.Ott).
alias("application/x-vnd.oasis.opendocument.text-template")
ods = newMIME("application/vnd.oasis.opendocument.spreadsheet", ".ods", matchers.Ods, ots).
alias("application/x-vnd.oasis.opendocument.spreadsheet")
ots = newMIME("application/vnd.oasis.opendocument.spreadsheet-template", ".ots", matchers.Ots).
alias("application/x-vnd.oasis.opendocument.spreadsheet-template")
odp = newMIME("application/vnd.oasis.opendocument.presentation", ".odp", matchers.Odp, otp).
alias("application/x-vnd.oasis.opendocument.presentation")
otp = newMIME("application/vnd.oasis.opendocument.presentation-template", ".otp", matchers.Otp).
alias("application/x-vnd.oasis.opendocument.presentation-template")
odg = newMIME("application/vnd.oasis.opendocument.graphics", ".odg", matchers.Odg, otg).
alias("application/x-vnd.oasis.opendocument.graphics")
otg = newMIME("application/vnd.oasis.opendocument.graphics-template", ".otg", matchers.Otg).
alias("application/x-vnd.oasis.opendocument.graphics-template")
odf = newMIME("application/vnd.oasis.opendocument.formula", ".odf", matchers.Odf).
alias("application/x-vnd.oasis.opendocument.formula")
rar = newMIME("application/x-rar-compressed", ".rar", matchers.Rar).
alias("application/x-rar")
djvu = newMIME("image/vnd.djvu", ".djvu", matchers.DjVu)
mobi = newMIME("application/x-mobipocket-ebook", ".mobi", matchers.Mobi)
lit = newMIME("application/x-ms-reader", ".lit", matchers.Lit)
sqlite3 = newMIME("application/x-sqlite3", ".sqlite", matchers.Sqlite)
dwg = newMIME("image/vnd.dwg", ".dwg", matchers.Dwg).
alias("image/x-dwg", "application/acad", "application/x-acad", "application/autocad_dwg", "application/dwg", "application/x-dwg", "application/x-autocad", "drawing/dwg")
warc = newMIME("application/warc", ".warc", matchers.Warc)
nes = newMIME("application/vnd.nintendo.snes.rom", ".nes", matchers.Nes)
macho = newMIME("application/x-mach-binary", ".macho", matchers.MachO)
qcp = newMIME("audio/qcelp", ".qcp", matchers.Qcp)
mrc = newMIME("application/marc", ".mrc", matchers.Marc)
mdb = newMIME("application/x-msaccess", ".mdb", matchers.MsAccessMdb)
accdb = newMIME("application/x-msaccess", ".accdb", matchers.MsAccessAce)
zstd = newMIME("application/zstd", ".zst", matchers.Zstd)
cab = newMIME("application/vnd.ms-cab-compressed", ".cab", matchers.Cab)
)
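
To show how the matcher tree above resolves a type, here is a self-contained sketch (not the library's code) of the walk it encodes: the root accepts any byte slice, and children are tried in order until one matches, refining the result.

```go
// Self-contained sketch of the root-to-children matcher walk encoded by
// the tree above; hypothetical types, not the library's implementation.
package main

import (
	"bytes"
	"fmt"
)

type node struct {
	mime     string
	match    func([]byte) bool
	children []*node
}

// detect returns the most specific MIME type whose matcher accepts data.
func detect(n *node, data []byte) string {
	for _, c := range n.children {
		if c.match(data) {
			return detect(c, data)
		}
	}
	return n.mime
}

func main() {
	pdf := &node{mime: "application/pdf", match: func(b []byte) bool { return bytes.HasPrefix(b, []byte("%PDF-")) }}
	root := &node{mime: "application/octet-stream", match: func([]byte) bool { return true }, children: []*node{pdf}}
	fmt.Println(detect(root, []byte("%PDF-1.7"))) // application/pdf
}
```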


@@ -1,28 +0,0 @@
Copyright (c) 2012 The Go Authors. All rights reserved.
Copyright (c) 2019 Klaus Post. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@@ -1,817 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Copyright (c) 2015 Klaus Post
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"fmt"
"io"
"math"
)
const (
NoCompression = 0
BestSpeed = 1
BestCompression = 9
DefaultCompression = -1
// HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman
// entropy encoding. This mode is useful in compressing data that has
// already been compressed with an LZ style algorithm (e.g. Snappy or LZ4)
// that lacks an entropy encoder. Compression gains are achieved when
// certain bytes in the input stream occur more frequently than others.
//
// Note that HuffmanOnly produces a compressed output that is
// RFC 1951 compliant. That is, any valid DEFLATE decompressor will
// continue to be able to decompress this output.
HuffmanOnly = -2
ConstantCompression = HuffmanOnly // compatibility alias.
logWindowSize = 15
windowSize = 1 << logWindowSize
windowMask = windowSize - 1
logMaxOffsetSize = 15 // Standard DEFLATE
minMatchLength = 4 // The smallest match that the compressor looks for
maxMatchLength = 258 // The longest match for the compressor
minOffsetSize = 1 // The shortest offset that makes any sense
// The maximum number of tokens we put into a single flate block, just to
// stop things from getting too large.
maxFlateBlockTokens = 1 << 14
maxStoreBlockSize = 65535
hashBits = 17 // After 17 performance degrades
hashSize = 1 << hashBits
hashMask = (1 << hashBits) - 1
hashShift = (hashBits + minMatchLength - 1) / minMatchLength
maxHashOffset = 1 << 24
skipNever = math.MaxInt32
)
type compressionLevel struct {
good, lazy, nice, chain, fastSkipHashing, level int
}
// Compression levels have been rebalanced from zlib deflate defaults
// to give a bigger spread in speed and compression.
// See https://blog.klauspost.com/rebalancing-deflate-compression-levels/
var levels = []compressionLevel{
{}, // 0
// Level 1-6 uses specialized algorithm - values not used
{0, 0, 0, 0, 0, 1},
{0, 0, 0, 0, 0, 2},
{0, 0, 0, 0, 0, 3},
{0, 0, 0, 0, 0, 4},
{0, 0, 0, 0, 0, 5},
{0, 0, 0, 0, 0, 6},
// Levels 7-9 use increasingly more lazy matching
// and increasingly stringent conditions for "good enough".
{8, 8, 24, 16, skipNever, 7},
{10, 16, 24, 64, skipNever, 8},
{32, 258, 258, 4096, skipNever, 9},
}
// advancedState contains state for the advanced levels, with bigger hash tables, etc.
type advancedState struct {
// deflate state
length int
offset int
hash uint32
maxInsertIndex int
ii uint16 // position of last match, intended to overflow to reset.
// Input hash chains
// hashHead[hashValue] contains the largest inputIndex with the specified hash value
// If hashHead[hashValue] is within the current window, then
// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
// with the same hash value.
chainHead int
hashHead [hashSize]uint32
hashPrev [windowSize]uint32
hashOffset int
// input window: unprocessed data is window[index:windowEnd]
index int
hashMatch [maxMatchLength + minMatchLength]uint32
}
type compressor struct {
compressionLevel
w *huffmanBitWriter
// compression algorithm
fill func(*compressor, []byte) int // copy data to window
step func(*compressor) // process window
sync bool // requesting flush
window []byte
windowEnd int
blockStart int // window index where current tokens start
byteAvailable bool // if true, still need to process window[index-1].
err error
// queued output tokens
tokens tokens
fast fastEnc
state *advancedState
}
func (d *compressor) fillDeflate(b []byte) int {
s := d.state
if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
// shift the window by windowSize
copy(d.window[:], d.window[windowSize:2*windowSize])
s.index -= windowSize
d.windowEnd -= windowSize
if d.blockStart >= windowSize {
d.blockStart -= windowSize
} else {
d.blockStart = math.MaxInt32
}
s.hashOffset += windowSize
if s.hashOffset > maxHashOffset {
delta := s.hashOffset - 1
s.hashOffset -= delta
s.chainHead -= delta
// Iterate over slices instead of arrays to avoid copying
// the entire table onto the stack (Issue #18625).
for i, v := range s.hashPrev[:] {
if int(v) > delta {
s.hashPrev[i] = uint32(int(v) - delta)
} else {
s.hashPrev[i] = 0
}
}
for i, v := range s.hashHead[:] {
if int(v) > delta {
s.hashHead[i] = uint32(int(v) - delta)
} else {
s.hashHead[i] = 0
}
}
}
}
n := copy(d.window[d.windowEnd:], b)
d.windowEnd += n
return n
}
func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error {
if index > 0 || eof {
var window []byte
if d.blockStart <= index {
window = d.window[d.blockStart:index]
}
d.blockStart = index
d.w.writeBlock(tok, eof, window)
return d.w.err
}
return nil
}
// writeBlockSkip writes the current block and uses the number of tokens
// to determine if the block should be stored on no matches, or
// only huffman encoded.
func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error {
if index > 0 || eof {
if d.blockStart <= index {
window := d.window[d.blockStart:index]
// If we removed less than a 64th of all literals
// we huffman compress the block.
if int(tok.n) > len(window)-int(tok.n>>6) {
d.w.writeBlockHuff(eof, window, d.sync)
} else {
// Write a dynamic huffman block.
d.w.writeBlockDynamic(tok, eof, window, d.sync)
}
} else {
d.w.writeBlock(tok, eof, nil)
}
d.blockStart = index
return d.w.err
}
return nil
}
// fillWindow will fill the current window with the supplied
// dictionary and calculate all hashes.
// This is much faster than doing a full encode.
// Should only be used after a start/reset.
func (d *compressor) fillWindow(b []byte) {
// Do not fill window if we are in store-only or huffman mode.
if d.level <= 0 {
return
}
if d.fast != nil {
// encode the last data, but discard the result
if len(b) > maxMatchOffset {
b = b[len(b)-maxMatchOffset:]
}
d.fast.Encode(&d.tokens, b)
d.tokens.Reset()
return
}
s := d.state
// If we are given too much, cut it.
if len(b) > windowSize {
b = b[len(b)-windowSize:]
}
// Add all to window.
n := copy(d.window[d.windowEnd:], b)
// Calculate 256 hashes at the time (more L1 cache hits)
loops := (n + 256 - minMatchLength) / 256
for j := 0; j < loops; j++ {
startindex := j * 256
end := startindex + 256 + minMatchLength - 1
if end > n {
end = n
}
tocheck := d.window[startindex:end]
dstSize := len(tocheck) - minMatchLength + 1
if dstSize <= 0 {
continue
}
dst := s.hashMatch[:dstSize]
bulkHash4(tocheck, dst)
var newH uint32
for i, val := range dst {
di := i + startindex
newH = val & hashMask
// Get previous value with the same hash.
// Our chain should point to the previous value.
s.hashPrev[di&windowMask] = s.hashHead[newH]
// Set the head of the hash chain to us.
s.hashHead[newH] = uint32(di + s.hashOffset)
}
s.hash = newH
}
// Update window information.
d.windowEnd += n
s.index = n
}
// Try to find a match starting at index whose length is greater than prevSize.
// We only look at chainCount possibilities before giving up.
// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead
func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
minMatchLook := maxMatchLength
if lookahead < minMatchLook {
minMatchLook = lookahead
}
win := d.window[0 : pos+minMatchLook]
// We quit when we get a match that's at least nice long
nice := len(win) - pos
if d.nice < nice {
nice = d.nice
}
// If we've got a match that's good enough, only look in 1/4 the chain.
tries := d.chain
length = prevLength
if length >= d.good {
tries >>= 2
}
wEnd := win[pos+length]
wPos := win[pos:]
minIndex := pos - windowSize
for i := prevHead; tries > 0; tries-- {
if wEnd == win[i+length] {
n := matchLen(win[i:i+minMatchLook], wPos)
if n > length && (n > minMatchLength || pos-i <= 4096) {
length = n
offset = pos - i
ok = true
if n >= nice {
// The match is good enough that we don't try to find a better one.
break
}
wEnd = win[pos+n]
}
}
if i == minIndex {
// hashPrev[i & windowMask] has already been overwritten, so stop now.
break
}
i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset
if i < minIndex || i < 0 {
break
}
}
return
}
func (d *compressor) writeStoredBlock(buf []byte) error {
if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
return d.w.err
}
d.w.writeBytes(buf)
return d.w.err
}
// hash4 returns a hash representation of the first 4 bytes
// of the supplied slice.
// The caller must ensure that len(b) >= 4.
func hash4(b []byte) uint32 {
b = b[:4]
return hash4u(uint32(b[3])|uint32(b[2])<<8|uint32(b[1])<<16|uint32(b[0])<<24, hashBits)
}
// bulkHash4 will compute hashes using the same
// algorithm as hash4
func bulkHash4(b []byte, dst []uint32) {
if len(b) < 4 {
return
}
hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
dst[0] = hash4u(hb, hashBits)
end := len(b) - 4 + 1
for i := 1; i < end; i++ {
hb = (hb << 8) | uint32(b[i+3])
dst[i] = hash4u(hb, hashBits)
}
}
func (d *compressor) initDeflate() {
d.window = make([]byte, 2*windowSize)
d.byteAvailable = false
d.err = nil
if d.state == nil {
return
}
s := d.state
s.index = 0
s.hashOffset = 1
s.length = minMatchLength - 1
s.offset = 0
s.hash = 0
s.chainHead = -1
}
// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever,
// meaning it always has lazy matching on.
func (d *compressor) deflateLazy() {
s := d.state
// Sanity enables additional runtime tests.
// It's intended to be used during development
// to supplement the currently ad-hoc unit tests.
const sanity = false
if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
return
}
s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
if s.index < s.maxInsertIndex {
s.hash = hash4(d.window[s.index : s.index+minMatchLength])
}
for {
if sanity && s.index > d.windowEnd {
panic("index > windowEnd")
}
lookahead := d.windowEnd - s.index
if lookahead < minMatchLength+maxMatchLength {
if !d.sync {
return
}
if sanity && s.index > d.windowEnd {
panic("index > windowEnd")
}
if lookahead == 0 {
// Flush current output block if any.
if d.byteAvailable {
// There is still one pending token that needs to be flushed
d.tokens.AddLiteral(d.window[s.index-1])
d.byteAvailable = false
}
if d.tokens.n > 0 {
if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
d.tokens.Reset()
}
return
}
}
if s.index < s.maxInsertIndex {
// Update the hash
s.hash = hash4(d.window[s.index : s.index+minMatchLength])
ch := s.hashHead[s.hash&hashMask]
s.chainHead = int(ch)
s.hashPrev[s.index&windowMask] = ch
s.hashHead[s.hash&hashMask] = uint32(s.index + s.hashOffset)
}
prevLength := s.length
prevOffset := s.offset
s.length = minMatchLength - 1
s.offset = 0
minIndex := s.index - windowSize
if minIndex < 0 {
minIndex = 0
}
if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok {
s.length = newLength
s.offset = newOffset
}
}
if prevLength >= minMatchLength && s.length <= prevLength {
// There was a match at the previous step, and the current match is
// not better. Output the previous match.
d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
// Insert in the hash table all strings up to the end of the match.
// index and index-1 are already inserted. If there is not enough
// lookahead, the last two strings are not inserted into the hash
// table.
var newIndex int
newIndex = s.index + prevLength - 1
// Calculate missing hashes
end := newIndex
if end > s.maxInsertIndex {
end = s.maxInsertIndex
}
end += minMatchLength - 1
startindex := s.index + 1
if startindex > s.maxInsertIndex {
startindex = s.maxInsertIndex
}
tocheck := d.window[startindex:end]
dstSize := len(tocheck) - minMatchLength + 1
if dstSize > 0 {
dst := s.hashMatch[:dstSize]
bulkHash4(tocheck, dst)
var newH uint32
for i, val := range dst {
di := i + startindex
newH = val & hashMask
// Get previous value with the same hash.
// Our chain should point to the previous value.
s.hashPrev[di&windowMask] = s.hashHead[newH]
// Set the head of the hash chain to us.
s.hashHead[newH] = uint32(di + s.hashOffset)
}
s.hash = newH
}
s.index = newIndex
d.byteAvailable = false
s.length = minMatchLength - 1
if d.tokens.n == maxFlateBlockTokens {
// The block includes the current character
if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
d.tokens.Reset()
}
} else {
// Reset, if we got a match this run.
if s.length >= minMatchLength {
s.ii = 0
}
// We have a byte waiting. Emit it.
if d.byteAvailable {
s.ii++
d.tokens.AddLiteral(d.window[s.index-1])
if d.tokens.n == maxFlateBlockTokens {
if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
d.tokens.Reset()
}
s.index++
// If we have a long run of no matches, skip additional bytes
// Resets when s.ii overflows after 64KB.
if s.ii > 31 {
n := int(s.ii >> 5)
for j := 0; j < n; j++ {
if s.index >= d.windowEnd-1 {
break
}
d.tokens.AddLiteral(d.window[s.index-1])
if d.tokens.n == maxFlateBlockTokens {
if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
d.tokens.Reset()
}
s.index++
}
// Flush last byte
d.tokens.AddLiteral(d.window[s.index-1])
d.byteAvailable = false
// s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength
if d.tokens.n == maxFlateBlockTokens {
if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
d.tokens.Reset()
}
}
} else {
s.index++
d.byteAvailable = true
}
}
}
}
func (d *compressor) store() {
if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) {
d.err = d.writeStoredBlock(d.window[:d.windowEnd])
d.windowEnd = 0
}
}
// fillBlock will fill the buffer with data for huffman-only compression.
// The number of bytes copied is returned.
func (d *compressor) fillBlock(b []byte) int {
n := copy(d.window[d.windowEnd:], b)
d.windowEnd += n
return n
}
// storeHuff will compress and store the currently added data,
// if enough has been accumulated or we are at the end of the stream.
// Any error that occurred will be in d.err
func (d *compressor) storeHuff() {
if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 {
return
}
d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
d.err = d.w.err
d.windowEnd = 0
}
// storeFast will compress and store the currently added data,
// if enough has been accumulated or we are at the end of the stream.
// Any error that occurred will be in d.err
func (d *compressor) storeFast() {
// We only compress if we have maxStoreBlockSize.
if d.windowEnd < len(d.window) {
if !d.sync {
return
}
// Handle extremely small sizes.
if d.windowEnd < 128 {
if d.windowEnd == 0 {
return
}
if d.windowEnd <= 32 {
d.err = d.writeStoredBlock(d.window[:d.windowEnd])
} else {
d.w.writeBlockHuff(false, d.window[:d.windowEnd], true)
d.err = d.w.err
}
d.tokens.Reset()
d.windowEnd = 0
d.fast.Reset()
return
}
}
d.fast.Encode(&d.tokens, d.window[:d.windowEnd])
// If we made zero matches, store the block as is.
if d.tokens.n == 0 {
d.err = d.writeStoredBlock(d.window[:d.windowEnd])
// If we removed less than 1/16th, huffman compress the block.
} else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) {
d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
d.err = d.w.err
} else {
d.w.writeBlockDynamic(&d.tokens, false, d.window[:d.windowEnd], d.sync)
d.err = d.w.err
}
d.tokens.Reset()
d.windowEnd = 0
}
// write will add input byte to the stream.
// Unless an error occurs all bytes will be consumed.
func (d *compressor) write(b []byte) (n int, err error) {
if d.err != nil {
return 0, d.err
}
n = len(b)
for len(b) > 0 {
d.step(d)
b = b[d.fill(d, b):]
if d.err != nil {
return 0, d.err
}
}
return n, d.err
}
func (d *compressor) syncFlush() error {
d.sync = true
if d.err != nil {
return d.err
}
d.step(d)
if d.err == nil {
d.w.writeStoredHeader(0, false)
d.w.flush()
d.err = d.w.err
}
d.sync = false
return d.err
}
func (d *compressor) init(w io.Writer, level int) (err error) {
d.w = newHuffmanBitWriter(w)
switch {
case level == NoCompression:
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).store
case level == ConstantCompression:
d.w.logNewTablePenalty = 4
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeHuff
case level == DefaultCompression:
level = 5
fallthrough
case level >= 1 && level <= 6:
d.w.logNewTablePenalty = 6
d.fast = newFastEnc(level)
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeFast
case 7 <= level && level <= 9:
d.w.logNewTablePenalty = 10
d.state = &advancedState{}
d.compressionLevel = levels[level]
d.initDeflate()
d.fill = (*compressor).fillDeflate
d.step = (*compressor).deflateLazy
default:
return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
}
d.level = level
return nil
}
// reset the state of the compressor.
func (d *compressor) reset(w io.Writer) {
d.w.reset(w)
d.sync = false
d.err = nil
// We only need to reset a few things for Snappy.
if d.fast != nil {
d.fast.Reset()
d.windowEnd = 0
d.tokens.Reset()
return
}
switch d.compressionLevel.chain {
case 0:
// level was NoCompression or ConstantCompression.
d.windowEnd = 0
default:
s := d.state
s.chainHead = -1
for i := range s.hashHead {
s.hashHead[i] = 0
}
for i := range s.hashPrev {
s.hashPrev[i] = 0
}
s.hashOffset = 1
s.index, d.windowEnd = 0, 0
d.blockStart, d.byteAvailable = 0, false
d.tokens.Reset()
s.length = minMatchLength - 1
s.offset = 0
s.hash = 0
s.ii = 0
s.maxInsertIndex = 0
}
}
func (d *compressor) close() error {
if d.err != nil {
return d.err
}
d.sync = true
d.step(d)
if d.err != nil {
return d.err
}
if d.w.writeStoredHeader(0, true); d.w.err != nil {
return d.w.err
}
d.w.flush()
d.w.reset(nil)
return d.w.err
}
// NewWriter returns a new Writer compressing data at the given level.
// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression);
// higher levels typically run slower but compress more.
// Level 0 (NoCompression) does not attempt any compression; it only adds the
// necessary DEFLATE framing.
// Level -1 (DefaultCompression) uses the default compression level.
// Level -2 (ConstantCompression) will use Huffman compression only, giving
// a very fast compression for all types of input, but sacrificing considerable
// compression efficiency.
//
// If level is in the range [-2, 9] then the error returned will be nil.
// Otherwise the error returned will be non-nil.
func NewWriter(w io.Writer, level int) (*Writer, error) {
var dw Writer
if err := dw.d.init(w, level); err != nil {
return nil, err
}
return &dw, nil
}
// NewWriterDict is like NewWriter but initializes the new
// Writer with a preset dictionary. The returned Writer behaves
// as if the dictionary had been written to it without producing
// any compressed output. The compressed data written to w
// can only be decompressed by a Reader initialized with the
// same dictionary.
func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
zw, err := NewWriter(w, level)
if err != nil {
return nil, err
}
zw.d.fillWindow(dict)
zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method.
return zw, err
}
// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
d compressor
dict []byte
}
// Write writes data to w, which will eventually write the
// compressed form of data to its underlying writer.
func (w *Writer) Write(data []byte) (n int, err error) {
return w.d.write(data)
}
// Flush flushes any pending data to the underlying writer.
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet.
// Flush does not return until the data has been written.
// Calling Flush when there is no pending data still causes the Writer
// to emit a sync marker of at least 4 bytes.
// If the underlying writer returns an error, Flush returns that error.
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (w *Writer) Flush() error {
// For more about flushing:
// http://www.bolet.org/~pornin/deflate-flush.html
return w.d.syncFlush()
}
// Close flushes and closes the writer.
func (w *Writer) Close() error {
return w.d.close()
}
// Reset discards the writer's state and makes it equivalent to
// the result of NewWriter or NewWriterDict called with dst
// and w's level and dictionary.
func (w *Writer) Reset(dst io.Writer) {
if len(w.dict) > 0 {
// w was created with NewWriterDict
w.d.reset(dst)
if dst != nil {
w.d.fillWindow(w.dict)
}
} else {
// w was created with NewWriter
w.d.reset(dst)
}
}
// ResetDict discards the writer's state and makes it equivalent to
// the result of NewWriter or NewWriterDict called with dst
// and w's level, but sets a specific dictionary.
func (w *Writer) ResetDict(dst io.Writer, dict []byte) {
w.dict = dict
w.d.reset(dst)
w.d.fillWindow(w.dict)
}
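
The compressor above is only exercised through the exported Writer; a short usage sketch follows, assuming the klauspost/compress/flate import path used by this vendor tree.

```go
// Usage sketch for the Writer defined above, assuming the
// github.com/klauspost/compress/flate import path.
package main

import (
	"bytes"
	"fmt"
	"log"

	"github.com/klauspost/compress/flate"
)

func main() {
	var buf bytes.Buffer
	zw, err := flate.NewWriter(&buf, flate.BestSpeed) // levels 1-6 use the specialized fast encoders
	if err != nil {
		log.Fatal(err)
	}
	in := bytes.Repeat([]byte("rclone "), 100)
	if _, err := zw.Write(in); err != nil {
		log.Fatal(err)
	}
	if err := zw.Close(); err != nil { // Close writes the final block and end-of-block marker
		log.Fatal(err)
	}
	fmt.Printf("compressed %d bytes to %d\n", len(in), buf.Len())
}
```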


@@ -1,184 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
// LZ77 decompresses data through sequences of two forms of commands:
//
// * Literal insertions: Runs of one or more symbols are inserted into the data
// stream as is. This is accomplished through the writeByte method for a
// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
// Any valid stream must start with a literal insertion if no preset dictionary
// is used.
//
// * Backward copies: Runs of one or more symbols are copied from previously
// emitted data. Backward copies come as the tuple (dist, length) where dist
// determines how far back in the stream to copy from and length determines how
// many bytes to copy. Note that it is valid for the length to be greater than
// the distance. Since LZ77 uses forward copies, that situation is used to
// perform a form of run-length encoding on repeated runs of symbols.
// The writeCopy and tryWriteCopy are used to implement this command.
//
// For performance reasons, this implementation performs little to no sanity
// checks about the arguments. As such, the invariants documented for each
// method call must be respected.
type dictDecoder struct {
hist []byte // Sliding window history
// Invariant: 0 <= rdPos <= wrPos <= len(hist)
wrPos int // Current output position in buffer
rdPos int // Have emitted hist[:rdPos] already
full bool // Has a full window length been written yet?
}
// init initializes dictDecoder to have a sliding window dictionary of the given
// size. If a preset dict is provided, it will initialize the dictionary with
// the contents of dict.
func (dd *dictDecoder) init(size int, dict []byte) {
*dd = dictDecoder{hist: dd.hist}
if cap(dd.hist) < size {
dd.hist = make([]byte, size)
}
dd.hist = dd.hist[:size]
if len(dict) > len(dd.hist) {
dict = dict[len(dict)-len(dd.hist):]
}
dd.wrPos = copy(dd.hist, dict)
if dd.wrPos == len(dd.hist) {
dd.wrPos = 0
dd.full = true
}
dd.rdPos = dd.wrPos
}
// histSize reports the total amount of historical data in the dictionary.
func (dd *dictDecoder) histSize() int {
if dd.full {
return len(dd.hist)
}
return dd.wrPos
}
// availRead reports the number of bytes that can be flushed by readFlush.
func (dd *dictDecoder) availRead() int {
return dd.wrPos - dd.rdPos
}
// availWrite reports the available amount of output buffer space.
func (dd *dictDecoder) availWrite() int {
return len(dd.hist) - dd.wrPos
}
// writeSlice returns a slice of the available buffer to write data to.
//
// This invariant will be kept: len(s) <= availWrite()
func (dd *dictDecoder) writeSlice() []byte {
return dd.hist[dd.wrPos:]
}
// writeMark advances the writer pointer by cnt.
//
// This invariant must be kept: 0 <= cnt <= availWrite()
func (dd *dictDecoder) writeMark(cnt int) {
dd.wrPos += cnt
}
// writeByte writes a single byte to the dictionary.
//
// This invariant must be kept: 0 < availWrite()
func (dd *dictDecoder) writeByte(c byte) {
dd.hist[dd.wrPos] = c
dd.wrPos++
}
// writeCopy copies a string at a given (dist, length) to the output.
// This returns the number of bytes copied and may be less than the requested
// length if the available space in the output buffer is too small.
//
// This invariant must be kept: 0 < dist <= histSize()
func (dd *dictDecoder) writeCopy(dist, length int) int {
dstBase := dd.wrPos
dstPos := dstBase
srcPos := dstPos - dist
endPos := dstPos + length
if endPos > len(dd.hist) {
endPos = len(dd.hist)
}
// Copy non-overlapping section after destination position.
//
// This section is non-overlapping in that the copy length for this section
// is always less than or equal to the backwards distance. This can occur
// if a distance refers to data that wraps-around in the buffer.
// Thus, a backwards copy is performed here; that is, the exact bytes in
// the source prior to the copy are placed in the destination.
if srcPos < 0 {
srcPos += len(dd.hist)
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
srcPos = 0
}
// Copy possibly overlapping section before destination position.
//
// This section can overlap if the copy length for this section is larger
// than the backwards distance. This is allowed by LZ77 so that repeated
// strings can be succinctly represented using (dist, length) pairs.
// Thus, a forwards copy is performed here; that is, the bytes copied are
// possibly dependent on the resulting bytes in the destination as the copy
// progresses along. This is functionally equivalent to the following:
//
// for i := 0; i < endPos-dstPos; i++ {
// dd.hist[dstPos+i] = dd.hist[srcPos+i]
// }
// dstPos = endPos
//
for dstPos < endPos {
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
}
dd.wrPos = dstPos
return dstPos - dstBase
}
// tryWriteCopy tries to copy a string at a given (distance, length) to the
// output. This specialized version is optimized for short distances.
//
// This method is designed to be inlined for performance reasons.
//
// This invariant must be kept: 0 < dist <= histSize()
func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
dstPos := dd.wrPos
endPos := dstPos + length
if dstPos < dist || endPos > len(dd.hist) {
return 0
}
dstBase := dstPos
srcPos := dstPos - dist
// Copy possibly overlapping section before destination position.
loop:
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
if dstPos < endPos {
goto loop // Avoid for-loop so that this function can be inlined
}
dd.wrPos = dstPos
return dstPos - dstBase
}
// readFlush returns a slice of the historical buffer that is ready to be
// emitted to the user. The data returned by readFlush must be fully consumed
// before calling any other dictDecoder methods.
func (dd *dictDecoder) readFlush() []byte {
toRead := dd.hist[dd.rdPos:dd.wrPos]
dd.rdPos = dd.wrPos
if dd.wrPos == len(dd.hist) {
dd.wrPos, dd.rdPos = 0, 0
dd.full = true
}
return toRead
}
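
The key rule in writeCopy above, that length may exceed dist so the copy can read bytes it has just produced, is what gives LZ77 its run-length behaviour. Below is a standalone sketch of just that rule, not the decoder's actual code.

```go
// Standalone sketch of an overlapping LZ77 backward copy: when length
// exceeds dist, bytes appended earlier in the same copy are reused.
package main

import "fmt"

func lz77Copy(out []byte, dist, length int) []byte {
	start := len(out) - dist
	for i := 0; i < length; i++ {
		out = append(out, out[start+i]) // may read a byte this loop just appended
	}
	return out
}

func main() {
	out := lz77Copy([]byte("ab"), 2, 6)
	fmt.Println(string(out)) // abababab
}
```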


@@ -1,255 +0,0 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Modified for deflate by Klaus Post (c) 2015.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"fmt"
"math/bits"
)
type fastEnc interface {
Encode(dst *tokens, src []byte)
Reset()
}
func newFastEnc(level int) fastEnc {
switch level {
case 1:
return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}}
case 2:
return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}}
case 3:
return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}}
case 4:
return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}}
case 5:
return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}}
case 6:
return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}}
default:
panic("invalid level specified")
}
}
const (
tableBits = 16 // Bits used in the table
tableSize = 1 << tableBits // Size of the table
tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
baseMatchOffset = 1 // The smallest match offset
baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
maxMatchOffset = 1 << 15 // The largest match offset
bTableBits = 18 // Bits used in the big tables
bTableSize = 1 << bTableBits // Size of the table
allocHistory = maxStoreBlockSize * 20 // Size to preallocate for history.
bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this.
)
const (
prime3bytes = 506832829
prime4bytes = 2654435761
prime5bytes = 889523592379
prime6bytes = 227718039650203
prime7bytes = 58295818150454627
prime8bytes = 0xcf1bbcdcb7a56463
)
func load32(b []byte, i int) uint32 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:4]
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
func load64(b []byte, i int) uint64 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:8]
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
func load3232(b []byte, i int32) uint32 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:4]
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
func load6432(b []byte, i int32) uint64 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:8]
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
func hash(u uint32) uint32 {
return (u * 0x1e35a7bd) >> tableShift
}
type tableEntry struct {
val uint32
offset int32
}
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type fastGen struct {
hist []byte
cur int32
}
func (e *fastGen) addBlock(src []byte) int32 {
// check if we have space already
if len(e.hist)+len(src) > cap(e.hist) {
if cap(e.hist) == 0 {
e.hist = make([]byte, 0, allocHistory)
} else {
if cap(e.hist) < maxMatchOffset*2 {
panic("unexpected buffer size")
}
// Move down
offset := int32(len(e.hist)) - maxMatchOffset
copy(e.hist[0:maxMatchOffset], e.hist[offset:])
e.cur += offset
e.hist = e.hist[:maxMatchOffset]
}
}
s := int32(len(e.hist))
e.hist = append(e.hist, src...)
return s
}
// hash4 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4u(u uint32, h uint8) uint32 {
return (u * prime4bytes) >> ((32 - h) & 31)
}
type tableEntryPrev struct {
Cur tableEntry
Prev tableEntry
}
// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4x64(u uint64, h uint8) uint32 {
return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
}
// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
}
// hash8 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash8(u uint64, h uint8) uint32 {
return uint32((u * prime8bytes) >> ((64 - h) & 63))
}
// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash6(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
}
// matchlen will return the match length between offsets s and t in src.
// The maximum length returned is maxMatchLength - 4.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastGen) matchlen(s, t int32, src []byte) int32 {
if debugDecode {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > maxMatchOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
s1 := int(s) + maxMatchLength - 4
if s1 > len(src) {
s1 = len(src)
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:s1], src[t:]))
}
// matchlenLong will return the match length between offsets s and t in src.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 {
if debugDecode {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > maxMatchOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
}
// Reset the encoding table.
func (e *fastGen) Reset() {
if cap(e.hist) < allocHistory {
e.hist = make([]byte, 0, allocHistory)
}
// We offset current position so everything will be out of reach.
// If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
if e.cur <= bufferReset {
e.cur += maxMatchOffset + int32(len(e.hist))
}
e.hist = e.hist[:0]
}
// matchLen returns the maximum length.
// 'a' must be the shortest of the two.
func matchLen(a, b []byte) int {
b = b[:len(a)]
var checked int
if len(a) > 4 {
// Try 4 bytes first
if diff := load32(a, 0) ^ load32(b, 0); diff != 0 {
return bits.TrailingZeros32(diff) >> 3
}
// Switch to 8 byte matching.
checked = 4
a = a[4:]
b = b[4:]
for len(a) >= 8 {
b = b[:len(a)]
if diff := load64(a, 0) ^ load64(b, 0); diff != 0 {
return checked + (bits.TrailingZeros64(diff) >> 3)
}
checked += 8
a = a[8:]
b = b[8:]
}
}
b = b[:len(a)]
for i := range a {
if a[i] != b[i] {
return int(i) + checked
}
}
return len(a) + checked
}
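
The hash helpers above (hash4u, hash4x64, hash6, hash7, hash8) all follow the same multiplicative pattern: multiply by a large odd constant and keep the top h bits as the table index. A tiny sketch using the same prime4bytes constant:

```go
// Sketch of the multiplicative hashing used above: multiply by a large
// odd prime, then keep the top h bits as the table index.
package main

import "fmt"

const prime4bytes = 2654435761

func hash4u(u uint32, h uint8) uint32 {
	return (u * prime4bytes) >> ((32 - h) & 31)
}

func main() {
	u := uint32('a') | uint32('b')<<8 | uint32('c')<<16 | uint32('d')<<24 // load32 of "abcd"
	fmt.Println(hash4u(u, 16)) // index into a 1<<16 entry table
}
```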


@@ -1,898 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"io"
)
const (
// The largest offset code.
offsetCodeCount = 30
// The special code used to mark the end of a block.
endBlockMarker = 256
// The first length code.
lengthCodesStart = 257
// The number of codegen codes.
codegenCodeCount = 19
badCode = 255
// bufferFlushSize indicates the buffer size
// after which bytes are flushed to the writer.
// Should preferably be a multiple of 6, since
// we accumulate 6 bytes between writes to the buffer.
bufferFlushSize = 240
// bufferSize is the actual output byte buffer size.
// It must have additional headroom for a flush
// which can contain up to 8 bytes.
bufferSize = bufferFlushSize + 8
)
// The number of extra bits needed by length code X - LENGTH_CODES_START.
var lengthExtraBits = [32]int8{
/* 257 */ 0, 0, 0,
/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
/* 280 */ 4, 5, 5, 5, 5, 0,
}
// The length indicated by length code X - LENGTH_CODES_START.
var lengthBase = [32]uint8{
0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
64, 80, 96, 112, 128, 160, 192, 224, 255,
}
// offset code word extra bits.
var offsetExtraBits = [64]int8{
0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
/* extended window */
14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
}
var offsetBase = [64]uint32{
/* normal deflate */
0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
/* extended window */
0x008000, 0x00c000, 0x010000, 0x018000, 0x020000,
0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000,
0x100000, 0x180000, 0x200000, 0x300000,
}
// The odd order in which the codegen code sizes are written.
var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
type huffmanBitWriter struct {
// writer is the underlying writer.
// Do not use it directly; use the write method, which ensures
// that Write errors are sticky.
writer io.Writer
// Data waiting to be written is bytes[0:nbytes]
// and then the low nbits of bits.
bits uint64
nbits uint16
nbytes uint8
literalEncoding *huffmanEncoder
offsetEncoding *huffmanEncoder
codegenEncoding *huffmanEncoder
err error
lastHeader int
// Set between 0 (reused block can be up to 2x the size)
logNewTablePenalty uint
lastHuffMan bool
bytes [256]byte
literalFreq [lengthCodesStart + 32]uint16
offsetFreq [32]uint16
codegenFreq [codegenCodeCount]uint16
// codegen must have an extra space for the final symbol.
codegen [literalCount + offsetCodeCount + 1]uint8
}
// Huffman reuse.
//
// The huffmanBitWriter supports reusing huffman tables and thereby combining block sections.
//
// This is controlled by several variables:
//
// If lastHeader is non-zero the Huffman table can be reused.
// This also indicates that a Huffman table has been generated that can output all
// possible symbols.
// It also indicates that an EOB has not yet been emitted, so if a new table is generated
// an EOB with the previous table must be written.
//
// If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid.
//
// An incoming block estimates the output size of a 'fresh' table by calculating the
// optimal size and adding a penalty in 'logNewTablePenalty'.
// A Huffman table is not optimal, which is why we add a penalty, and generating a new table
// is slower both for compression and decompression.
func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
return &huffmanBitWriter{
writer: w,
literalEncoding: newHuffmanEncoder(literalCount),
codegenEncoding: newHuffmanEncoder(codegenCodeCount),
offsetEncoding: newHuffmanEncoder(offsetCodeCount),
}
}
func (w *huffmanBitWriter) reset(writer io.Writer) {
w.writer = writer
w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
w.lastHeader = 0
w.lastHuffMan = false
}
func (w *huffmanBitWriter) canReuse(t *tokens) (offsets, lits bool) {
offsets, lits = true, true
a := t.offHist[:offsetCodeCount]
b := w.offsetFreq[:len(a)]
for i := range a {
if b[i] == 0 && a[i] != 0 {
offsets = false
break
}
}
a = t.extraHist[:literalCount-256]
b = w.literalFreq[256:literalCount]
b = b[:len(a)]
for i := range a {
if b[i] == 0 && a[i] != 0 {
lits = false
break
}
}
if lits {
a = t.litHist[:]
b = w.literalFreq[:len(a)]
for i := range a {
if b[i] == 0 && a[i] != 0 {
lits = false
break
}
}
}
return
}
func (w *huffmanBitWriter) flush() {
if w.err != nil {
w.nbits = 0
return
}
if w.lastHeader > 0 {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
}
n := w.nbytes
for w.nbits != 0 {
w.bytes[n] = byte(w.bits)
w.bits >>= 8
if w.nbits > 8 { // Avoid underflow
w.nbits -= 8
} else {
w.nbits = 0
}
n++
}
w.bits = 0
w.write(w.bytes[:n])
w.nbytes = 0
}
func (w *huffmanBitWriter) write(b []byte) {
if w.err != nil {
return
}
_, w.err = w.writer.Write(b)
}
func (w *huffmanBitWriter) writeBits(b int32, nb uint16) {
w.bits |= uint64(b) << (w.nbits & 63)
w.nbits += nb
if w.nbits >= 48 {
w.writeOutBits()
}
}
func (w *huffmanBitWriter) writeBytes(bytes []byte) {
if w.err != nil {
return
}
n := w.nbytes
if w.nbits&7 != 0 {
w.err = InternalError("writeBytes with unfinished bits")
return
}
for w.nbits != 0 {
w.bytes[n] = byte(w.bits)
w.bits >>= 8
w.nbits -= 8
n++
}
if n != 0 {
w.write(w.bytes[:n])
}
w.nbytes = 0
w.write(bytes)
}
// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
// the literal and offset lengths arrays (which are concatenated into a single
// array). This method generates that run-length encoding.
//
// The result is written into the codegen array, and the frequencies
// of each code is written into the codegenFreq array.
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
// information. Code badCode is an end marker
//
// numLiterals The number of literals in literalEncoding
// numOffsets The number of offsets in offsetEncoding
// litenc, offenc The literal and offset encoder to use
func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
for i := range w.codegenFreq {
w.codegenFreq[i] = 0
}
// Note that we are using codegen both as a temporary variable for holding
// a copy of the frequencies, and as the place where we put the result.
// This is fine because the output is always shorter than the input used
// so far.
codegen := w.codegen[:] // cache
// Copy the concatenated code sizes to codegen. Put a marker at the end.
cgnl := codegen[:numLiterals]
for i := range cgnl {
cgnl[i] = uint8(litEnc.codes[i].len)
}
cgnl = codegen[numLiterals : numLiterals+numOffsets]
for i := range cgnl {
cgnl[i] = uint8(offEnc.codes[i].len)
}
codegen[numLiterals+numOffsets] = badCode
size := codegen[0]
count := 1
outIndex := 0
for inIndex := 1; size != badCode; inIndex++ {
// INVARIANT: We have seen "count" copies of size that have not yet
// had output generated for them.
nextSize := codegen[inIndex]
if nextSize == size {
count++
continue
}
// We need to generate codegen indicating "count" of size.
if size != 0 {
codegen[outIndex] = size
outIndex++
w.codegenFreq[size]++
count--
for count >= 3 {
n := 6
if n > count {
n = count
}
codegen[outIndex] = 16
outIndex++
codegen[outIndex] = uint8(n - 3)
outIndex++
w.codegenFreq[16]++
count -= n
}
} else {
for count >= 11 {
n := 138
if n > count {
n = count
}
codegen[outIndex] = 18
outIndex++
codegen[outIndex] = uint8(n - 11)
outIndex++
w.codegenFreq[18]++
count -= n
}
if count >= 3 {
// count >= 3 && count <= 10
codegen[outIndex] = 17
outIndex++
codegen[outIndex] = uint8(count - 3)
outIndex++
w.codegenFreq[17]++
count = 0
}
}
count--
for ; count >= 0; count-- {
codegen[outIndex] = size
outIndex++
w.codegenFreq[size]++
}
// Set up invariant for next time through the loop.
size = nextSize
count = 1
}
// Marker indicating the end of the codegen.
codegen[outIndex] = badCode
}
func (w *huffmanBitWriter) codegens() int {
numCodegens := len(w.codegenFreq)
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
numCodegens--
}
return numCodegens
}
func (w *huffmanBitWriter) headerSize() (size, numCodegens int) {
numCodegens = len(w.codegenFreq)
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
numCodegens--
}
return 3 + 5 + 5 + 4 + (3 * numCodegens) +
w.codegenEncoding.bitLength(w.codegenFreq[:]) +
int(w.codegenFreq[16])*2 +
int(w.codegenFreq[17])*3 +
int(w.codegenFreq[18])*7, numCodegens
}
// dynamicSize returns the size of dynamically encoded data in bits.
func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) {
size = litEnc.bitLength(w.literalFreq[:]) +
offEnc.bitLength(w.offsetFreq[:])
return size
}
// dynamicSize returns the size of dynamically encoded data in bits.
func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
header, numCodegens := w.headerSize()
size = header +
litEnc.bitLength(w.literalFreq[:]) +
offEnc.bitLength(w.offsetFreq[:]) +
extraBits
return size, numCodegens
}
// extraBitSize will return the number of bits that will be written
// as "extra" bits on matches.
func (w *huffmanBitWriter) extraBitSize() int {
total := 0
for i, n := range w.literalFreq[257:literalCount] {
total += int(n) * int(lengthExtraBits[i&31])
}
for i, n := range w.offsetFreq[:offsetCodeCount] {
total += int(n) * int(offsetExtraBits[i&31])
}
return total
}
// fixedSize returns the size of dynamically encoded data in bits.
func (w *huffmanBitWriter) fixedSize(extraBits int) int {
return 3 +
fixedLiteralEncoding.bitLength(w.literalFreq[:]) +
fixedOffsetEncoding.bitLength(w.offsetFreq[:]) +
extraBits
}
// storedSize calculates the stored size, including header.
// The function returns the size in bits and whether the data
// fits inside a single stored block.
func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
if in == nil {
return 0, false
}
if len(in) <= maxStoreBlockSize {
return (len(in) + 5) * 8, true
}
return 0, false
}
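// Worked example (illustrative): a 1000 byte block costs (1000+5)*8 = 8040 bits
// when stored; the +5 covers up to one byte of block header plus padding and the
// 2-byte LEN and 2-byte NLEN fields.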
func (w *huffmanBitWriter) writeCode(c hcode) {
// The function does not get inlined if we "& 63" the shift.
w.bits |= uint64(c.code) << w.nbits
w.nbits += c.len
if w.nbits >= 48 {
w.writeOutBits()
}
}
// writeOutBits will write bits to the buffer.
func (w *huffmanBitWriter) writeOutBits() {
bits := w.bits
w.bits >>= 48
w.nbits -= 48
n := w.nbytes
w.bytes[n] = byte(bits)
w.bytes[n+1] = byte(bits >> 8)
w.bytes[n+2] = byte(bits >> 16)
w.bytes[n+3] = byte(bits >> 24)
w.bytes[n+4] = byte(bits >> 32)
w.bytes[n+5] = byte(bits >> 40)
n += 6
if n >= bufferFlushSize {
if w.err != nil {
n = 0
return
}
w.write(w.bytes[:n])
n = 0
}
w.nbytes = n
}
// Write the header of a dynamic Huffman block to the output stream.
//
// numLiterals The number of literals specified in codegen
// numOffsets The number of offsets specified in codegen
// numCodegens The number of codegens used in codegen
func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
if w.err != nil {
return
}
var firstBits int32 = 4
if isEof {
firstBits = 5
}
w.writeBits(firstBits, 3)
w.writeBits(int32(numLiterals-257), 5)
w.writeBits(int32(numOffsets-1), 5)
w.writeBits(int32(numCodegens-4), 4)
for i := 0; i < numCodegens; i++ {
value := uint(w.codegenEncoding.codes[codegenOrder[i]].len)
w.writeBits(int32(value), 3)
}
i := 0
for {
var codeWord = uint32(w.codegen[i])
i++
if codeWord == badCode {
break
}
w.writeCode(w.codegenEncoding.codes[codeWord])
switch codeWord {
case 16:
w.writeBits(int32(w.codegen[i]), 2)
i++
case 17:
w.writeBits(int32(w.codegen[i]), 3)
i++
case 18:
w.writeBits(int32(w.codegen[i]), 7)
i++
}
}
}
func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
if w.err != nil {
return
}
if w.lastHeader > 0 {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
}
var flag int32
if isEof {
flag = 1
}
w.writeBits(flag, 3)
w.flush()
w.writeBits(int32(length), 16)
w.writeBits(int32(^uint16(length)), 16)
}
func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
if w.err != nil {
return
}
if w.lastHeader > 0 {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
}
// Indicate that we are a fixed Huffman block
var value int32 = 2
if isEof {
value = 3
}
w.writeBits(value, 3)
}
// writeBlock will write a block of tokens with the smallest encoding.
// The original input can be supplied, and if the huffman encoded data
// is larger than the original bytes, the data will be written as a
// stored block.
// If the input is nil, the tokens will always be Huffman encoded.
func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) {
if w.err != nil {
return
}
tokens.AddEOB()
if w.lastHeader > 0 {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
}
numLiterals, numOffsets := w.indexTokens(tokens, false)
w.generate(tokens)
var extraBits int
storedSize, storable := w.storedSize(input)
if storable {
extraBits = w.extraBitSize()
}
// Figure out smallest code.
// Fixed Huffman baseline.
var literalEncoding = fixedLiteralEncoding
var offsetEncoding = fixedOffsetEncoding
var size = w.fixedSize(extraBits)
// Dynamic Huffman?
var numCodegens int
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
if dynamicSize < size {
size = dynamicSize
literalEncoding = w.literalEncoding
offsetEncoding = w.offsetEncoding
}
// Stored bytes?
if storable && storedSize < size {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
// Huffman.
if literalEncoding == fixedLiteralEncoding {
w.writeFixedHeader(eof)
} else {
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
}
// Write the tokens.
w.writeTokens(tokens.Slice(), literalEncoding.codes, offsetEncoding.codes)
}
// writeBlockDynamic encodes a block using a dynamic Huffman table.
// This should be used if the symbols used have a disproportionate
// histogram distribution.
// If input is supplied and the compression savings are below 1/16th of the
// input size the block is stored.
func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []byte, sync bool) {
if w.err != nil {
return
}
sync = sync || eof
if sync {
tokens.AddEOB()
}
// We cannot reuse a pure Huffman table, and must mark as EOF.
if (w.lastHuffMan || eof) && w.lastHeader > 0 {
// We will not try to reuse.
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
w.lastHuffMan = false
}
if !sync {
tokens.Fill()
}
numLiterals, numOffsets := w.indexTokens(tokens, !sync)
var size int
// Check if we should reuse.
if w.lastHeader > 0 {
// Estimate size for using a new table.
// Use the previous header size as the best estimate.
newSize := w.lastHeader + tokens.EstimatedBits()
newSize += newSize >> w.logNewTablePenalty
// The estimated size is calculated as an optimal table.
// We add a penalty to make it more realistic and re-use a bit more.
reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + w.extraBitSize()
// Check if a new table is better.
if newSize < reuseSize {
// Write the EOB we owe.
w.writeCode(w.literalEncoding.codes[endBlockMarker])
size = newSize
w.lastHeader = 0
} else {
size = reuseSize
}
// Check if we get a reasonable size decrease.
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
w.lastHeader = 0
return
}
}
// We want a new block/table
if w.lastHeader == 0 {
w.generate(tokens)
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
var numCodegens int
size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, w.extraBitSize())
// Store bytes, if we don't get a reasonable improvement.
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
w.lastHeader = 0
return
}
// Write Huffman table.
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
w.lastHeader, _ = w.headerSize()
w.lastHuffMan = false
}
if sync {
w.lastHeader = 0
}
// Write the tokens.
w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes)
}
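// Worked example (illustrative): with logNewTablePenalty == 4 a new-table
// estimate of 8000 bits is compared as 8000 + 8000>>4 = 8500 bits, so the
// previous table is reused unless it would cost more than that.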
// indexTokens indexes a slice of tokens, and updates
// literalFreq and offsetFreq, and generates literalEncoding
// and offsetEncoding.
// The number of literal and offset tokens is returned.
func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) {
copy(w.literalFreq[:], t.litHist[:])
copy(w.literalFreq[256:], t.extraHist[:])
copy(w.offsetFreq[:], t.offHist[:offsetCodeCount])
if t.n == 0 {
return
}
if filled {
return maxNumLit, maxNumDist
}
// get the number of literals
numLiterals = len(w.literalFreq)
for w.literalFreq[numLiterals-1] == 0 {
numLiterals--
}
// get the number of offsets
numOffsets = len(w.offsetFreq)
for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
numOffsets--
}
if numOffsets == 0 {
// We haven't found a single match. If we want to go with the dynamic encoding,
// we should count at least one offset to be sure that the offset huffman tree could be encoded.
w.offsetFreq[0] = 1
numOffsets = 1
}
return
}
func (w *huffmanBitWriter) generate(t *tokens) {
w.literalEncoding.generate(w.literalFreq[:literalCount], 15)
w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15)
}
// writeTokens writes a slice of tokens to the output.
// codes for literal and offset encoding must be supplied.
func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
if w.err != nil {
return
}
if len(tokens) == 0 {
return
}
// Only last token should be endBlockMarker.
var deferEOB bool
if tokens[len(tokens)-1] == endBlockMarker {
tokens = tokens[:len(tokens)-1]
deferEOB = true
}
// Create slices up to the next power of two to avoid bounds checks.
lits := leCodes[:256]
offs := oeCodes[:32]
lengths := leCodes[lengthCodesStart:]
lengths = lengths[:32]
for _, t := range tokens {
if t < matchType {
w.writeCode(lits[t.literal()])
continue
}
// Write the length
length := t.length()
lengthCode := lengthCode(length)
if false {
w.writeCode(lengths[lengthCode&31])
} else {
// inlined
c := lengths[lengthCode&31]
w.bits |= uint64(c.code) << (w.nbits & 63)
w.nbits += c.len
if w.nbits >= 48 {
w.writeOutBits()
}
}
extraLengthBits := uint16(lengthExtraBits[lengthCode&31])
if extraLengthBits > 0 {
extraLength := int32(length - lengthBase[lengthCode&31])
w.writeBits(extraLength, extraLengthBits)
}
// Write the offset
offset := t.offset()
offsetCode := offsetCode(offset)
if false {
w.writeCode(offs[offsetCode&31])
} else {
// inlined
c := offs[offsetCode&31]
w.bits |= uint64(c.code) << (w.nbits & 63)
w.nbits += c.len
if w.nbits >= 48 {
w.writeOutBits()
}
}
extraOffsetBits := uint16(offsetExtraBits[offsetCode&63])
if extraOffsetBits > 0 {
extraOffset := int32(offset - offsetBase[offsetCode&63])
w.writeBits(extraOffset, extraOffsetBits)
}
}
if deferEOB {
w.writeCode(leCodes[endBlockMarker])
}
}
// huffOffset is a static offset encoder used for huffman only encoding.
// It can be reused since we will not be encoding offset values.
var huffOffset *huffmanEncoder
func init() {
w := newHuffmanBitWriter(nil)
w.offsetFreq[0] = 1
huffOffset = newHuffmanEncoder(offsetCodeCount)
huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15)
}
// writeBlockHuff encodes a block of bytes as either
// Huffman encoded literals or uncompressed bytes if the
// result gains only very little from compression.
func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
if w.err != nil {
return
}
// Clear histogram
for i := range w.literalFreq[:] {
w.literalFreq[i] = 0
}
if !w.lastHuffMan {
for i := range w.offsetFreq[:] {
w.offsetFreq[i] = 0
}
}
// Add everything as literals
// We have to estimate the header size.
// Assume header is around 70 bytes:
// https://stackoverflow.com/a/25454430
const guessHeaderSizeBits = 70 * 8
estBits, estExtra := histogramSize(input, w.literalFreq[:], !eof && !sync)
estBits += w.lastHeader + 15
if w.lastHeader == 0 {
estBits += guessHeaderSizeBits
}
estBits += estBits >> w.logNewTablePenalty
// Store bytes, if we don't get a reasonable improvement.
ssize, storable := w.storedSize(input)
if storable && ssize < estBits {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
if w.lastHeader > 0 {
reuseSize := w.literalEncoding.bitLength(w.literalFreq[:256])
estBits += estExtra
if estBits < reuseSize {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
}
}
const numLiterals = endBlockMarker + 1
const numOffsets = 1
if w.lastHeader == 0 {
w.literalFreq[endBlockMarker] = 1
w.literalEncoding.generate(w.literalFreq[:numLiterals], 15)
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
numCodegens := w.codegens()
// Huffman.
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
w.lastHuffMan = true
w.lastHeader, _ = w.headerSize()
}
encoding := w.literalEncoding.codes[:257]
for _, t := range input {
// Bitwriting inlined, ~30% speedup
c := encoding[t]
w.bits |= uint64(c.code) << ((w.nbits) & 63)
w.nbits += c.len
if w.nbits >= 48 {
bits := w.bits
w.bits >>= 48
w.nbits -= 48
n := w.nbytes
w.bytes[n] = byte(bits)
w.bytes[n+1] = byte(bits >> 8)
w.bytes[n+2] = byte(bits >> 16)
w.bytes[n+3] = byte(bits >> 24)
w.bytes[n+4] = byte(bits >> 32)
w.bytes[n+5] = byte(bits >> 40)
n += 6
if n >= bufferFlushSize {
if w.err != nil {
n = 0
return
}
w.write(w.bytes[:n])
n = 0
}
w.nbytes = n
}
}
if eof || sync {
w.writeCode(encoding[endBlockMarker])
w.lastHeader = 0
w.lastHuffMan = false
}
}
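// Illustrative sketch, not part of the original diff: a hypothetical helper that
// drives the huffmanBitWriter above to emit a single, final, Huffman-only block.
// Assumes it lives in this package, that "bytes" is imported, and that
// newHuffmanBitWriter accepts an io.Writer destination (as its use above suggests).
func encodeHuffOnly(payload []byte) ([]byte, error) {
	var buf bytes.Buffer
	w := newHuffmanBitWriter(&buf)
	w.writeBlockHuff(true, payload, true) // eof=true, sync=true: literals only, terminate the stream
	w.flush()                             // push any pending bits into buf
	if w.err != nil {
		return nil, w.err
	}
	return buf.Bytes(), nil
}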


@@ -1,363 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"math"
"math/bits"
)
const (
maxBitsLimit = 16
// number of valid literals
literalCount = 286
)
// hcode is a huffman code with a bit code and bit length.
type hcode struct {
code, len uint16
}
type huffmanEncoder struct {
codes []hcode
freqcache []literalNode
bitCount [17]int32
}
type literalNode struct {
literal uint16
freq uint16
}
// A levelInfo describes the state of the constructed tree for a given depth.
type levelInfo struct {
// Our level, for better printing
level int32
// The frequency of the last node at this level
lastFreq int32
// The frequency of the next character to add to this level
nextCharFreq int32
// The frequency of the next pair (from level below) to add to this level.
// Only valid if the "needed" value of the next lower level is 0.
nextPairFreq int32
// The number of chains remaining to generate for this level before moving
// up to the next level
needed int32
}
// set sets the code and length of an hcode.
func (h *hcode) set(code uint16, length uint16) {
h.len = length
h.code = code
}
func reverseBits(number uint16, bitLength byte) uint16 {
return bits.Reverse16(number << ((16 - bitLength) & 15))
}
func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} }
func newHuffmanEncoder(size int) *huffmanEncoder {
// Round capacity up to the next power of two.
c := uint(bits.Len32(uint32(size - 1)))
return &huffmanEncoder{codes: make([]hcode, size, 1<<c)}
}
// Generates a HuffmanCode corresponding to the fixed literal table
func generateFixedLiteralEncoding() *huffmanEncoder {
h := newHuffmanEncoder(literalCount)
codes := h.codes
var ch uint16
for ch = 0; ch < literalCount; ch++ {
var bits uint16
var size uint16
switch {
case ch < 144:
// size 8, 00110000 .. 10111111
bits = ch + 48
size = 8
case ch < 256:
// size 9, 110010000 .. 111111111
bits = ch + 400 - 144
size = 9
case ch < 280:
// size 7, 0000000 .. 0010111
bits = ch - 256
size = 7
default:
// size 8, 11000000 .. 11000111
bits = ch + 192 - 280
size = 8
}
codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size}
}
return h
}
func generateFixedOffsetEncoding() *huffmanEncoder {
h := newHuffmanEncoder(30)
codes := h.codes
for ch := range codes {
codes[ch] = hcode{code: reverseBits(uint16(ch), 5), len: 5}
}
return h
}
var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
func (h *huffmanEncoder) bitLength(freq []uint16) int {
var total int
for i, f := range freq {
if f != 0 {
total += int(f) * int(h.codes[i].len)
}
}
return total
}
// Return the number of literals assigned to each bit size in the Huffman encoding
//
// This method is only called when list.length >= 3
// The cases of 0, 1, and 2 literals are handled by special case code.
//
// list An array of the literals with non-zero frequencies
// and their associated frequencies. The array is in order of increasing
// frequency, and has as its last element a special element with frequency
// MaxInt32
// maxBits The maximum number of bits that should be used to encode any literal.
// Must be less than 16.
// return An integer array in which array[i] indicates the number of literals
// that should be encoded in i bits.
func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
if maxBits >= maxBitsLimit {
panic("flate: maxBits too large")
}
n := int32(len(list))
list = list[0 : n+1]
list[n] = maxNode()
// The tree can't have greater depth than n - 1, no matter what. This
// saves a little bit of work in some small cases
if maxBits > n-1 {
maxBits = n - 1
}
// Create information about each of the levels.
// A bogus "Level 0" whose sole purpose is so that
// level1.prev.needed==0. This makes level1.nextPairFreq
// be a legitimate value that never gets chosen.
var levels [maxBitsLimit]levelInfo
// leafCounts[i] counts the number of literals at the left
// of ancestors of the rightmost node at level i.
// leafCounts[i][j] is the number of literals at the left
// of the level j ancestor.
var leafCounts [maxBitsLimit][maxBitsLimit]int32
for level := int32(1); level <= maxBits; level++ {
// For every level, the first two items are the first two characters.
// We initialize the levels as if we had already figured this out.
levels[level] = levelInfo{
level: level,
lastFreq: int32(list[1].freq),
nextCharFreq: int32(list[2].freq),
nextPairFreq: int32(list[0].freq) + int32(list[1].freq),
}
leafCounts[level][level] = 2
if level == 1 {
levels[level].nextPairFreq = math.MaxInt32
}
}
// We need a total of 2*n - 2 items at top level and have already generated 2.
levels[maxBits].needed = 2*n - 4
level := maxBits
for {
l := &levels[level]
if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
// We've run out of both leaves and pairs.
// End all calculations for this level.
// To make sure we never come back to this level or any lower level,
// set nextPairFreq impossibly large.
l.needed = 0
levels[level+1].nextPairFreq = math.MaxInt32
level++
continue
}
prevFreq := l.lastFreq
if l.nextCharFreq < l.nextPairFreq {
// The next item on this row is a leaf node.
n := leafCounts[level][level] + 1
l.lastFreq = l.nextCharFreq
// Lower leafCounts are the same as the previous node.
leafCounts[level][level] = n
e := list[n]
if e.literal < math.MaxUint16 {
l.nextCharFreq = int32(e.freq)
} else {
l.nextCharFreq = math.MaxInt32
}
} else {
// The next item on this row is a pair from the previous row.
// nextPairFreq isn't valid until we generate two
// more values in the level below
l.lastFreq = l.nextPairFreq
// Take leaf counts from the lower level, except counts[level] remains the same.
copy(leafCounts[level][:level], leafCounts[level-1][:level])
levels[l.level-1].needed = 2
}
if l.needed--; l.needed == 0 {
// We've done everything we need to do for this level.
// Continue calculating one level up. Fill in nextPairFreq
// of that level with the sum of the two nodes we've just calculated on
// this level.
if l.level == maxBits {
// All done!
break
}
levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
level++
} else {
// If we stole from below, move down temporarily to replenish it.
for levels[level-1].needed > 0 {
level--
}
}
}
// Something is wrong if, at the end, the top level is null or hasn't used
// all of the leaves.
if leafCounts[maxBits][maxBits] != n {
panic("leafCounts[maxBits][maxBits] != n")
}
bitCount := h.bitCount[:maxBits+1]
bits := 1
counts := &leafCounts[maxBits]
for level := maxBits; level > 0; level-- {
// chain.leafCount gives the number of literals requiring at least "bits"
// bits to encode.
bitCount[bits] = counts[level] - counts[level-1]
bits++
}
return bitCount
}
// Look at the leaves and assign them a bit count and an encoding as specified
// in RFC 1951 3.2.2
func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
code := uint16(0)
for n, bits := range bitCount {
code <<= 1
if n == 0 || bits == 0 {
continue
}
// The literals list[len(list)-bits] .. list[len(list)-1]
// are encoded using "bits" bits, and get the values
// code, code + 1, .... The code values are
// assigned in literal order (not frequency order).
chunk := list[len(list)-int(bits):]
sortByLiteral(chunk)
for _, node := range chunk {
h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
code++
}
list = list[0 : len(list)-int(bits)]
}
}
// Update this Huffman Code object to be the minimum code for the specified frequency count.
//
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
// maxBits The maximum number of bits to use for any literal.
func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
if h.freqcache == nil {
// Allocate a reusable buffer with the longest possible frequency table.
// Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
// The largest of these is literalCount, so we allocate for that case.
h.freqcache = make([]literalNode, literalCount+1)
}
list := h.freqcache[:len(freq)+1]
// Number of non-zero literals
count := 0
// Set list to be the set of all non-zero literals and their frequencies
for i, f := range freq {
if f != 0 {
list[count] = literalNode{uint16(i), f}
count++
} else {
list[count] = literalNode{}
h.codes[i].len = 0
}
}
list[len(freq)] = literalNode{}
list = list[:count]
if count <= 2 {
// Handle the small cases here, because they are awkward for the general case code. With
// two or fewer literals, everything has bit length 1.
for i, node := range list {
// "list" is in order of increasing literal value.
h.codes[node.literal].set(uint16(i), 1)
}
return
}
sortByFreq(list)
// Get the number of literals for each bit count
bitCount := h.bitCounts(list, maxBits)
// And do the assignment
h.assignEncodingAndSize(bitCount, list)
}
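// Illustrative sketch, not from the original file: building codes for a tiny,
// hypothetical frequency table with the encoder above. Symbol 0 dominates, so it
// should receive the shortest code. Assumes it lives in this package and that
// "fmt" is imported.
func exampleGenerate() {
	freq := []uint16{90, 5, 3, 2} // symbol 0 is by far the most frequent
	h := newHuffmanEncoder(len(freq))
	h.generate(freq, 15) // DEFLATE limits code lengths to 15 bits
	for i, c := range h.codes {
		fmt.Printf("symbol %d: %d bits, code %b\n", i, c.len, c.code)
	}
}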
func atLeastOne(v float32) float32 {
if v < 1 {
return 1
}
return v
}
// histogramSize accumulates a histogram of b in h.
// An estimated size in bits is returned.
// Unassigned values are assigned '1' in the histogram.
// len(h) must be >= 256, and h's elements must be all zeroes.
func histogramSize(b []byte, h []uint16, fill bool) (int, int) {
h = h[:256]
for _, t := range b {
h[t]++
}
invTotal := 1.0 / float32(len(b))
shannon := float32(0.0)
var extra float32
if fill {
oneBits := atLeastOne(-mFastLog2(invTotal))
for i, v := range h[:] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
} else {
h[i] = 1
extra += oneBits
}
}
} else {
for _, v := range h[:] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
}
}
}
return int(shannon + 0.99), int(extra + 0.99)
}
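// Worked example (illustrative): for 100 input bytes that are all the same value,
// every per-symbol term is clamped by atLeastOne, so the estimate is roughly 100
// bits (about one bit per byte) rather than zero; this keeps the caller from
// over-estimating how much a dynamic table would help.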


@@ -1,178 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// Sort sorts data.
// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
// data.Less and data.Swap. The sort is not guaranteed to be stable.
func sortByFreq(data []literalNode) {
n := len(data)
quickSortByFreq(data, 0, n, maxDepth(n))
}
func quickSortByFreq(data []literalNode, a, b, maxDepth int) {
for b-a > 12 { // Use ShellSort for slices <= 12 elements
if maxDepth == 0 {
heapSort(data, a, b)
return
}
maxDepth--
mlo, mhi := doPivotByFreq(data, a, b)
// Avoiding recursion on the larger subproblem guarantees
// a stack depth of at most lg(b-a).
if mlo-a < b-mhi {
quickSortByFreq(data, a, mlo, maxDepth)
a = mhi // i.e., quickSortByFreq(data, mhi, b)
} else {
quickSortByFreq(data, mhi, b, maxDepth)
b = mlo // i.e., quickSortByFreq(data, a, mlo)
}
}
if b-a > 1 {
// Do ShellSort pass with gap 6
// It could be written in this simplified form because b-a <= 12
for i := a + 6; i < b; i++ {
if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq {
data[i], data[i-6] = data[i-6], data[i]
}
}
insertionSortByFreq(data, a, b)
}
}
// siftDownByFreq implements the heap property on data[lo, hi).
// first is an offset into the array where the root of the heap lies.
func siftDownByFreq(data []literalNode, lo, hi, first int) {
root := lo
for {
child := 2*root + 1
if child >= hi {
break
}
if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) {
child++
}
if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq {
return
}
data[first+root], data[first+child] = data[first+child], data[first+root]
root = child
}
}
func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) {
m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
if hi-lo > 40 {
// Tukey's ``Ninther,'' median of three medians of three.
s := (hi - lo) / 8
medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s)
medianOfThreeSortByFreq(data, m, m-s, m+s)
medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s)
}
medianOfThreeSortByFreq(data, lo, m, hi-1)
// Invariants are:
// data[lo] = pivot (set up by ChoosePivot)
// data[lo < i < a] < pivot
// data[a <= i < b] <= pivot
// data[b <= i < c] unexamined
// data[c <= i < hi-1] > pivot
// data[hi-1] >= pivot
pivot := lo
a, c := lo+1, hi-1
for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ {
}
b := a
for {
for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot
}
for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot
}
if b >= c {
break
}
// data[b] > pivot; data[c-1] <= pivot
data[b], data[c-1] = data[c-1], data[b]
b++
c--
}
// If hi-c<3 then there are duplicates (by property of median of nine).
// Let's be a bit more conservative, and set border to 5.
protect := hi-c < 5
if !protect && hi-c < (hi-lo)/4 {
// Let's test some points for equality to pivot
dups := 0
if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot
data[c], data[hi-1] = data[hi-1], data[c]
c++
dups++
}
if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot
b--
dups++
}
// m-lo = (hi-lo)/2 > 6
// b-lo > (hi-lo)*3/4-1 > 8
// ==> m < b ==> data[m] <= pivot
if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot
data[m], data[b-1] = data[b-1], data[m]
b--
dups++
}
// if at least 2 points are equal to pivot, assume skewed distribution
protect = dups > 1
}
if protect {
// Protect against a lot of duplicates
// Add invariant:
// data[a <= i < b] unexamined
// data[b <= i < c] = pivot
for {
for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot
}
for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot
}
if a >= b {
break
}
// data[a] == pivot; data[b-1] < pivot
data[a], data[b-1] = data[b-1], data[a]
a++
b--
}
}
// Swap pivot into middle
data[pivot], data[b-1] = data[b-1], data[pivot]
return b - 1, c
}
// Insertion sort
func insertionSortByFreq(data []literalNode, a, b int) {
for i := a + 1; i < b; i++ {
for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- {
data[j], data[j-1] = data[j-1], data[j]
}
}
}
// quickSortByFreq, loosely following Bentley and McIlroy,
// ``Engineering a Sort Function,'' SP&E November 1993.
// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) {
// sort 3 elements
if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
data[m1], data[m0] = data[m0], data[m1]
}
// data[m0] <= data[m1]
if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq {
data[m2], data[m1] = data[m1], data[m2]
// data[m0] <= data[m2] && data[m1] < data[m2]
if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
data[m1], data[m0] = data[m0], data[m1]
}
}
// now data[m0] <= data[m1] <= data[m2]
}


@@ -1,201 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// Sort sorts data.
// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
// data.Less and data.Swap. The sort is not guaranteed to be stable.
func sortByLiteral(data []literalNode) {
n := len(data)
quickSort(data, 0, n, maxDepth(n))
}
func quickSort(data []literalNode, a, b, maxDepth int) {
for b-a > 12 { // Use ShellSort for slices <= 12 elements
if maxDepth == 0 {
heapSort(data, a, b)
return
}
maxDepth--
mlo, mhi := doPivot(data, a, b)
// Avoiding recursion on the larger subproblem guarantees
// a stack depth of at most lg(b-a).
if mlo-a < b-mhi {
quickSort(data, a, mlo, maxDepth)
a = mhi // i.e., quickSort(data, mhi, b)
} else {
quickSort(data, mhi, b, maxDepth)
b = mlo // i.e., quickSort(data, a, mlo)
}
}
if b-a > 1 {
// Do ShellSort pass with gap 6
// It could be written in this simplified form because b-a <= 12
for i := a + 6; i < b; i++ {
if data[i].literal < data[i-6].literal {
data[i], data[i-6] = data[i-6], data[i]
}
}
insertionSort(data, a, b)
}
}
func heapSort(data []literalNode, a, b int) {
first := a
lo := 0
hi := b - a
// Build heap with greatest element at top.
for i := (hi - 1) / 2; i >= 0; i-- {
siftDown(data, i, hi, first)
}
// Pop elements, largest first, into end of data.
for i := hi - 1; i >= 0; i-- {
data[first], data[first+i] = data[first+i], data[first]
siftDown(data, lo, i, first)
}
}
// siftDown implements the heap property on data[lo, hi).
// first is an offset into the array where the root of the heap lies.
func siftDown(data []literalNode, lo, hi, first int) {
root := lo
for {
child := 2*root + 1
if child >= hi {
break
}
if child+1 < hi && data[first+child].literal < data[first+child+1].literal {
child++
}
if data[first+root].literal > data[first+child].literal {
return
}
data[first+root], data[first+child] = data[first+child], data[first+root]
root = child
}
}
func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) {
m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
if hi-lo > 40 {
// Tukey's ``Ninther,'' median of three medians of three.
s := (hi - lo) / 8
medianOfThree(data, lo, lo+s, lo+2*s)
medianOfThree(data, m, m-s, m+s)
medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
}
medianOfThree(data, lo, m, hi-1)
// Invariants are:
// data[lo] = pivot (set up by ChoosePivot)
// data[lo < i < a] < pivot
// data[a <= i < b] <= pivot
// data[b <= i < c] unexamined
// data[c <= i < hi-1] > pivot
// data[hi-1] >= pivot
pivot := lo
a, c := lo+1, hi-1
for ; a < c && data[a].literal < data[pivot].literal; a++ {
}
b := a
for {
for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot
}
for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot
}
if b >= c {
break
}
// data[b] > pivot; data[c-1] <= pivot
data[b], data[c-1] = data[c-1], data[b]
b++
c--
}
// If hi-c<3 then there are duplicates (by property of median of nine).
// Let's be a bit more conservative, and set border to 5.
protect := hi-c < 5
if !protect && hi-c < (hi-lo)/4 {
// Let's test some points for equality to pivot
dups := 0
if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot
data[c], data[hi-1] = data[hi-1], data[c]
c++
dups++
}
if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot
b--
dups++
}
// m-lo = (hi-lo)/2 > 6
// b-lo > (hi-lo)*3/4-1 > 8
// ==> m < b ==> data[m] <= pivot
if data[m].literal > data[pivot].literal { // data[m] = pivot
data[m], data[b-1] = data[b-1], data[m]
b--
dups++
}
// if at least 2 points are equal to pivot, assume skewed distribution
protect = dups > 1
}
if protect {
// Protect against a lot of duplicates
// Add invariant:
// data[a <= i < b] unexamined
// data[b <= i < c] = pivot
for {
for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot
}
for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot
}
if a >= b {
break
}
// data[a] == pivot; data[b-1] < pivot
data[a], data[b-1] = data[b-1], data[a]
a++
b--
}
}
// Swap pivot into middle
data[pivot], data[b-1] = data[b-1], data[pivot]
return b - 1, c
}
// Insertion sort
func insertionSort(data []literalNode, a, b int) {
for i := a + 1; i < b; i++ {
for j := i; j > a && data[j].literal < data[j-1].literal; j-- {
data[j], data[j-1] = data[j-1], data[j]
}
}
}
// maxDepth returns a threshold at which quicksort should switch
// to heapsort. It returns 2*ceil(lg(n+1)).
func maxDepth(n int) int {
var depth int
for i := n; i > 0; i >>= 1 {
depth++
}
return depth * 2
}
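// Worked example (illustrative): maxDepth(100) counts 7 halvings (100, 50, 25,
// 12, 6, 3, 1) and returns 14, i.e. 2*ceil(lg(101)).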
// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
func medianOfThree(data []literalNode, m1, m0, m2 int) {
// sort 3 elements
if data[m1].literal < data[m0].literal {
data[m1], data[m0] = data[m0], data[m1]
}
// data[m0] <= data[m1]
if data[m2].literal < data[m1].literal {
data[m2], data[m1] = data[m1], data[m2]
// data[m0] <= data[m2] && data[m1] < data[m2]
if data[m1].literal < data[m0].literal {
data[m1], data[m0] = data[m0], data[m1]
}
}
// now data[m0] <= data[m1] <= data[m2]
}


@@ -1,937 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package flate implements the DEFLATE compressed data format, described in
// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
// formats.
package flate
import (
"bufio"
"fmt"
"io"
"math/bits"
"strconv"
"sync"
)
const (
maxCodeLen = 16 // max length of Huffman code
maxCodeLenMask = 15 // mask for max length of Huffman code
// The next three numbers come from the RFC section 3.2.7, with the
// additional proviso in section 3.2.5 which implies that distance codes
// 30 and 31 should never occur in compressed data.
maxNumLit = 286
maxNumDist = 30
numCodes = 19 // number of codes in Huffman meta-code
debugDecode = false
)
// Initialize the fixedHuffmanDecoder only once upon first use.
var fixedOnce sync.Once
var fixedHuffmanDecoder huffmanDecoder
// A CorruptInputError reports the presence of corrupt input at a given offset.
type CorruptInputError int64
func (e CorruptInputError) Error() string {
return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
}
// An InternalError reports an error in the flate code itself.
type InternalError string
func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
// A ReadError reports an error encountered while reading input.
//
// Deprecated: No longer returned.
type ReadError struct {
Offset int64 // byte offset where error occurred
Err error // error returned by underlying Read
}
func (e *ReadError) Error() string {
return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}
// A WriteError reports an error encountered while writing output.
//
// Deprecated: No longer returned.
type WriteError struct {
Offset int64 // byte offset where error occurred
Err error // error returned by underlying Write
}
func (e *WriteError) Error() string {
return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}
// Resetter resets a ReadCloser returned by NewReader or NewReaderDict
// to switch to a new underlying Reader. This permits reusing a ReadCloser
// instead of allocating a new one.
type Resetter interface {
// Reset discards any buffered data and resets the Resetter as if it was
// newly initialized with the given reader.
Reset(r io.Reader, dict []byte) error
}
// The data structure for decoding Huffman tables is based on that of
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits).
// For codes smaller than the table width, there are multiple entries
// (each combination of trailing bits has the same value). For codes
// larger than the table width, the table contains a link to an overflow
// table. The width of each entry in the link table is the maximum code
// size minus the chunk width.
//
// Note that you can do a lookup in the table even without all bits
// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
// have the property that shorter codes come before longer ones, the
// bit length estimate in the result is a lower bound on the actual
// number of bits.
//
// See the following:
// http://www.gzip.org/algorithm.txt
// chunk & 15 is number of bits
// chunk >> 4 is value, including table link
const (
huffmanChunkBits = 9
huffmanNumChunks = 1 << huffmanChunkBits
huffmanCountMask = 15
huffmanValueShift = 4
)
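// Illustrative sketch, not part of the original file: unpacking one table entry
// using the layout described above. "chunk" would be a hypothetical value read as
// h.chunks[b&(huffmanNumChunks-1)] during decoding; assumes it lives in this package.
func decodeChunk(chunk uint16) (codeLen uint, value uint16) {
	codeLen = uint(chunk & huffmanCountMask) // low 4 bits: code length in bits (or table-width+1 for a link)
	value = chunk >> huffmanValueShift       // remaining bits: decoded symbol or link-table index
	return codeLen, value
}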
type huffmanDecoder struct {
min int // the minimum code length
chunks *[huffmanNumChunks]uint16 // chunks as described above
links [][]uint16 // overflow links
linkMask uint32 // mask the width of the link table
}
// Initialize Huffman decoding tables from array of code lengths.
// Following this function, h is guaranteed to be initialized into a complete
// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
// degenerate case where the tree has only a single symbol with length 1. Empty
// trees are permitted.
func (h *huffmanDecoder) init(lengths []int) bool {
// Sanity enables additional runtime tests during Huffman
// table construction. It's intended to be used during
// development to supplement the currently ad-hoc unit tests.
const sanity = false
if h.chunks == nil {
h.chunks = &[huffmanNumChunks]uint16{}
}
if h.min != 0 {
*h = huffmanDecoder{chunks: h.chunks, links: h.links}
}
// Count number of codes of each length,
// compute min and max length.
var count [maxCodeLen]int
var min, max int
for _, n := range lengths {
if n == 0 {
continue
}
if min == 0 || n < min {
min = n
}
if n > max {
max = n
}
count[n&maxCodeLenMask]++
}
// Empty tree. The decompressor.huffSym function will fail later if the tree
// is used. Technically, an empty tree is only valid for the HDIST tree and
// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
// is guaranteed to fail since it will attempt to use the tree to decode the
// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
// guaranteed to fail later since the compressed data section must be
// composed of at least one symbol (the end-of-block marker).
if max == 0 {
return true
}
code := 0
var nextcode [maxCodeLen]int
for i := min; i <= max; i++ {
code <<= 1
nextcode[i&maxCodeLenMask] = code
code += count[i&maxCodeLenMask]
}
// Check that the coding is complete (i.e., that we've
// assigned all 2-to-the-max possible bit sequences).
// Exception: To be compatible with zlib, we also need to
// accept degenerate single-code codings. See also
// TestDegenerateHuffmanCoding.
if code != 1<<uint(max) && !(code == 1 && max == 1) {
if debugDecode {
fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
}
return false
}
h.min = min
chunks := h.chunks[:]
for i := range chunks {
chunks[i] = 0
}
if max > huffmanChunkBits {
numLinks := 1 << (uint(max) - huffmanChunkBits)
h.linkMask = uint32(numLinks - 1)
// create link tables
link := nextcode[huffmanChunkBits+1] >> 1
if cap(h.links) < huffmanNumChunks-link {
h.links = make([][]uint16, huffmanNumChunks-link)
} else {
h.links = h.links[:huffmanNumChunks-link]
}
for j := uint(link); j < huffmanNumChunks; j++ {
reverse := int(bits.Reverse16(uint16(j)))
reverse >>= uint(16 - huffmanChunkBits)
off := j - uint(link)
if sanity && h.chunks[reverse] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
if cap(h.links[off]) < numLinks {
h.links[off] = make([]uint16, numLinks)
} else {
links := h.links[off][:0]
h.links[off] = links[:numLinks]
}
}
} else {
h.links = h.links[:0]
}
for i, n := range lengths {
if n == 0 {
continue
}
code := nextcode[n]
nextcode[n]++
chunk := uint16(i<<huffmanValueShift | n)
reverse := int(bits.Reverse16(uint16(code)))
reverse >>= uint(16 - n)
if n <= huffmanChunkBits {
for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
// We should never need to overwrite
// an existing chunk. Also, 0 is
// never a valid chunk, because the
// lower 4 "count" bits should be
// between 1 and 15.
if sanity && h.chunks[off] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[off] = chunk
}
} else {
j := reverse & (huffmanNumChunks - 1)
if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
// Longer codes should have been
// associated with a link table above.
panic("impossible: not an indirect chunk")
}
value := h.chunks[j] >> huffmanValueShift
linktab := h.links[value]
reverse >>= huffmanChunkBits
for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
if sanity && linktab[off] != 0 {
panic("impossible: overwriting existing chunk")
}
linktab[off] = chunk
}
}
}
if sanity {
// Above we've sanity checked that we never overwrote
// an existing entry. Here we additionally check that
// we filled the tables completely.
for i, chunk := range h.chunks {
if chunk == 0 {
// As an exception, in the degenerate
// single-code case, we allow odd
// chunks to be missing.
if code == 1 && i%2 == 1 {
continue
}
panic("impossible: missing chunk")
}
}
for _, linktab := range h.links {
for _, chunk := range linktab {
if chunk == 0 {
panic("impossible: missing chunk")
}
}
}
}
return true
}
// The actual read interface needed by NewReader.
// If the passed in io.Reader does not also have ReadByte,
// the NewReader will introduce its own buffering.
type Reader interface {
io.Reader
io.ByteReader
}
// Decompress state.
type decompressor struct {
// Input source.
r Reader
roffset int64
// Input bits, in top of b.
b uint32
nb uint
// Huffman decoders for literal/length, distance.
h1, h2 huffmanDecoder
// Length arrays used to define Huffman codes.
bits *[maxNumLit + maxNumDist]int
codebits *[numCodes]int
// Output history, buffer.
dict dictDecoder
// Temporary buffer (avoids repeated allocation).
buf [4]byte
// Next step in the decompression,
// and decompression state.
step func(*decompressor)
stepState int
final bool
err error
toRead []byte
hl, hd *huffmanDecoder
copyLen int
copyDist int
}
func (f *decompressor) nextBlock() {
for f.nb < 1+2 {
if f.err = f.moreBits(); f.err != nil {
return
}
}
f.final = f.b&1 == 1
f.b >>= 1
typ := f.b & 3
f.b >>= 2
f.nb -= 1 + 2
switch typ {
case 0:
f.dataBlock()
case 1:
// compressed, fixed Huffman tables
f.hl = &fixedHuffmanDecoder
f.hd = nil
f.huffmanBlock()
case 2:
// compressed, dynamic Huffman tables
if f.err = f.readHuffman(); f.err != nil {
break
}
f.hl = &f.h1
f.hd = &f.h2
f.huffmanBlock()
default:
// 3 is reserved.
if debugDecode {
fmt.Println("reserved data block encountered")
}
f.err = CorruptInputError(f.roffset)
}
}
func (f *decompressor) Read(b []byte) (int, error) {
for {
if len(f.toRead) > 0 {
n := copy(b, f.toRead)
f.toRead = f.toRead[n:]
if len(f.toRead) == 0 {
return n, f.err
}
return n, nil
}
if f.err != nil {
return 0, f.err
}
f.step(f)
if f.err != nil && len(f.toRead) == 0 {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
}
}
}
// Support the io.WriteTo interface for io.Copy and friends.
func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
total := int64(0)
flushed := false
for {
if len(f.toRead) > 0 {
n, err := w.Write(f.toRead)
total += int64(n)
if err != nil {
f.err = err
return total, err
}
if n != len(f.toRead) {
return total, io.ErrShortWrite
}
f.toRead = f.toRead[:0]
}
if f.err != nil && flushed {
if f.err == io.EOF {
return total, nil
}
return total, f.err
}
if f.err == nil {
f.step(f)
}
if len(f.toRead) == 0 && f.err != nil && !flushed {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
flushed = true
}
}
}
func (f *decompressor) Close() error {
if f.err == io.EOF {
return nil
}
return f.err
}
// RFC 1951 section 3.2.7.
// Compression with dynamic Huffman codes
var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
func (f *decompressor) readHuffman() error {
// HLIT[5], HDIST[5], HCLEN[4].
for f.nb < 5+5+4 {
if err := f.moreBits(); err != nil {
return err
}
}
nlit := int(f.b&0x1F) + 257
if nlit > maxNumLit {
if debugDecode {
fmt.Println("nlit > maxNumLit", nlit)
}
return CorruptInputError(f.roffset)
}
f.b >>= 5
ndist := int(f.b&0x1F) + 1
if ndist > maxNumDist {
if debugDecode {
fmt.Println("ndist > maxNumDist", ndist)
}
return CorruptInputError(f.roffset)
}
f.b >>= 5
nclen := int(f.b&0xF) + 4
// numCodes is 19, so nclen is always valid.
f.b >>= 4
f.nb -= 5 + 5 + 4
// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
for i := 0; i < nclen; i++ {
for f.nb < 3 {
if err := f.moreBits(); err != nil {
return err
}
}
f.codebits[codeOrder[i]] = int(f.b & 0x7)
f.b >>= 3
f.nb -= 3
}
for i := nclen; i < len(codeOrder); i++ {
f.codebits[codeOrder[i]] = 0
}
if !f.h1.init(f.codebits[0:]) {
if debugDecode {
fmt.Println("init codebits failed")
}
return CorruptInputError(f.roffset)
}
// HLIT + 257 code lengths, HDIST + 1 code lengths,
// using the code length Huffman code.
for i, n := 0, nlit+ndist; i < n; {
x, err := f.huffSym(&f.h1)
if err != nil {
return err
}
if x < 16 {
// Actual length.
f.bits[i] = x
i++
continue
}
// Repeat previous length or zero.
var rep int
var nb uint
var b int
switch x {
default:
return InternalError("unexpected length code")
case 16:
rep = 3
nb = 2
if i == 0 {
if debugDecode {
fmt.Println("i==0")
}
return CorruptInputError(f.roffset)
}
b = f.bits[i-1]
case 17:
rep = 3
nb = 3
b = 0
case 18:
rep = 11
nb = 7
b = 0
}
for f.nb < nb {
if err := f.moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits:", err)
}
return err
}
}
rep += int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
if i+rep > n {
if debugDecode {
fmt.Println("i+rep > n", i, rep, n)
}
return CorruptInputError(f.roffset)
}
for j := 0; j < rep; j++ {
f.bits[i] = b
i++
}
}
if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
if debugDecode {
fmt.Println("init2 failed")
}
return CorruptInputError(f.roffset)
}
// As an optimization, we can initialize the min bits to read at a time
// for the HLIT tree to the length of the EOB marker since we know that
// every block must terminate with one. This preserves the property that
// we never read any extra bytes after the end of the DEFLATE stream.
if f.h1.min < f.bits[endBlockMarker] {
f.h1.min = f.bits[endBlockMarker]
}
return nil
}
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, the fixed distance
// encoding associated with fixed Huffman blocks is used.
func (f *decompressor) huffmanBlock() {
const (
stateInit = iota // Zero value must be stateInit
stateDict
)
switch f.stepState {
case stateInit:
goto readLiteral
case stateDict:
goto copyHistory
}
readLiteral:
// Read literal and/or (length, distance) according to RFC section 3.2.3.
{
v, err := f.huffSym(f.hl)
if err != nil {
f.err = err
return
}
var n uint // number of bits extra
var length int
switch {
case v < 256:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBlock
f.stepState = stateInit
return
}
goto readLiteral
case v == 256:
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
n = 0
case v < 269:
length = v*2 - (265*2 - 11)
n = 1
case v < 273:
length = v*4 - (269*4 - 19)
n = 2
case v < 277:
length = v*8 - (273*8 - 35)
n = 3
case v < 281:
length = v*16 - (277*16 - 67)
n = 4
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
case v < maxNumLit:
length = 258
n = 0
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
return
}
if n > 0 {
for f.nb < n {
if err = f.moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
}
length += int(f.b & uint32(1<<n-1))
f.b >>= n
f.nb -= n
}
var dist int
if f.hd == nil {
for f.nb < 5 {
if err = f.moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
}
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
f.b >>= 5
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
if debugDecode {
fmt.Println("huffsym:", err)
}
f.err = err
return
}
}
switch {
case dist < 4:
dist++
case dist < maxNumDist:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
for f.nb < nb {
if err = f.moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
f.err = err
return
}
}
extra |= int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
default:
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
f.err = CorruptInputError(f.roffset)
return
}
// No check on length; encoding can be prescient.
if dist > f.dict.histSize() {
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
}
f.copyLen, f.copyDist = length, dist
goto copyHistory
}
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBlock // We need to continue this work
f.stepState = stateDict
return
}
goto readLiteral
}
}
// Copy a single uncompressed data block from input to output.
func (f *decompressor) dataBlock() {
// Uncompressed.
// Discard current half-byte.
f.nb = 0
f.b = 0
// Length then ones-complement of length.
nr, err := io.ReadFull(f.r, f.buf[0:4])
f.roffset += int64(nr)
if err != nil {
f.err = noEOF(err)
return
}
n := int(f.buf[0]) | int(f.buf[1])<<8
nn := int(f.buf[2]) | int(f.buf[3])<<8
if uint16(nn) != uint16(^n) {
if debugDecode {
fmt.Println("uint16(nn) != uint16(^n)", nn, ^n)
}
f.err = CorruptInputError(f.roffset)
return
}
if n == 0 {
f.toRead = f.dict.readFlush()
f.finishBlock()
return
}
f.copyLen = n
f.copyData()
}
// copyData copies f.copyLen bytes from the underlying reader into f.dict.
// It pauses for reads when f.dict is full.
func (f *decompressor) copyData() {
buf := f.dict.writeSlice()
if len(buf) > f.copyLen {
buf = buf[:f.copyLen]
}
cnt, err := io.ReadFull(f.r, buf)
f.roffset += int64(cnt)
f.copyLen -= cnt
f.dict.writeMark(cnt)
if err != nil {
f.err = noEOF(err)
return
}
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).copyData
return
}
f.finishBlock()
}
func (f *decompressor) finishBlock() {
if f.final {
if f.dict.availRead() > 0 {
f.toRead = f.dict.readFlush()
}
f.err = io.EOF
}
f.step = (*decompressor).nextBlock
}
// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
func noEOF(e error) error {
if e == io.EOF {
return io.ErrUnexpectedEOF
}
return e
}
func (f *decompressor) moreBits() error {
c, err := f.r.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << f.nb
f.nb += 8
return nil
}
// Read the next Huffman-encoded symbol from f according to h.
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it to
// satisfy the n == 0 check below.
n := uint(h.min)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := f.r.ReadByte()
if err != nil {
f.b = b
f.nb = nb
return 0, noEOF(err)
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := h.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return 0, f.err
}
f.b = b >> (n & 31)
f.nb = nb - n
return int(chunk >> huffmanValueShift), nil
}
}
}
func makeReader(r io.Reader) Reader {
if rr, ok := r.(Reader); ok {
return rr
}
return bufio.NewReader(r)
}
func fixedHuffmanDecoderInit() {
fixedOnce.Do(func() {
// These come from the RFC section 3.2.6.
var bits [288]int
for i := 0; i < 144; i++ {
bits[i] = 8
}
for i := 144; i < 256; i++ {
bits[i] = 9
}
for i := 256; i < 280; i++ {
bits[i] = 7
}
for i := 280; i < 288; i++ {
bits[i] = 8
}
fixedHuffmanDecoder.init(bits[:])
})
}
func (f *decompressor) Reset(r io.Reader, dict []byte) error {
*f = decompressor{
r: makeReader(r),
bits: f.bits,
codebits: f.codebits,
h1: f.h1,
h2: f.h2,
dict: f.dict,
step: (*decompressor).nextBlock,
}
f.dict.init(maxMatchOffset, dict)
return nil
}
// NewReader returns a new ReadCloser that can be used
// to read the uncompressed version of r.
// If r does not also implement io.ByteReader,
// the decompressor may read more data than necessary from r.
// It is the caller's responsibility to call Close on the ReadCloser
// when finished reading.
//
// The ReadCloser returned by NewReader also implements Resetter.
func NewReader(r io.Reader) io.ReadCloser {
fixedHuffmanDecoderInit()
var f decompressor
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
f.dict.init(maxMatchOffset, nil)
return &f
}
// NewReaderDict is like NewReader but initializes the reader
// with a preset dictionary. The returned Reader behaves as if
// the uncompressed data stream started with the given dictionary,
// which has already been read. NewReaderDict is typically used
// to read data compressed by NewWriterDict.
//
// The ReadCloser returned by NewReader also implements Resetter.
func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
fixedHuffmanDecoderInit()
var f decompressor
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
f.dict.init(maxMatchOffset, dict)
return &f
}
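// Usage sketch (illustrative, not part of the diff): decompressing a raw DEFLATE
// stream with the reader above. Assumes it lives in this package and that "bytes"
// is imported; external callers would use the exported NewReader the same way.
func inflateAll(compressed []byte) ([]byte, error) {
	r := NewReader(bytes.NewReader(compressed))
	defer r.Close()
	var out bytes.Buffer
	if _, err := io.Copy(&out, r); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}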


@@ -1,179 +0,0 @@
package flate
import "fmt"
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type fastEncL1 struct {
fastGen
table [tableSize]tableEntry
}
// Encode encodes src into dst using the level 1 algorithm.
func (e *fastEncL1) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDecode && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load3232(src, s)
for {
const skipLog = 5
const doEvery = 2
nextS := s
var candidate tableEntry
for {
nextHash := hash(cv)
candidate = e.table[nextHash]
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
nextHash = hash(uint32(now))
offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
break
}
// Do one right away...
cv = uint32(now)
s = nextS
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
break
}
cv = uint32(now)
s = nextS
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
t := candidate.offset - e.cur
l := e.matchlenLong(s+4, t+4, src) + 4
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
// Save the match found
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index first pair after match end.
if int(s+l+4) < len(src) {
cv := load3232(src, s)
e.table[hash(cv)] = tableEntry{offset: s + e.cur, val: cv}
}
goto emitRemainder
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 and at s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
prevHash := hash(uint32(x))
e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
x >>= 16
currHash := hash(uint32(x))
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x)}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x) != candidate.val {
cv = uint32(x >> 8)
s++
break
}
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}
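
For a feel of the search-skipping heuristic above (the `nextS = s + doEvery + (s-nextEmit)>>skipLog` step), this small standalone sketch (not part of the file) prints how far the scan advances per probe as the current run of unmatched literals grows:

package main

import "fmt"

func main() {
	// Constants from the level 1 encoder above.
	const skipLog = 5
	const doEvery = 2
	nextEmit := int32(0)
	for _, s := range []int32{0, 32, 128, 1024} {
		step := doEvery + (s-nextEmit)>>skipLog
		fmt.Printf("unmatched run of %4d bytes -> advance %2d positions per probe\n", s, step)
	}
}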


@@ -1,205 +0,0 @@
package flate
import "fmt"
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type fastEncL2 struct {
fastGen
table [bTableSize]tableEntry
}
// Encode uses a similar algorithm to level 1, but is capable
// of matching across blocks giving better compression at a small slowdown.
func (e *fastEncL2) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDecode && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load3232(src, s)
for {
// When should we start skipping if we haven't found matches in a long while.
const skipLog = 5
const doEvery = 2
nextS := s
var candidate tableEntry
for {
nextHash := hash4u(cv, bTableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
candidate = e.table[nextHash]
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
nextHash = hash4u(uint32(now), bTableBits)
offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
break
}
// Do one right away...
cv = uint32(now)
s = nextS
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
break
}
cv = uint32(now)
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
t := candidate.offset - e.cur
l := e.matchlenLong(s+4, t+4, src) + 4
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index first pair after match end.
if int(s+l+4) < len(src) {
cv := load3232(src, s)
e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur, val: cv}
}
goto emitRemainder
}
// Store every second hash in-between, but offset by 1.
for i := s - l + 2; i < s-5; i += 7 {
x := load6432(src, int32(i))
nextHash := hash4u(uint32(x), bTableBits)
e.table[nextHash] = tableEntry{offset: e.cur + i, val: uint32(x)}
// Skip one
x >>= 16
nextHash = hash4u(uint32(x), bTableBits)
e.table[nextHash] = tableEntry{offset: e.cur + i + 2, val: uint32(x)}
// Skip one
x >>= 16
nextHash = hash4u(uint32(x), bTableBits)
e.table[nextHash] = tableEntry{offset: e.cur + i + 4, val: uint32(x)}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 to s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
prevHash := hash4u(uint32(x), bTableBits)
prevHash2 := hash4u(uint32(x>>8), bTableBits)
e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
e.table[prevHash2] = tableEntry{offset: o + 1, val: uint32(x >> 8)}
currHash := hash4u(uint32(x>>16), bTableBits)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x >> 16)}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x>>16) != candidate.val {
cv = uint32(x >> 24)
s++
break
}
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}
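
The "store every second hash in-between, but offset by 1" loop above indexes positions i, i+2 and i+4 per 8-byte load, stepping by 7. As an illustrative sketch (not part of the file), this prints which positions between the last match and the current position actually get indexed:

package main

import "fmt"

func main() {
	s, l := int32(40), int32(20) // current position and length of the match just emitted
	var indexed []int32
	for i := s - l + 2; i < s-5; i += 7 {
		indexed = append(indexed, i, i+2, i+4)
	}
	fmt.Println("indexed positions:", indexed)
}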


@@ -1,231 +0,0 @@
package flate
import "fmt"
// fastEncL3
type fastEncL3 struct {
fastGen
table [tableSize]tableEntryPrev
}
// Encode uses a similar algorithm to level 2, but will check up to two candidates.
func (e *fastEncL3) Encode(dst *tokens, src []byte) {
const (
inputMargin = 8 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDecode && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
}
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
e.table[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// Skip if too small.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load3232(src, s)
for {
const skipLog = 6
nextS := s
var candidate tableEntry
for {
nextHash := hash(cv)
s = nextS
nextS = s + 1 + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
candidates := e.table[nextHash]
now := load3232(src, nextS)
e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
// Check both candidates
candidate = candidates.Cur
offset := s - (candidate.offset - e.cur)
if cv == candidate.val {
if offset > maxMatchOffset {
cv = now
// Previous will also be invalid, we have nothing.
continue
}
o2 := s - (candidates.Prev.offset - e.cur)
if cv != candidates.Prev.val || o2 > maxMatchOffset {
break
}
// Both match and are valid, pick longest.
l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:])
if l2 > l1 {
candidate = candidates.Prev
}
break
} else {
// We only check if value mismatches.
// Offset will always be invalid in other cases.
candidate = candidates.Prev
if cv == candidate.val {
offset := s - (candidate.offset - e.cur)
if offset <= maxMatchOffset {
break
}
}
}
cv = now
}
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
//
t := candidate.offset - e.cur
l := e.matchlenLong(s+4, t+4, src) + 4
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
t += l
// Index first pair after match end.
if int(t+4) < len(src) && t > 0 {
cv := load3232(src, t)
nextHash := hash(cv)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + t, val: cv},
}
}
goto emitRemainder
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-3 to s.
x := load6432(src, s-3)
prevHash := hash(uint32(x))
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 3, val: uint32(x)},
}
x >>= 8
prevHash = hash(uint32(x))
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 2, val: uint32(x)},
}
x >>= 8
prevHash = hash(uint32(x))
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 1, val: uint32(x)},
}
x >>= 8
currHash := hash(uint32(x))
candidates := e.table[currHash]
cv = uint32(x)
e.table[currHash] = tableEntryPrev{
Prev: candidates.Cur,
Cur: tableEntry{offset: s + e.cur, val: cv},
}
// Check both candidates
candidate = candidates.Cur
if cv == candidate.val {
offset := s - (candidate.offset - e.cur)
if offset <= maxMatchOffset {
continue
}
} else {
// We only check if value mismatches.
// Offset will always be invalid in other cases.
candidate = candidates.Prev
if cv == candidate.val {
offset := s - (candidate.offset - e.cur)
if offset <= maxMatchOffset {
continue
}
}
}
cv = uint32(x >> 8)
s++
break
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}


@@ -1,212 +0,0 @@
package flate
import "fmt"
type fastEncL4 struct {
fastGen
table [tableSize]tableEntry
bTable [tableSize]tableEntry
}
func (e *fastEncL4) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDecode && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntry{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.bTable[i].offset = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 6
const doEvery = 1
nextS := s
var t int32
for {
nextHashS := hash4x64(cv, tableBits)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
e.table[nextHashS] = entry
e.bTable[nextHashL] = entry
t = lCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == lCandidate.val {
// We got a long match. Use that.
break
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
// Found a 4 match...
lCandidate = e.bTable[hash7(next, tableBits)]
// If the next long is a candidate, check if we should use that instead...
lOff := nextS - (lCandidate.offset - e.cur)
if lOff < maxMatchOffset && lCandidate.val == uint32(next) {
l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:])
if l2 > l1 {
s = nextS
t = lCandidate.offset - e.cur
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Extend the 4-byte match as long as possible.
l := e.matchlenLong(s+4, t+4, src) + 4
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
if false {
if t >= s {
panic("s-t")
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index first pair after match end.
if int(s+8) < len(src) {
cv := load6432(src, s)
e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)}
e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)}
}
goto emitRemainder
}
// Store every 3rd hash in-between
if true {
i := nextS
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2
e.table[hash4u(t2.val, tableBits)] = t2
i += 3
for ; i < s-1; i += 3 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2
e.table[hash4u(t2.val, tableBits)] = t2
}
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
prevHashS := hash4x64(x, tableBits)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)}
e.bTable[prevHashL] = tableEntry{offset: o, val: uint32(x)}
cv = x >> 8
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}
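
Levels 4-6 keep two tables: one keyed on a 4-byte hash (cheap, collision-prone, catches short matches) and one on a 7-byte hash (more selective, catches longer matches), preferring the long candidate. The standalone sketch below illustrates the idea with a hypothetical multiply-shift hash; hashBytes and its multiplier are assumptions for illustration only, not the package's hash4x64/hash7:

package main

import (
	"encoding/binary"
	"fmt"
)

const tableBits = 15

// hashBytes is a hypothetical multiply-shift hash over the low `length` bytes of v.
func hashBytes(v uint64, length, bits uint) uint32 {
	v <<= 64 - 8*length // keep only the low `length` bytes
	return uint32((v * 0x9E3779B185EBCA87) >> (64 - bits))
}

func main() {
	data := []byte("abcdefghabcdezyx")
	x := binary.LittleEndian.Uint64(data[0:8])
	y := binary.LittleEndian.Uint64(data[8:16])
	// The 4-byte prefixes agree ("abcd"), the 7-byte prefixes do not.
	fmt.Println("4-byte hashes equal:", hashBytes(x, 4, tableBits) == hashBytes(y, 4, tableBits))
	fmt.Println("7-byte hashes equal:", hashBytes(x, 7, tableBits) == hashBytes(y, 7, tableBits))
}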


@@ -1,279 +0,0 @@
package flate
import "fmt"
type fastEncL5 struct {
fastGen
table [tableSize]tableEntry
bTable [tableSize]tableEntryPrev
}
func (e *fastEncL5) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDecode && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
v.Prev.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
}
e.bTable[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 6
const doEvery = 1
nextS := s
var l int32
var t int32
for {
nextHashS := hash4x64(cv, tableBits)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
e.table[nextHashS] = entry
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
nextHashS = hash4x64(next, tableBits)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
if s-t < maxMatchOffset {
if uint32(cv) == lCandidate.Cur.val {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
if ml1 > l {
t = t2
l = ml1
break
}
}
break
}
t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
break
}
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
lCandidate = e.bTable[nextHashL]
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
// If the next long is a candidate, use that...
t2 := lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if lCandidate.Cur.val == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Extend the 4-byte match as long as possible.
if l == 0 {
l = e.matchlenLong(s+4, t+4, src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
if false {
if t >= s {
panic(fmt.Sprintln("s-t", s, t))
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", s-t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
goto emitRemainder
}
// Store every 3rd hash in-between.
if true {
const hashEvery = 3
i := s - l + 1
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
e.table[hash4x64(cv, tableBits)] = t
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// Add a long-table entry at i+1
cv >>= 8
t = tableEntry{offset: t.offset + 1, val: uint32(cv)}
eLong = &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// We only have enough bits for a short entry at i+2
cv >>= 8
t = tableEntry{offset: t.offset + 1, val: uint32(cv)}
e.table[hash4x64(cv, tableBits)] = t
// Skip one - otherwise we risk hitting 's'
i += 4
for ; i < s-1; i += hashEvery {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
e.table[hash4u(t2.val, tableBits)] = t2
}
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
prevHashS := hash4x64(x, tableBits)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)}
eLong := &e.bTable[prevHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: o, val: uint32(x)}, eLong.Cur
cv = x >> 8
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}


@@ -1,282 +0,0 @@
package flate
import "fmt"
type fastEncL6 struct {
fastGen
table [tableSize]tableEntry
bTable [tableSize]tableEntryPrev
}
func (e *fastEncL6) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDecode && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
v.Prev.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
}
e.bTable[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
// Repeat MUST be > 1 and within range
repeat := int32(1)
for {
const skipLog = 7
const doEvery = 1
nextS := s
var l int32
var t int32
for {
nextHashS := hash4x64(cv, tableBits)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
e.table[nextHashS] = entry
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
// Calculate hashes of 'next'
nextHashS = hash4x64(next, tableBits)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
if s-t < maxMatchOffset {
if uint32(cv) == lCandidate.Cur.val {
// Long candidate matches at least 4 bytes.
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
// Check the previous long candidate as well.
t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
if ml1 > l {
t = t2
l = ml1
break
}
}
break
}
// Current value did not match, but check if previous long value does.
t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
break
}
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
// Look up next long candidate (at nextS)
lCandidate = e.bTable[nextHashL]
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
// Check repeat at s + repOff
const repOff = 1
t2 := s - repeat + repOff
if load3232(src, t2) == uint32(cv>>(8*repOff)) {
ml := e.matchlen(s+4+repOff, t2+4, src) + 4
if ml > l {
t = t2
l = ml
s += repOff
// Not worth checking more.
break
}
}
// If the next long is a candidate, use that...
t2 = lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if lCandidate.Cur.val == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
// This is ok, but check previous as well.
}
}
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Extend the 4-byte match as long as possible.
if l == 0 {
l = e.matchlenLong(s+4, t+4, src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
if false {
if t >= s {
panic(fmt.Sprintln("s-t", s, t))
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", s-t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
repeat = s - t
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index after match end.
for i := nextS + 1; i < int32(len(src))-8; i += 2 {
cv := load6432(src, i)
e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur, val: uint32(cv)}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur, val: uint32(cv)}, eLong.Cur
}
goto emitRemainder
}
// Store every long hash in-between and every second short.
if true {
for i := nextS + 1; i < s-1; i += 2 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
e.table[hash4x64(cv, tableBits)] = t
eLong.Cur, eLong.Prev = t, eLong.Cur
eLong2.Cur, eLong2.Prev = t2, eLong2.Cur
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
cv = load6432(src, s)
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}


@@ -1,297 +0,0 @@
package flate
import (
"io"
"math"
"sync"
)
const (
maxStatelessBlock = math.MaxInt16
// dictionary will be taken from maxStatelessBlock, so limit it.
maxStatelessDict = 8 << 10
slTableBits = 13
slTableSize = 1 << slTableBits
slTableShift = 32 - slTableBits
)
type statelessWriter struct {
dst io.Writer
closed bool
}
func (s *statelessWriter) Close() error {
if s.closed {
return nil
}
s.closed = true
// Emit EOF block
return StatelessDeflate(s.dst, nil, true, nil)
}
func (s *statelessWriter) Write(p []byte) (n int, err error) {
err = StatelessDeflate(s.dst, p, false, nil)
if err != nil {
return 0, err
}
return len(p), nil
}
func (s *statelessWriter) Reset(w io.Writer) {
s.dst = w
s.closed = false
}
// NewStatelessWriter will compress without maintaining any state between Write calls.
// No memory is kept between Write calls,
// but compression and speed will be suboptimal.
// Because of this, the size of actual Write calls will affect output size.
func NewStatelessWriter(dst io.Writer) io.WriteCloser {
return &statelessWriter{dst: dst}
}
// bitWriterPool contains bit writers that can be reused.
var bitWriterPool = sync.Pool{
New: func() interface{} {
return newHuffmanBitWriter(nil)
},
}
// StatelessDeflate allows compressing directly to a Writer without retaining state.
// When it returns, everything will have been flushed.
// Up to 8KB of an optional dictionary can be given, which is presumed to precede the block.
// Longer dictionaries will be truncated and will still produce valid output.
// Sending nil dictionary is perfectly fine.
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
var dst tokens
bw := bitWriterPool.Get().(*huffmanBitWriter)
bw.reset(out)
defer func() {
// don't keep a reference to our output
bw.reset(nil)
bitWriterPool.Put(bw)
}()
if eof && len(in) == 0 {
// Just write an EOF block.
// Could be faster...
bw.writeStoredHeader(0, true)
bw.flush()
return bw.err
}
// Truncate dict
if len(dict) > maxStatelessDict {
dict = dict[len(dict)-maxStatelessDict:]
}
for len(in) > 0 {
todo := in
if len(todo) > maxStatelessBlock-len(dict) {
todo = todo[:maxStatelessBlock-len(dict)]
}
in = in[len(todo):]
uncompressed := todo
if len(dict) > 0 {
// combine dict and source
bufLen := len(todo) + len(dict)
combined := make([]byte, bufLen)
copy(combined, dict)
copy(combined[len(dict):], todo)
todo = combined
}
// Compress
statelessEnc(&dst, todo, int16(len(dict)))
isEof := eof && len(in) == 0
if dst.n == 0 {
bw.writeStoredHeader(len(uncompressed), isEof)
if bw.err != nil {
return bw.err
}
bw.writeBytes(uncompressed)
} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
// If we removed less than 1/16th, huffman compress the block.
bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
} else {
bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
}
if len(in) > 0 {
// Retain a dict if we have more
dict = todo[len(todo)-maxStatelessDict:]
dst.Reset()
}
if bw.err != nil {
return bw.err
}
}
if !eof {
// Align, only a stored block can do that.
bw.writeStoredHeader(0, false)
}
bw.flush()
return bw.err
}
func hashSL(u uint32) uint32 {
return (u * 0x1e35a7bd) >> slTableShift
}
func load3216(b []byte, i int16) uint32 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:4]
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
func load6416(b []byte, i int16) uint64 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:8]
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
func statelessEnc(dst *tokens, src []byte, startAt int16) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
type tableEntry struct {
offset int16
}
var table [slTableSize]tableEntry
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src)-int(startAt) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = 0
return
}
// Index until startAt
if startAt > 0 {
cv := load3232(src, 0)
for i := int16(0); i < startAt; i++ {
table[hashSL(cv)] = tableEntry{offset: i}
cv = (cv >> 8) | (uint32(src[i+4]) << 24)
}
}
s := startAt + 1
nextEmit := startAt
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int16(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load3216(src, s)
for {
const skipLog = 5
const doEvery = 2
nextS := s
var candidate tableEntry
for {
nextHash := hashSL(cv)
candidate = table[nextHash]
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit || nextS <= 0 {
goto emitRemainder
}
now := load6416(src, nextS)
table[nextHash] = tableEntry{offset: s}
nextHash = hashSL(uint32(now))
if cv == load3216(src, candidate.offset) {
table[nextHash] = tableEntry{offset: nextS}
break
}
// Do one right away...
cv = uint32(now)
s = nextS
nextS++
candidate = table[nextHash]
now >>= 8
table[nextHash] = tableEntry{offset: s}
if cv == load3216(src, candidate.offset) {
table[nextHash] = tableEntry{offset: nextS}
break
}
cv = uint32(now)
s = nextS
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
t := candidate.offset
l := int16(matchLen(src[s+4:], src[t+4:]) + 4)
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
// Save the match found
dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
goto emitRemainder
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 and at s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load6416(src, s-2)
o := s - 2
prevHash := hashSL(uint32(x))
table[prevHash] = tableEntry{offset: o}
x >>= 16
currHash := hashSL(uint32(x))
candidate = table[currHash]
table[currHash] = tableEntry{offset: o + 2}
if uint32(x) != load3216(src, candidate.offset) {
cv = uint32(x >> 8)
s++
break
}
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}
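
As a usage sketch (a separate program, not part of the file above), StatelessDeflate can compress a sequence of independent chunks into one DEFLATE stream, feeding each previous chunk back in as the dictionary that precedes the next block:

package main

import (
	"bytes"
	"log"

	"github.com/klauspost/compress/flate"
)

func main() {
	chunks := [][]byte{
		[]byte("first independent chunk\n"),
		[]byte("second chunk, compressed with the first as its dictionary\n"),
	}
	var out bytes.Buffer
	var dict []byte
	for i, c := range chunks {
		eof := i == len(chunks)-1 // terminate the stream on the last chunk
		if err := flate.StatelessDeflate(&out, c, eof, dict); err != nil {
			log.Fatal(err)
		}
		dict = c // this chunk precedes the next block in the output
	}
	log.Printf("compressed %d input bytes into %d", len(chunks[0])+len(chunks[1]), out.Len())
}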


@@ -1,375 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math"
)
const (
// 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused
// 8 bits: xlength = length - MIN_MATCH_LENGTH
// 22 bits: xoffset = offset - MIN_OFFSET_SIZE, or literal
lengthShift = 22
offsetMask = 1<<lengthShift - 1
typeMask = 3 << 30
literalType = 0 << 30
matchType = 1 << 30
)
// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
// is lengthCodes[length - MIN_MATCH_LENGTH]
var lengthCodes = [256]uint8{
0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 28,
}
// lengthCodes1 is length codes, but starting at 1.
var lengthCodes1 = [256]uint8{
1, 2, 3, 4, 5, 6, 7, 8, 9, 9,
10, 10, 11, 11, 12, 12, 13, 13, 13, 13,
14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
18, 18, 18, 18, 18, 18, 18, 18, 19, 19,
19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
20, 20, 20, 20, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 29,
}
var offsetCodes = [256]uint32{
0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
}
// offsetCodes14 are offsetCodes, but with 14 added.
var offsetCodes14 = [256]uint32{
14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
}
type token uint32
type tokens struct {
nLits int
extraHist [32]uint16 // codes 256->maxnumlit
offHist [32]uint16 // offset codes
litHist [256]uint16 // codes 0->255
n uint16 // Must be able to contain maxStoreBlockSize
tokens [maxStoreBlockSize + 1]token
}
func (t *tokens) Reset() {
if t.n == 0 {
return
}
t.n = 0
t.nLits = 0
for i := range t.litHist[:] {
t.litHist[i] = 0
}
for i := range t.extraHist[:] {
t.extraHist[i] = 0
}
for i := range t.offHist[:] {
t.offHist[i] = 0
}
}
func (t *tokens) Fill() {
if t.n == 0 {
return
}
for i, v := range t.litHist[:] {
if v == 0 {
t.litHist[i] = 1
t.nLits++
}
}
for i, v := range t.extraHist[:literalCount-256] {
if v == 0 {
t.nLits++
t.extraHist[i] = 1
}
}
for i, v := range t.offHist[:offsetCodeCount] {
if v == 0 {
t.offHist[i] = 1
}
}
}
func indexTokens(in []token) tokens {
var t tokens
t.indexTokens(in)
return t
}
func (t *tokens) indexTokens(in []token) {
t.Reset()
for _, tok := range in {
if tok < matchType {
t.AddLiteral(tok.literal())
continue
}
t.AddMatch(uint32(tok.length()), tok.offset())
}
}
// emitLiteral writes a literal chunk and returns the number of bytes written.
func emitLiteral(dst *tokens, lit []byte) {
ol := int(dst.n)
for i, v := range lit {
dst.tokens[(i+ol)&maxStoreBlockSize] = token(v)
dst.litHist[v]++
}
dst.n += uint16(len(lit))
dst.nLits += len(lit)
}
func (t *tokens) AddLiteral(lit byte) {
t.tokens[t.n] = token(lit)
t.litHist[lit]++
t.n++
t.nLits++
}
// from https://stackoverflow.com/a/28730362
func mFastLog2(val float32) float32 {
ux := int32(math.Float32bits(val))
log2 := (float32)(((ux >> 23) & 255) - 128)
ux &= -0x7f800001
ux += 127 << 23
uval := math.Float32frombits(uint32(ux))
log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759
return log2
}
// EstimatedBits returns a minimum size, in bits, estimated by an *optimal*
// compression of the block.
func (t *tokens) EstimatedBits() int {
shannon := float32(0)
bits := int(0)
nMatches := 0
if t.nLits > 0 {
invTotal := 1.0 / float32(t.nLits)
for _, v := range t.litHist[:] {
if v > 0 {
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
}
}
// Just add 15 for EOB
shannon += 15
for i, v := range t.extraHist[1 : literalCount-256] {
if v > 0 {
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
bits += int(lengthExtraBits[i&31]) * int(v)
nMatches += int(v)
}
}
}
if nMatches > 0 {
invTotal := 1.0 / float32(nMatches)
for i, v := range t.offHist[:offsetCodeCount] {
if v > 0 {
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
bits += int(offsetExtraBits[i&31]) * int(v)
}
}
}
return int(shannon) + bits
}
// AddMatch adds a match to the tokens.
// This function is very sensitive to inlining and right on the border.
func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
if debugDecode {
if xlength >= maxMatchLength+baseMatchLength {
panic(fmt.Errorf("invalid length: %v", xlength))
}
if xoffset >= maxMatchOffset+baseMatchOffset {
panic(fmt.Errorf("invalid offset: %v", xoffset))
}
}
t.nLits++
lengthCode := lengthCodes1[uint8(xlength)] & 31
t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
t.extraHist[lengthCode]++
t.offHist[offsetCode(xoffset)&31]++
t.n++
}
// AddMatchLong adds a match to the tokens, potentially longer than max match length.
// Length should NOT have the base subtracted, only offset should.
func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
if debugDecode {
if xoffset >= maxMatchOffset+baseMatchOffset {
panic(fmt.Errorf("invalid offset: %v", xoffset))
}
}
oc := offsetCode(xoffset) & 31
for xlength > 0 {
xl := xlength
if xl > 258 {
// We need to have at least baseMatchLength left over for next loop.
xl = 258 - baseMatchLength
}
xlength -= xl
xl -= 3
t.nLits++
lengthCode := lengthCodes1[uint8(xl)] & 31
t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
t.extraHist[lengthCode]++
t.offHist[oc]++
t.n++
}
}
func (t *tokens) AddEOB() {
t.tokens[t.n] = token(endBlockMarker)
t.extraHist[0]++
t.n++
}
func (t *tokens) Slice() []token {
return t.tokens[:t.n]
}
// VarInt returns the tokens as varint encoded bytes.
func (t *tokens) VarInt() []byte {
var b = make([]byte, binary.MaxVarintLen32*int(t.n))
var off int
for _, v := range t.tokens[:t.n] {
off += binary.PutUvarint(b[off:], uint64(v))
}
return b[:off]
}
// FromVarInt restores t to the varint encoded tokens provided.
// Any data in t is removed.
func (t *tokens) FromVarInt(b []byte) error {
var buf = bytes.NewReader(b)
var toks []token
for {
r, err := binary.ReadUvarint(buf)
if err == io.EOF {
break
}
if err != nil {
return err
}
toks = append(toks, token(r))
}
t.indexTokens(toks)
return nil
}
// Returns the type of a token
func (t token) typ() uint32 { return uint32(t) & typeMask }
// Returns the literal of a literal token
func (t token) literal() uint8 { return uint8(t) }
// Returns the extra offset of a match token
func (t token) offset() uint32 { return uint32(t) & offsetMask }
func (t token) length() uint8 { return uint8(t >> lengthShift) }
// The code is never more than 8 bits, but is returned as uint32 for convenience.
func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) }
// Returns the offset code corresponding to a specific offset
func offsetCode(off uint32) uint32 {
if false {
if off < uint32(len(offsetCodes)) {
return offsetCodes[off&255]
} else if off>>7 < uint32(len(offsetCodes)) {
return offsetCodes[(off>>7)&255] + 14
} else {
return offsetCodes[(off>>14)&255] + 28
}
}
if off < uint32(len(offsetCodes)) {
return offsetCodes[uint8(off)]
}
return offsetCodes14[uint8(off>>7)]
}
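
To make the token layout concrete, the standalone sketch below (not part of the file) packs and unpacks one match token using the same constants defined at the top of the file: 2 type bits, 8 bits of xlength and 22 bits of xoffset:

package main

import "fmt"

func main() {
	const (
		lengthShift = 22
		offsetMask  = 1<<lengthShift - 1
		matchType   = 1 << 30
	)
	xlength := uint32(10)  // length minus MIN_MATCH_LENGTH
	xoffset := uint32(300) // offset minus MIN_OFFSET_SIZE
	tok := matchType | xlength<<lengthShift | xoffset
	fmt.Printf("token=%#08x xlength=%d xoffset=%d\n",
		tok, uint8(tok>>lengthShift), tok&offsetMask)
}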


@@ -1,344 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package gzip implements reading and writing of gzip format compressed files,
// as specified in RFC 1952.
package gzip
import (
"bufio"
"encoding/binary"
"errors"
"hash/crc32"
"io"
"time"
"github.com/klauspost/compress/flate"
)
const (
gzipID1 = 0x1f
gzipID2 = 0x8b
gzipDeflate = 8
flagText = 1 << 0
flagHdrCrc = 1 << 1
flagExtra = 1 << 2
flagName = 1 << 3
flagComment = 1 << 4
)
var (
// ErrChecksum is returned when reading GZIP data that has an invalid checksum.
ErrChecksum = errors.New("gzip: invalid checksum")
// ErrHeader is returned when reading GZIP data that has an invalid header.
ErrHeader = errors.New("gzip: invalid header")
)
var le = binary.LittleEndian
// noEOF converts io.EOF to io.ErrUnexpectedEOF.
func noEOF(err error) error {
if err == io.EOF {
return io.ErrUnexpectedEOF
}
return err
}
// The gzip file stores a header giving metadata about the compressed file.
// That header is exposed as the fields of the Writer and Reader structs.
//
// Strings must be UTF-8 encoded and may only contain Unicode code points
// U+0001 through U+00FF, due to limitations of the GZIP file format.
type Header struct {
Comment string // comment
Extra []byte // "extra data"
ModTime time.Time // modification time
Name string // file name
OS byte // operating system type
}
// A Reader is an io.Reader that can be read to retrieve
// uncompressed data from a gzip-format compressed file.
//
// In general, a gzip file can be a concatenation of gzip files,
// each with its own header. Reads from the Reader
// return the concatenation of the uncompressed data of each.
// Only the first header is recorded in the Reader fields.
//
// Gzip files store a length and checksum of the uncompressed data.
// The Reader will return an ErrChecksum when Read
// reaches the end of the uncompressed data if it does not
// have the expected length or checksum. Clients should treat data
// returned by Read as tentative until they receive the io.EOF
// marking the end of the data.
type Reader struct {
Header // valid after NewReader or Reader.Reset
r flate.Reader
decompressor io.ReadCloser
digest uint32 // CRC-32, IEEE polynomial (section 8)
size uint32 // Uncompressed size (section 2.3.1)
buf [512]byte
err error
multistream bool
}
// NewReader creates a new Reader reading the given reader.
// If r does not also implement io.ByteReader,
// the decompressor may read more data than necessary from r.
//
// It is the caller's responsibility to call Close on the Reader when done.
//
// The Reader.Header fields will be valid in the Reader returned.
func NewReader(r io.Reader) (*Reader, error) {
z := new(Reader)
if err := z.Reset(r); err != nil {
return nil, err
}
return z, nil
}
// Reset discards the Reader z's state and makes it equivalent to the
// result of its original state from NewReader, but reading from r instead.
// This permits reusing a Reader rather than allocating a new one.
func (z *Reader) Reset(r io.Reader) error {
*z = Reader{
decompressor: z.decompressor,
multistream: true,
}
if rr, ok := r.(flate.Reader); ok {
z.r = rr
} else {
z.r = bufio.NewReader(r)
}
z.Header, z.err = z.readHeader()
return z.err
}
// Multistream controls whether the reader supports multistream files.
//
// If enabled (the default), the Reader expects the input to be a sequence
// of individually gzipped data streams, each with its own header and
// trailer, ending at EOF. The effect is that the concatenation of a sequence
// of gzipped files is treated as equivalent to the gzip of the concatenation
// of the sequence. This is standard behavior for gzip readers.
//
// Calling Multistream(false) disables this behavior; disabling the behavior
// can be useful when reading file formats that distinguish individual gzip
// data streams or mix gzip data streams with other data streams.
// In this mode, when the Reader reaches the end of the data stream,
// Read returns io.EOF. If the underlying reader implements io.ByteReader,
// it will be left positioned just after the gzip stream.
// To start the next stream, call z.Reset(r) followed by z.Multistream(false).
// If there is no next stream, z.Reset(r) will return io.EOF.
func (z *Reader) Multistream(ok bool) {
z.multistream = ok
}
// readString reads a NUL-terminated string from z.r.
// It treats the bytes read as being encoded as ISO 8859-1 (Latin-1) and
// will output a string encoded using UTF-8.
// This method always updates z.digest with the data read.
func (z *Reader) readString() (string, error) {
var err error
needConv := false
for i := 0; ; i++ {
if i >= len(z.buf) {
return "", ErrHeader
}
z.buf[i], err = z.r.ReadByte()
if err != nil {
return "", err
}
if z.buf[i] > 0x7f {
needConv = true
}
if z.buf[i] == 0 {
// Digest covers the NUL terminator.
z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:i+1])
// Strings are ISO 8859-1, Latin-1 (RFC 1952, section 2.3.1).
if needConv {
s := make([]rune, 0, i)
for _, v := range z.buf[:i] {
s = append(s, rune(v))
}
return string(s), nil
}
return string(z.buf[:i]), nil
}
}
}
// readHeader reads the GZIP header according to section 2.3.1.
// This method does not set z.err.
func (z *Reader) readHeader() (hdr Header, err error) {
if _, err = io.ReadFull(z.r, z.buf[:10]); err != nil {
// RFC 1952, section 2.2, says the following:
// A gzip file consists of a series of "members" (compressed data sets).
//
// Other than this, the specification does not clarify whether a
// "series" is defined as "one or more" or "zero or more". To err on the
// side of caution, Go interprets this to mean "zero or more".
// Thus, it is okay to return io.EOF here.
return hdr, err
}
if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate {
return hdr, ErrHeader
}
flg := z.buf[3]
hdr.ModTime = time.Unix(int64(le.Uint32(z.buf[4:8])), 0)
// z.buf[8] is XFL and is currently ignored.
hdr.OS = z.buf[9]
z.digest = crc32.ChecksumIEEE(z.buf[:10])
if flg&flagExtra != 0 {
if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil {
return hdr, noEOF(err)
}
z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:2])
data := make([]byte, le.Uint16(z.buf[:2]))
if _, err = io.ReadFull(z.r, data); err != nil {
return hdr, noEOF(err)
}
z.digest = crc32.Update(z.digest, crc32.IEEETable, data)
hdr.Extra = data
}
var s string
if flg&flagName != 0 {
if s, err = z.readString(); err != nil {
return hdr, err
}
hdr.Name = s
}
if flg&flagComment != 0 {
if s, err = z.readString(); err != nil {
return hdr, err
}
hdr.Comment = s
}
if flg&flagHdrCrc != 0 {
if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil {
return hdr, noEOF(err)
}
digest := le.Uint16(z.buf[:2])
if digest != uint16(z.digest) {
return hdr, ErrHeader
}
}
z.digest = 0
if z.decompressor == nil {
z.decompressor = flate.NewReader(z.r)
} else {
z.decompressor.(flate.Resetter).Reset(z.r, nil)
}
return hdr, nil
}
// Read implements io.Reader, reading uncompressed bytes from its underlying Reader.
func (z *Reader) Read(p []byte) (n int, err error) {
if z.err != nil {
return 0, z.err
}
n, z.err = z.decompressor.Read(p)
z.digest = crc32.Update(z.digest, crc32.IEEETable, p[:n])
z.size += uint32(n)
if z.err != io.EOF {
// In the normal case we return here.
return n, z.err
}
// Finished file; check checksum and size.
if _, err := io.ReadFull(z.r, z.buf[:8]); err != nil {
z.err = noEOF(err)
return n, z.err
}
digest := le.Uint32(z.buf[:4])
size := le.Uint32(z.buf[4:8])
if digest != z.digest || size != z.size {
z.err = ErrChecksum
return n, z.err
}
z.digest, z.size = 0, 0
// File is ok; check if there is another.
if !z.multistream {
return n, io.EOF
}
z.err = nil // Remove io.EOF
if _, z.err = z.readHeader(); z.err != nil {
return n, z.err
}
// Read from next file, if necessary.
if n > 0 {
return n, nil
}
return z.Read(p)
}
// Support the io.WriteTo interface for io.Copy and friends.
func (z *Reader) WriteTo(w io.Writer) (int64, error) {
total := int64(0)
crcWriter := crc32.NewIEEE()
for {
if z.err != nil {
if z.err == io.EOF {
return total, nil
}
return total, z.err
}
// We write both to output and digest.
mw := io.MultiWriter(w, crcWriter)
n, err := z.decompressor.(io.WriterTo).WriteTo(mw)
total += n
z.size += uint32(n)
if err != nil {
z.err = err
return total, z.err
}
// Finished file; check checksum + size.
if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
z.err = err
return total, err
}
z.digest = crcWriter.Sum32()
digest := le.Uint32(z.buf[:4])
size := le.Uint32(z.buf[4:8])
if digest != z.digest || size != z.size {
z.err = ErrChecksum
return total, z.err
}
z.digest, z.size = 0, 0
// File is ok; check if there is another.
if !z.multistream {
return total, nil
}
crcWriter.Reset()
z.err = nil // Remove io.EOF
if _, z.err = z.readHeader(); z.err != nil {
if z.err == io.EOF {
return total, nil
}
return total, z.err
}
}
}
// Close closes the Reader. It does not close the underlying io.Reader.
// In order for the GZIP checksum to be verified, the reader must be
// fully consumed until the io.EOF.
func (z *Reader) Close() error { return z.decompressor.Close() }
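
For orientation only (not part of the diff above): a minimal sketch of how the Reader in this vendored gunzip.go is typically driven. It assumes the github.com/klauspost/compress/gzip import path used by this copy and a placeholder input file name; the API mirrors the standard library's compress/gzip.

package main

import (
	"io"
	"log"
	"os"

	"github.com/klauspost/compress/gzip"
)

func main() {
	// Placeholder input; any gzip-compressed file works here.
	f, err := os.Open("example.gz")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// NewReader parses the 10-byte GZIP header (readHeader above) up front.
	zr, err := gzip.NewReader(f)
	if err != nil {
		log.Fatal(err)
	}
	defer zr.Close()

	// Reading to EOF lets the Reader verify the trailing CRC-32 and size
	// fields, and, since multistream mode is on by default, it transparently
	// concatenates any further gzip members that follow in the stream.
	if _, err := io.Copy(os.Stdout, zr); err != nil {
		log.Fatal(err)
	}
}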


@@ -1,269 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gzip
import (
"errors"
"fmt"
"hash/crc32"
"io"
"github.com/klauspost/compress/flate"
)
// These constants are copied from the flate package, so that code that imports
// "compress/gzip" does not also have to import "compress/flate".
const (
NoCompression = flate.NoCompression
BestSpeed = flate.BestSpeed
BestCompression = flate.BestCompression
DefaultCompression = flate.DefaultCompression
ConstantCompression = flate.ConstantCompression
HuffmanOnly = flate.HuffmanOnly
// StatelessCompression will do compression but without maintaining any state
// between Write calls.
// There will be no memory kept between Write calls,
// but compression and speed will be suboptimal.
// Because of this, the size of actual Write calls will affect output size.
StatelessCompression = -3
)
// A Writer is an io.WriteCloser.
// Writes to a Writer are compressed and written to w.
type Writer struct {
Header // written at first call to Write, Flush, or Close
w io.Writer
level int
wroteHeader bool
compressor *flate.Writer
digest uint32 // CRC-32, IEEE polynomial (section 8)
size uint32 // Uncompressed size (section 2.3.1)
closed bool
buf [10]byte
err error
}
// NewWriter returns a new Writer.
// Writes to the returned writer are compressed and written to w.
//
// It is the caller's responsibility to call Close on the WriteCloser when done.
// Writes may be buffered and not flushed until Close.
//
// Callers that wish to set the fields in Writer.Header must do so before
// the first call to Write, Flush, or Close.
func NewWriter(w io.Writer) *Writer {
z, _ := NewWriterLevel(w, DefaultCompression)
return z
}
// NewWriterLevel is like NewWriter but specifies the compression level instead
// of assuming DefaultCompression.
//
// The compression level can be DefaultCompression, NoCompression, or any
// integer value between BestSpeed and BestCompression inclusive. The error
// returned will be nil if the level is valid.
func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
if level < StatelessCompression || level > BestCompression {
return nil, fmt.Errorf("gzip: invalid compression level: %d", level)
}
z := new(Writer)
z.init(w, level)
return z, nil
}
func (z *Writer) init(w io.Writer, level int) {
compressor := z.compressor
if level != StatelessCompression {
if compressor != nil {
compressor.Reset(w)
}
}
*z = Writer{
Header: Header{
OS: 255, // unknown
},
w: w,
level: level,
compressor: compressor,
}
}
// Reset discards the Writer z's state and makes it equivalent to the
// result of its original state from NewWriter or NewWriterLevel, but
// writing to w instead. This permits reusing a Writer rather than
// allocating a new one.
func (z *Writer) Reset(w io.Writer) {
z.init(w, z.level)
}
// writeBytes writes a length-prefixed byte slice to z.w.
func (z *Writer) writeBytes(b []byte) error {
if len(b) > 0xffff {
return errors.New("gzip.Write: Extra data is too large")
}
le.PutUint16(z.buf[:2], uint16(len(b)))
_, err := z.w.Write(z.buf[:2])
if err != nil {
return err
}
_, err = z.w.Write(b)
return err
}
// writeString writes a UTF-8 string s in GZIP's format to z.w.
// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
func (z *Writer) writeString(s string) (err error) {
// GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII.
needconv := false
for _, v := range s {
if v == 0 || v > 0xff {
return errors.New("gzip.Write: non-Latin-1 header string")
}
if v > 0x7f {
needconv = true
}
}
if needconv {
b := make([]byte, 0, len(s))
for _, v := range s {
b = append(b, byte(v))
}
_, err = z.w.Write(b)
} else {
_, err = io.WriteString(z.w, s)
}
if err != nil {
return err
}
// GZIP strings are NUL-terminated.
z.buf[0] = 0
_, err = z.w.Write(z.buf[:1])
return err
}
// Write writes a compressed form of p to the underlying io.Writer. The
// compressed bytes are not necessarily flushed until the Writer is closed.
func (z *Writer) Write(p []byte) (int, error) {
if z.err != nil {
return 0, z.err
}
var n int
// Write the GZIP header lazily.
if !z.wroteHeader {
z.wroteHeader = true
z.buf[0] = gzipID1
z.buf[1] = gzipID2
z.buf[2] = gzipDeflate
z.buf[3] = 0
if z.Extra != nil {
z.buf[3] |= 0x04
}
if z.Name != "" {
z.buf[3] |= 0x08
}
if z.Comment != "" {
z.buf[3] |= 0x10
}
le.PutUint32(z.buf[4:8], uint32(z.ModTime.Unix()))
if z.level == BestCompression {
z.buf[8] = 2
} else if z.level == BestSpeed {
z.buf[8] = 4
} else {
z.buf[8] = 0
}
z.buf[9] = z.OS
n, z.err = z.w.Write(z.buf[:10])
if z.err != nil {
return n, z.err
}
if z.Extra != nil {
z.err = z.writeBytes(z.Extra)
if z.err != nil {
return n, z.err
}
}
if z.Name != "" {
z.err = z.writeString(z.Name)
if z.err != nil {
return n, z.err
}
}
if z.Comment != "" {
z.err = z.writeString(z.Comment)
if z.err != nil {
return n, z.err
}
}
if z.compressor == nil && z.level != StatelessCompression {
z.compressor, _ = flate.NewWriter(z.w, z.level)
}
}
z.size += uint32(len(p))
z.digest = crc32.Update(z.digest, crc32.IEEETable, p)
if z.level == StatelessCompression {
return len(p), flate.StatelessDeflate(z.w, p, false, nil)
}
n, z.err = z.compressor.Write(p)
return n, z.err
}
// Flush flushes any pending compressed data to the underlying writer.
//
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet. Flush does
// not return until the data has been written. If the underlying
// writer returns an error, Flush returns that error.
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (z *Writer) Flush() error {
if z.err != nil {
return z.err
}
if z.closed || z.level == StatelessCompression {
return nil
}
if !z.wroteHeader {
z.Write(nil)
if z.err != nil {
return z.err
}
}
z.err = z.compressor.Flush()
return z.err
}
// Close closes the Writer, flushing any unwritten data to the underlying
// io.Writer, but does not close the underlying io.Writer.
func (z *Writer) Close() error {
if z.err != nil {
return z.err
}
if z.closed {
return nil
}
z.closed = true
if !z.wroteHeader {
z.Write(nil)
if z.err != nil {
return z.err
}
}
if z.level == StatelessCompression {
z.err = flate.StatelessDeflate(z.w, nil, true, nil)
} else {
z.err = z.compressor.Close()
}
if z.err != nil {
return z.err
}
le.PutUint32(z.buf[:4], z.digest)
le.PutUint32(z.buf[4:8], z.size)
_, z.err = z.w.Write(z.buf[:8])
return z.err
}
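
Again for orientation only (not part of the diff): a minimal sketch of the Writer API defined above, assuming the same github.com/klauspost/compress/gzip import path and a placeholder output file name. Header fields must be set before the first Write, and NewWriterLevel accepts any level from StatelessCompression (-3) up to BestCompression, as documented in the constants block above.

package main

import (
	"log"
	"os"
	"time"

	"github.com/klauspost/compress/gzip"
)

func main() {
	// Placeholder output file name.
	f, err := os.Create("out.gz")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Any level from StatelessCompression up to BestCompression is accepted;
	// BestSpeed is used here purely as an example.
	zw, err := gzip.NewWriterLevel(f, gzip.BestSpeed)
	if err != nil {
		log.Fatal(err)
	}

	// Optional header fields; they are written lazily on the first Write.
	zw.Name = "hello.txt"
	zw.Comment = "written via the vendored klauspost gzip Writer"
	zw.ModTime = time.Now()

	if _, err := zw.Write([]byte("hello, gzip\n")); err != nil {
		log.Fatal(err)
	}

	// Close flushes the compressor and appends the CRC-32 and size trailer;
	// it does not close the underlying file.
	if err := zw.Close(); err != nil {
		log.Fatal(err)
	}
}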


@@ -1,34 +0,0 @@
# Created by https://www.gitignore.io/api/macos
### macOS ###
*.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# End of https://www.gitignore.io/api/macos
cmd/*/*exe
.idea

Some files were not shown because too many files have changed in this diff.