2020-09-27 19:24:08 +00:00
|
|
|
package filesystem
|
|
|
|
|
|
|
|
import (
|
2021-04-25 22:36:00 +00:00
|
|
|
"archive/tar"
|
|
|
|
"archive/zip"
|
|
|
|
"compress/gzip"
|
2022-11-06 20:38:30 +00:00
|
|
|
"context"
|
2020-09-27 19:24:08 +00:00
|
|
|
"fmt"
|
2022-11-15 01:25:01 +00:00
|
|
|
"io"
|
2022-11-06 20:38:30 +00:00
|
|
|
iofs "io/fs"
|
2020-09-27 19:24:08 +00:00
|
|
|
"os"
|
|
|
|
"path"
|
|
|
|
"path/filepath"
|
2022-05-30 22:42:31 +00:00
|
|
|
"reflect"
|
2020-09-27 19:24:08 +00:00
|
|
|
"strings"
|
2021-01-18 05:05:51 +00:00
|
|
|
"sync/atomic"
|
2020-09-27 19:24:08 +00:00
|
|
|
"time"
|
2021-01-18 05:05:51 +00:00
|
|
|
|
2022-11-06 20:38:30 +00:00
|
|
|
"emperror.dev/errors"
|
2022-10-05 23:24:11 +00:00
|
|
|
gzip2 "github.com/klauspost/compress/gzip"
|
|
|
|
zip2 "github.com/klauspost/compress/zip"
|
2022-11-06 20:38:30 +00:00
|
|
|
"github.com/mholt/archiver/v4"
|
2020-09-27 19:24:08 +00:00
|
|
|
)
|
|
|
|
|
2022-11-15 01:25:01 +00:00
|
|
|
// CompressFiles compresses all the files matching the given paths in the
|
2021-01-18 05:05:51 +00:00
|
|
|
// specified directory. This function also supports passing nested paths to only
|
|
|
|
// compress certain files and folders when working in a larger directory. This
|
|
|
|
// effectively creates a local backup, but rather than ignoring specific files
|
|
|
|
// and folders, it takes an allow-list of files and folders.
|
2020-09-27 19:24:08 +00:00
|
|
|
//
|
2021-01-18 05:05:51 +00:00
|
|
|
// All paths are relative to the dir that is passed in as the first argument,
|
|
|
|
// and the compressed file will be placed at that location named
|
|
|
|
// `archive-{date}.tar.gz`.
|
2020-09-27 19:24:08 +00:00
|
|
|
func (fs *Filesystem) CompressFiles(dir string, paths []string) (os.FileInfo, error) {
|
|
|
|
cleanedRootDir, err := fs.SafePath(dir)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2022-11-15 01:25:01 +00:00
|
|
|
// Take all the paths passed in and merge them together with the root directory we've gotten.
|
2020-09-27 19:24:08 +00:00
|
|
|
for i, p := range paths {
|
|
|
|
paths[i] = filepath.Join(cleanedRootDir, p)
|
|
|
|
}
|
|
|
|
|
|
|
|
cleaned, err := fs.ParallelSafePath(paths)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2021-03-07 18:02:03 +00:00
|
|
|
a := &Archive{BasePath: cleanedRootDir, Files: cleaned}
|
2020-12-25 19:52:57 +00:00
|
|
|
d := path.Join(
|
|
|
|
cleanedRootDir,
|
|
|
|
fmt.Sprintf("archive-%s.tar.gz", strings.ReplaceAll(time.Now().Format(time.RFC3339), ":", "")),
|
|
|
|
)
|
2020-09-27 19:24:08 +00:00
|
|
|
|
2022-11-15 01:25:01 +00:00
|
|
|
if err := a.Create(context.Background(), d); err != nil {
|
2020-11-28 23:57:10 +00:00
|
|
|
return nil, err
|
2020-09-27 19:24:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
f, err := os.Stat(d)
|
|
|
|
if err != nil {
|
|
|
|
_ = os.Remove(d)
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2020-12-20 19:17:53 +00:00
|
|
|
if err := fs.HasSpaceFor(f.Size()); err != nil {
|
2020-09-27 19:24:08 +00:00
|
|
|
_ = os.Remove(d)
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
fs.addDisk(f.Size())
|
|
|
|
|
|
|
|
return f, nil
|
|
|
|
}
|
2021-01-18 05:05:51 +00:00
|
|
|
|
|
|
|
// SpaceAvailableForDecompression looks through a given archive and determines
|
|
|
|
// if decompressing it would put the server over its allocated disk space limit.
|
2022-11-06 20:38:30 +00:00
|
|
|
func (fs *Filesystem) SpaceAvailableForDecompression(ctx context.Context, dir string, file string) error {
|
2021-01-18 05:05:51 +00:00
|
|
|
// Don't waste time trying to determine this if we know the server will have the space for
|
|
|
|
// it since there is no limit.
|
|
|
|
if fs.MaxDisk() <= 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
source, err := fs.SafePath(filepath.Join(dir, file))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Get the cached size in a parallel process so that if it is not cached we are not
|
|
|
|
// waiting an unnecessary amount of time on this call.
|
|
|
|
dirSize, err := fs.DiskUsage(false)
|
|
|
|
|
2022-11-06 20:38:30 +00:00
|
|
|
fsys, err := archiver.FileSystem(source)
|
2021-01-18 05:05:51 +00:00
|
|
|
if err != nil {
|
2022-11-06 20:38:30 +00:00
|
|
|
if errors.Is(err, archiver.ErrNoMatch) {
|
2021-04-17 20:29:18 +00:00
|
|
|
return newFilesystemError(ErrCodeUnknownArchive, err)
|
2021-01-18 05:05:51 +00:00
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
2022-11-06 20:38:30 +00:00
|
|
|
|
|
|
|
var size int64
|
|
|
|
return iofs.WalkDir(fsys, ".", func(path string, d iofs.DirEntry, err error) error {
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
// Stop walking if the context is canceled.
|
|
|
|
return ctx.Err()
|
|
|
|
default:
|
|
|
|
info, err := d.Info()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if atomic.AddInt64(&size, info.Size())+dirSize > fs.MaxDisk() {
|
|
|
|
return newFilesystemError(ErrCodeDiskSpace, nil)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
})
|
2021-01-18 05:05:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// DecompressFile will decompress a file in a given directory by using the
|
|
|
|
// archiver tool to infer the file type and go from there. This will walk over
|
2022-11-06 20:38:30 +00:00
|
|
|
// all the files within the given archive and ensure that there is not a
|
2021-01-18 05:05:51 +00:00
|
|
|
// zip-slip attack being attempted by validating that the final path is within
|
|
|
|
// the server data directory.
|
2022-11-06 20:38:30 +00:00
|
|
|
func (fs *Filesystem) DecompressFile(ctx context.Context, dir string, file string) error {
|
2021-01-18 05:05:51 +00:00
|
|
|
source, err := fs.SafePath(filepath.Join(dir, file))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-11-06 20:38:30 +00:00
|
|
|
return fs.DecompressFileUnsafe(ctx, dir, source)
|
|
|
|
}
|
|
|
|
|
|
|
|
// DecompressFileUnsafe will decompress any file on the local disk without checking
|
|
|
|
// if it is owned by the server. The file will be SAFELY decompressed and extracted
|
|
|
|
// into the server's directory.
|
|
|
|
func (fs *Filesystem) DecompressFileUnsafe(ctx context.Context, dir string, file string) error {
|
|
|
|
// Ensure that the archive actually exists on the system.
|
|
|
|
if _, err := os.Stat(file); err != nil {
|
2021-04-17 20:29:18 +00:00
|
|
|
return errors.WithStack(err)
|
2021-01-18 05:05:51 +00:00
|
|
|
}
|
|
|
|
|
2022-11-06 20:38:30 +00:00
|
|
|
f, err := os.Open(file)
|
2021-01-18 05:05:51 +00:00
|
|
|
if err != nil {
|
2022-11-06 20:38:30 +00:00
|
|
|
return err
|
|
|
|
}
|
2022-11-15 01:25:01 +00:00
|
|
|
// TODO: defer file close?
|
2022-11-06 20:38:30 +00:00
|
|
|
|
|
|
|
// Identify the type of archive we are dealing with.
|
|
|
|
format, input, err := archiver.Identify(filepath.Base(file), f)
|
|
|
|
if err != nil {
|
|
|
|
if errors.Is(err, archiver.ErrNoMatch) {
|
2021-04-17 20:29:18 +00:00
|
|
|
return newFilesystemError(ErrCodeUnknownArchive, err)
|
2021-01-18 05:05:51 +00:00
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
2022-11-06 20:38:30 +00:00
|
|
|
|
2022-11-15 01:25:01 +00:00
|
|
|
return fs.extractStream(ctx, extractStreamOptions{
|
|
|
|
Directory: dir,
|
|
|
|
Format: format,
|
|
|
|
Reader: input,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// ExtractStreamUnsafe .
|
|
|
|
func (fs *Filesystem) ExtractStreamUnsafe(ctx context.Context, dir string, r io.Reader) error {
|
|
|
|
format, input, err := archiver.Identify("archive.tar.gz", r)
|
|
|
|
if err != nil {
|
|
|
|
if errors.Is(err, archiver.ErrNoMatch) {
|
|
|
|
return newFilesystemError(ErrCodeUnknownArchive, err)
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return fs.extractStream(ctx, extractStreamOptions{
|
|
|
|
Directory: dir,
|
|
|
|
Format: format,
|
|
|
|
Reader: input,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
type extractStreamOptions struct {
|
|
|
|
// The directory to extract the archive to.
|
|
|
|
Directory string
|
|
|
|
// File name of the archive.
|
|
|
|
FileName string
|
|
|
|
// Format of the archive.
|
|
|
|
Format archiver.Format
|
|
|
|
// Reader for the archive.
|
|
|
|
Reader io.Reader
|
|
|
|
}
|
|
|
|
|
|
|
|
func (fs *Filesystem) extractStream(ctx context.Context, opts extractStreamOptions) error {
|
2022-11-06 20:38:30 +00:00
|
|
|
// Decompress and extract archive
|
2022-11-15 01:25:01 +00:00
|
|
|
if ex, ok := opts.Format.(archiver.Extractor); ok {
|
|
|
|
return ex.Extract(ctx, opts.Reader, nil, func(ctx context.Context, f archiver.File) error {
|
2022-11-06 20:38:30 +00:00
|
|
|
if f.IsDir() {
|
|
|
|
return nil
|
|
|
|
}
|
2022-11-15 01:25:01 +00:00
|
|
|
p := filepath.Join(opts.Directory, ExtractNameFromArchive(f))
|
2022-11-06 20:38:30 +00:00
|
|
|
// If it is ignored, just don't do anything with the file and skip over it.
|
|
|
|
if err := fs.IsIgnored(p); err != nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
r, err := f.Open()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer r.Close()
|
|
|
|
if err := fs.Writefile(p, r); err != nil {
|
2022-11-15 01:25:01 +00:00
|
|
|
return wrapError(err, opts.FileName)
|
2022-11-06 20:38:30 +00:00
|
|
|
}
|
|
|
|
// Update the file permissions to the one set in the archive.
|
|
|
|
if err := fs.Chmod(p, f.Mode()); err != nil {
|
2022-11-15 01:25:01 +00:00
|
|
|
return wrapError(err, opts.FileName)
|
2022-11-06 20:38:30 +00:00
|
|
|
}
|
|
|
|
// Update the file modification time to the one set in the archive.
|
|
|
|
if err := fs.Chtimes(p, f.ModTime(), f.ModTime()); err != nil {
|
2022-11-15 01:25:01 +00:00
|
|
|
return wrapError(err, opts.FileName)
|
2022-11-06 20:38:30 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
}
|
2021-01-18 05:05:51 +00:00
|
|
|
return nil
|
|
|
|
}
|
2021-04-25 22:36:00 +00:00
|
|
|
|
|
|
|
// ExtractNameFromArchive looks at an archive file to try and determine the name
|
|
|
|
// for a given element in an archive. Because of... who knows why, each file type
|
|
|
|
// uses different methods to determine the file name.
|
|
|
|
//
|
|
|
|
// If there is a archiver.File#Sys() value present we will try to use the name
|
|
|
|
// present in there, otherwise falling back to archiver.File#Name() if all else
|
|
|
|
// fails. Without this logic present, some archive types such as zip/tars/etc.
|
|
|
|
// will write all of the files to the base directory, rather than the nested
|
|
|
|
// directory that is expected.
|
|
|
|
//
|
|
|
|
// For files like ".rar" types, there is no f.Sys() value present, and the value
|
|
|
|
// of archiver.File#Name() will be what you need.
|
|
|
|
func ExtractNameFromArchive(f archiver.File) string {
|
|
|
|
sys := f.Sys()
|
|
|
|
// Some archive types won't have a value returned when you call f.Sys() on them,
|
|
|
|
// such as ".rar" archives for example. In those cases the only thing you can do
|
|
|
|
// is hope that "f.Name()" is actually correct for them.
|
|
|
|
if sys == nil {
|
|
|
|
return f.Name()
|
|
|
|
}
|
|
|
|
switch s := sys.(type) {
|
2022-05-30 22:42:31 +00:00
|
|
|
case *zip.FileHeader:
|
|
|
|
return s.Name
|
|
|
|
case *zip2.FileHeader:
|
|
|
|
return s.Name
|
2021-04-25 22:36:00 +00:00
|
|
|
case *tar.Header:
|
|
|
|
return s.Name
|
|
|
|
case *gzip.Header:
|
|
|
|
return s.Name
|
2022-05-30 22:42:31 +00:00
|
|
|
case *gzip2.Header:
|
2021-04-25 22:36:00 +00:00
|
|
|
return s.Name
|
|
|
|
default:
|
2022-05-30 22:42:31 +00:00
|
|
|
// At this point we cannot figure out what type of archive this might be so
|
|
|
|
// just try to find the name field in the struct. If it is found return it.
|
|
|
|
field := reflect.Indirect(reflect.ValueOf(sys)).FieldByName("Name")
|
|
|
|
if field.IsValid() {
|
|
|
|
return field.String()
|
|
|
|
}
|
|
|
|
// Fallback to the basename of the file at this point. There is nothing we can really
|
|
|
|
// do to try and figure out what the underlying directory of the file is supposed to
|
|
|
|
// be since it didn't implement a name field.
|
2021-04-25 22:36:00 +00:00
|
|
|
return f.Name()
|
|
|
|
}
|
|
|
|
}
|