wings/server/filesystem/compress.go

package filesystem

import (
	"archive/tar"
	"archive/zip"
	"compress/gzip"
	"fmt"
	"os"
	"path"
	"path/filepath"
	"strings"
	"sync/atomic"
	"time"

	"emperror.dev/errors"
	"github.com/mholt/archiver/v3"
)

// CompressFiles creates a gzipped tar archive containing the files and folders
// named in paths, which acts as an allow-list: only the listed entries are
// handed to the archiver.
//
// Every entry in paths is interpreted relative to dir. Both dir and each joined
// path are passed through the filesystem's safe-path helpers before use, and
// any error they report is returned unchanged. The slice supplied by the caller
// is never modified.
//
// The archive is written into dir and named `archive-{date}.tar.gz`, where
// {date} is the current time in RFC 3339 format with the colons removed. On
// success the os.FileInfo of the new archive is returned.
//
// Once the archive exists its size is checked with HasSpaceFor; if that check
// fails the archive is removed again and the error is returned.
func (fs *Filesystem) CompressFiles(dir string, paths []string) (os.FileInfo, error) {
	cleanedRootDir, err := fs.SafePath(dir)
	if err != nil {
		return nil, err
	}

	// Resolve every requested path against the root directory. The results are
	// stored in a fresh slice so the caller's slice is left untouched.
	joined := make([]string, len(paths))
	for i, p := range paths {
		joined[i] = filepath.Join(cleanedRootDir, p)
	}

	cleaned, err := fs.ParallelSafePath(joined)
	if err != nil {
		return nil, err
	}

	a := &Archive{BasePath: cleanedRootDir, Files: cleaned}
	// Colons are stripped from the timestamp before it is used in the file name.
	d := path.Join(
		cleanedRootDir,
		fmt.Sprintf("archive-%s.tar.gz", strings.ReplaceAll(time.Now().Format(time.RFC3339), ":", "")),
	)

	if err := a.Create(d); err != nil {
		return nil, err
	}

	f, err := os.Stat(d)
	if err != nil {
		// Best-effort cleanup; the stat failure is the error worth reporting.
		_ = os.Remove(d)
		return nil, err
	}

	// The final archive size is only known after it has been written, so the
	// space check happens here and the archive is discarded if it fails.
	if err := fs.HasSpaceFor(f.Size()); err != nil {
		// Best-effort cleanup; the space error is the one worth reporting.
		_ = os.Remove(d)
		return nil, err
	}

	// NOTE(review): presumably records the archive's size against the tracked
	// disk usage — confirm against addDisk.
	fs.addDisk(f.Size())

	return f, nil
}

// SpaceAvailableForDecompression walks the archive located at file inside dir
// and reports whether extracting it would push the server past its disk limit.
//
// It returns nil when no limit is configured (MaxDisk() <= 0) or when the
// current disk usage plus the summed size of the archive entries stays within
// the limit. A filesystem error with code ErrCodeDiskSpace is returned as soon
// as the running total exceeds the limit, and one with code
// ErrCodeUnknownArchive when the archive format is not recognized. Errors from
// resolving the path, from reading the current disk usage, or from walking the
// archive are returned as-is.
func (fs *Filesystem) SpaceAvailableForDecompression(dir string, file string) error {
	// Don't waste time trying to determine this if we know the server will have the space for
	// it since there is no limit.
	if fs.MaxDisk() <= 0 {
		return nil
	}

	source, err := fs.SafePath(filepath.Join(dir, file))
	if err != nil {
		return err
	}

	// NOTE(review): the false argument presumably permits a cached value to be
	// used — confirm against DiskUsage. A failure here must not be ignored,
	// otherwise the limit check below would run against an unreliable value.
	dirSize, err := fs.DiskUsage(false)
	if err != nil {
		return err
	}

	var size int64
	// Walk over the archive and figure out just how large the final output would be from unarchiving it.
	err = archiver.Walk(source, func(f archiver.File) error {
		// Stop walking as soon as the running total would exceed the limit.
		if atomic.AddInt64(&size, f.Size())+dirSize > fs.MaxDisk() {
			return newFilesystemError(ErrCodeDiskSpace, nil)
		}
		return nil
	})
	if err != nil {
		if IsUnknownArchiveFormatError(err) {
			return newFilesystemError(ErrCodeUnknownArchive, err)
		}
		return err
	}
	return nil
}

// DecompressFile extracts the archive named file, located in dir, into dir.
// The archive format is inferred by the archiver package while walking the
// archive's entries.
//
// Directory entries are skipped. Every other entry is written through the
// filesystem's Writefile helper, after which its mode and modification time
// are set to the values recorded in the archive. Entries for which IsIgnored
// reports an error are silently skipped.
//
// NOTE(review): protection against zip-slip style paths is presumably enforced
// inside Writefile and the other filesystem helpers — it is not visible here.
//
// An unrecognized archive format is reported as a filesystem error with code
// ErrCodeUnknownArchive; any other failure is returned to the caller.
func (fs *Filesystem) DecompressFile(dir string, file string) error {
	source, err := fs.SafePath(filepath.Join(dir, file))
	if err != nil {
		return err
	}
	// Bail out early when the archive itself cannot be stat'ed.
	if _, statErr := os.Stat(source); statErr != nil {
		return errors.WithStack(statErr)
	}

	// extract handles a single archive entry. Directories are not created
	// explicitly here; only non-directory entries are written.
	extract := func(entry archiver.File) error {
		if entry.IsDir() {
			return nil
		}

		target := filepath.Join(dir, ExtractNameFromArchive(entry))
		// Ignored files are skipped without failing the whole extraction.
		if fs.IsIgnored(target) != nil {
			return nil
		}

		if writeErr := fs.Writefile(target, entry); writeErr != nil {
			return wrapError(writeErr, source)
		}
		// Carry over the permissions recorded in the archive.
		if modeErr := fs.Chmod(target, entry.Mode()); modeErr != nil {
			return wrapError(modeErr, source)
		}
		// Carry over the modification time recorded in the archive; it is
		// used for both time arguments.
		modified := entry.ModTime()
		if timeErr := fs.Chtimes(target, modified, modified); timeErr != nil {
			return wrapError(timeErr, source)
		}
		return nil
	}

	walkErr := archiver.Walk(source, extract)
	switch {
	case walkErr == nil:
		return nil
	case IsUnknownArchiveFormatError(walkErr):
		return newFilesystemError(ErrCodeUnknownArchive, walkErr)
	default:
		return walkErr
	}
}

// ExtractNameFromArchive returns the name that should be used for an entry
// taken from an archive.
//
// When the entry's Sys() value is a tar, gzip or zip header, the Name field of
// that header is returned. In every other case — including when Sys() returns
// nil, which the existing notes say happens for ".rar" archives — the result
// of the entry's own Name() method is returned.
//
// According to the existing notes, the header name is preferred because
// relying on Name() alone makes some archive types write every file into the
// base directory instead of the nested directory that is expected.
func ExtractNameFromArchive(f archiver.File) string {
	meta := f.Sys()

	// Try each known header type in turn; a nil value matches none of them.
	if hdr, ok := meta.(*tar.Header); ok {
		return hdr.Name
	}
	if hdr, ok := meta.(*gzip.Header); ok {
		return hdr.Name
	}
	if hdr, ok := meta.(*zip.FileHeader); ok {
		return hdr.Name
	}

	// No usable header: the only option left is the entry's own name.
	return f.Name()
}
Refactor filesystem to not be dependent on a server struct 2020-09-27 19:24:08 +00:00			`package filesystem`

			`import (`
Correctly determine name for archive files when decompressing; closes pterodactyl/panel#3296 2021-04-25 22:36:00 +00:00			`"archive/tar"`
			`"archive/zip"`
			`"compress/gzip"`
Refactor filesystem to not be dependent on a server struct 2020-09-27 19:24:08 +00:00			`"fmt"`
			`"os"`
			`"path"`
			`"path/filepath"`
			`"strings"`
Fix import cycle issue 2021-01-18 05:05:51 +00:00			`"sync/atomic"`
Refactor filesystem to not be dependent on a server struct 2020-09-27 19:24:08 +00:00			`"time"`
Fix import cycle issue 2021-01-18 05:05:51 +00:00
Add better error handling for filesystem 2021-04-17 20:29:18 +00:00			`"emperror.dev/errors"`
Fix import cycle issue 2021-01-18 05:05:51 +00:00			`"github.com/mholt/archiver/v3"`
Refactor filesystem to not be dependent on a server struct 2020-09-27 19:24:08 +00:00			`)`

Fix import cycle issue 2021-01-18 05:05:51 +00:00			`// CompressFiles compresses all of the files matching the given paths in the`
			`// specified directory. This function also supports passing nested paths to only`
			`// compress certain files and folders when working in a larger directory. This`
			`// effectively creates a local backup, but rather than ignoring specific files`
			`// and folders, it takes an allow-list of files and folders.`
Refactor filesystem to not be dependent on a server struct 2020-09-27 19:24:08 +00:00			`//`
Fix import cycle issue 2021-01-18 05:05:51 +00:00			`// All paths are relative to the dir that is passed in as the first argument,`
			`// and the compressed file will be placed at that location named`
			// `archive-{date}.tar.gz`.
Refactor filesystem to not be dependent on a server struct 2020-09-27 19:24:08 +00:00			`func (fs *Filesystem) CompressFiles(dir string, paths []string) (os.FileInfo, error) {`
			`cleanedRootDir, err := fs.SafePath(dir)`
			`if err != nil {`
			`return nil, err`
			`}`

			`// Take all of the paths passed in and merge them together with the root directory we've gotten.`
			`for i, p := range paths {`
			`paths[i] = filepath.Join(cleanedRootDir, p)`
			`}`

			`cleaned, err := fs.ParallelSafePath(paths)`
			`if err != nil {`
			`return nil, err`
			`}`

transfers: use backup archiver 2021-03-07 18:02:03 +00:00			`a := &Archive{BasePath: cleanedRootDir, Files: cleaned}`
Rework archiving logic to be more consistent and less impactful on disk IO (#79) Co-authored-by: Dane Everitt <dane@daneeveritt.com> 2020-12-25 19:52:57 +00:00			`d := path.Join(`
			`cleanedRootDir,`
			`fmt.Sprintf("archive-%s.tar.gz", strings.ReplaceAll(time.Now().Format(time.RFC3339), ":", "")),`
			`)`
Refactor filesystem to not be dependent on a server struct 2020-09-27 19:24:08 +00:00
Rework archiving logic to be more consistent and less impactful on disk IO (#79) Co-authored-by: Dane Everitt <dane@daneeveritt.com> 2020-12-25 19:52:57 +00:00			`if err := a.Create(d); err != nil {`
Error handling improvements (#71) * Remove `emperror.dev/errors`, remove all `errors#Wrap` and `errors#WithStack` calls * Improve logging in `server/backup.go` 2020-11-28 23:57:10 +00:00			`return nil, err`
Refactor filesystem to not be dependent on a server struct 2020-09-27 19:24:08 +00:00			`}`

			`f, err := os.Stat(d)`
			`if err != nil {`
			`_ = os.Remove(d)`
			`return nil, err`
			`}`

Check disk space before trying a write from the downloader; don't make empty directories if we can't even write the file 2020-12-20 19:17:53 +00:00			`if err := fs.HasSpaceFor(f.Size()); err != nil {`
Refactor filesystem to not be dependent on a server struct 2020-09-27 19:24:08 +00:00			`_ = os.Remove(d)`
			`return nil, err`
			`}`

			`fs.addDisk(f.Size())`

			`return f, nil`
			`}`
Fix import cycle issue 2021-01-18 05:05:51 +00:00
			`// SpaceAvailableForDecompression looks through a given archive and determines`
			`// if decompressing it would put the server over its allocated disk space limit.`
			`func (fs *Filesystem) SpaceAvailableForDecompression(dir string, file string) error {`
			`// Don't waste time trying to determine this if we know the server will have the space for`
			`// it since there is no limit.`
			`if fs.MaxDisk() <= 0 {`
			`return nil`
			`}`

			`source, err := fs.SafePath(filepath.Join(dir, file))`
			`if err != nil {`
			`return err`
			`}`

			`// Get the cached size in a parallel process so that if it is not cached we are not`
			`// waiting an unnecessary amount of time on this call.`
			`dirSize, err := fs.DiskUsage(false)`

			`var size int64`
			`// Walk over the archive and figure out just how large the final output would be from unarchiving it.`
			`err = archiver.Walk(source, func(f archiver.File) error {`
			`if atomic.AddInt64(&size, f.Size())+dirSize > fs.MaxDisk() {`
Add better error handling for filesystem 2021-04-17 20:29:18 +00:00			`return newFilesystemError(ErrCodeDiskSpace, nil)`
Fix import cycle issue 2021-01-18 05:05:51 +00:00			`}`
			`return nil`
			`})`
			`if err != nil {`
Add better error handling for filesystem 2021-04-17 20:29:18 +00:00			`if IsUnknownArchiveFormatError(err) {`
			`return newFilesystemError(ErrCodeUnknownArchive, err)`
Fix import cycle issue 2021-01-18 05:05:51 +00:00			`}`
			`return err`
			`}`
			`return err`
			`}`

			`// DecompressFile will decompress a file in a given directory by using the`
			`// archiver tool to infer the file type and go from there. This will walk over`
			`// all of the files within the given archive and ensure that there is not a`
			`// zip-slip attack being attempted by validating that the final path is within`
			`// the server data directory.`
			`func (fs *Filesystem) DecompressFile(dir string, file string) error {`
			`source, err := fs.SafePath(filepath.Join(dir, file))`
			`if err != nil {`
			`return err`
			`}`
			`// Ensure that the source archive actually exists on the system.`
			`if _, err := os.Stat(source); err != nil {`
Add better error handling for filesystem 2021-04-17 20:29:18 +00:00			`return errors.WithStack(err)`
Fix import cycle issue 2021-01-18 05:05:51 +00:00			`}`

Get general concept for backup resotration using a unified interface implemented 2021-01-19 05:20:58 +00:00			`// Walk all of the files in the archiver file and write them to the disk. If any`
			`// directory is encountered it will be skipped since we handle creating any missing`
			`// directories automatically when writing files.`
Fix import cycle issue 2021-01-18 05:05:51 +00:00			`err = archiver.Walk(source, func(f archiver.File) error {`
			`if f.IsDir() {`
			`return nil`
			`}`
Correctly determine name for archive files when decompressing; closes pterodactyl/panel#3296 2021-04-25 22:36:00 +00:00			`p := filepath.Join(dir, ExtractNameFromArchive(f))`
Fix import cycle issue 2021-01-18 05:05:51 +00:00			`// If it is ignored, just don't do anything with the file and skip over it.`
			`if err := fs.IsIgnored(p); err != nil {`
			`return nil`
			`}`
			`if err := fs.Writefile(p, f); err != nil {`
Add better error handling for filesystem 2021-04-17 20:29:18 +00:00			`return wrapError(err, source)`
Fix import cycle issue 2021-01-18 05:05:51 +00:00			`}`
server(fs): keep file mode when extracting archive 2021-07-15 21:37:38 +00:00			`// Update the file permissions to the one set in the archive.`
			`if err := fs.Chmod(p, f.Mode()); err != nil {`
			`return wrapError(err, source)`
			`}`
archive: keep timestamps when extracting 2021-09-01 15:54:41 +00:00			`// Update the file modification time to the one set in the archive.`
			`if err := fs.Chtimes(p, f.ModTime(), f.ModTime()); err != nil {`
			`return wrapError(err, source)`
			`}`
Fix import cycle issue 2021-01-18 05:05:51 +00:00			`return nil`
			`})`
			`if err != nil {`
Add better error handling for filesystem 2021-04-17 20:29:18 +00:00			`if IsUnknownArchiveFormatError(err) {`
			`return newFilesystemError(ErrCodeUnknownArchive, err)`
Fix import cycle issue 2021-01-18 05:05:51 +00:00			`}`
			`return err`
			`}`
			`return nil`
			`}`
Correctly determine name for archive files when decompressing; closes pterodactyl/panel#3296 2021-04-25 22:36:00 +00:00
			`// ExtractNameFromArchive looks at an archive file to try and determine the name`
			`// for a given element in an archive. Because of... who knows why, each file type`
			`// uses different methods to determine the file name.`
			`//`
			`// If there is a archiver.File#Sys() value present we will try to use the name`
			`// present in there, otherwise falling back to archiver.File#Name() if all else`
			`// fails. Without this logic present, some archive types such as zip/tars/etc.`
			`// will write all of the files to the base directory, rather than the nested`
			`// directory that is expected.`
			`//`
			`// For files like ".rar" types, there is no f.Sys() value present, and the value`
			`// of archiver.File#Name() will be what you need.`
			`func ExtractNameFromArchive(f archiver.File) string {`
			`sys := f.Sys()`
			`// Some archive types won't have a value returned when you call f.Sys() on them,`
			`// such as ".rar" archives for example. In those cases the only thing you can do`
			`// is hope that "f.Name()" is actually correct for them.`
			`if sys == nil {`
			`return f.Name()`
			`}`
			`switch s := sys.(type) {`
			`case *tar.Header:`
			`return s.Name`
			`case *gzip.Header:`
			`return s.Name`
			`case *zip.FileHeader:`
			`return s.Name`
			`default:`
			`return f.Name()`
			`}`
			`}`