2020-04-18 22:31:34 +00:00
|
|
|
package server
|
|
|
|
|
|
|
|
import (
|
2020-07-17 05:18:47 +00:00
|
|
|
"context"
|
2020-07-17 04:51:31 +00:00
|
|
|
"github.com/gammazero/workerpool"
|
2020-04-18 22:31:34 +00:00
|
|
|
"io/ioutil"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
2020-07-17 04:51:31 +00:00
|
|
|
"runtime"
|
|
|
|
"sync"
|
2020-04-18 22:31:34 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
type FileWalker struct {
|
|
|
|
*Filesystem
|
|
|
|
}
|
|
|
|
|
2020-07-17 04:51:31 +00:00
|
|
|
type PooledFileWalker struct {
|
|
|
|
wg sync.WaitGroup
|
|
|
|
pool *workerpool.WorkerPool
|
|
|
|
callback filepath.WalkFunc
|
2020-07-17 05:18:47 +00:00
|
|
|
cancel context.CancelFunc
|
|
|
|
|
|
|
|
err error
|
|
|
|
errOnce sync.Once
|
2020-07-17 04:51:31 +00:00
|
|
|
|
|
|
|
Filesystem *Filesystem
|
|
|
|
}
|
|
|
|
|
2020-04-18 22:31:34 +00:00
|
|
|
// Returns a new walker instance.
|
|
|
|
func (fs *Filesystem) NewWalker() *FileWalker {
|
|
|
|
return &FileWalker{fs}
|
|
|
|
}
|
|
|
|
|
2020-07-17 04:51:31 +00:00
|
|
|
// Creates a new pooled file walker that will concurrently walk over a given directory but limit itself
|
|
|
|
// to a worker pool as to not completely flood out the system or cause a process crash.
|
|
|
|
func newPooledWalker(fs *Filesystem) *PooledFileWalker {
|
|
|
|
return &PooledFileWalker{
|
|
|
|
Filesystem: fs,
|
|
|
|
// Create a worker pool that is the same size as the number of processors available on the
|
|
|
|
// system. Going much higher doesn't provide much of a performance boost, and is only more
|
|
|
|
// likely to lead to resource overloading anyways.
|
2020-08-01 04:31:53 +00:00
|
|
|
pool: workerpool.New(runtime.NumCPU()),
|
2020-07-17 04:51:31 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Process a given path by calling the callback function for all of the files and directories within
|
|
|
|
// the path, and then dropping into any directories that we come across.
|
|
|
|
func (w *PooledFileWalker) process(path string) error {
|
|
|
|
p, err := w.Filesystem.SafePath(path)
|
2020-04-18 22:31:34 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-07-17 04:51:31 +00:00
|
|
|
files, err := ioutil.ReadDir(p)
|
2020-04-18 22:31:34 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-07-17 04:51:31 +00:00
|
|
|
// Loop over all of the files and directories in the given directory and call the provided
|
|
|
|
// callback function. If we encounter a directory, push that directory onto the worker queue
|
|
|
|
// to be processed.
|
2020-04-18 22:31:34 +00:00
|
|
|
for _, f := range files {
|
2020-07-18 18:40:38 +00:00
|
|
|
sp, err := w.Filesystem.SafeJoin(p, f)
|
|
|
|
if err != nil {
|
2020-07-18 18:57:50 +00:00
|
|
|
// Let the callback function handle what to do if there is a path resolution error because a
|
|
|
|
// dangerous path was resolved. If there is an error returned, return from this entire process
|
|
|
|
// otherwise just skip over this specific file. We don't care if its a file or a directory at
|
|
|
|
// this point since either way we're skipping it, however, still check for the SkipDir since that
|
|
|
|
// would be thrown otherwise.
|
|
|
|
if err = w.callback(sp, f, err); err != nil && err != filepath.SkipDir {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
continue
|
2020-07-18 18:40:38 +00:00
|
|
|
}
|
2020-07-17 04:51:31 +00:00
|
|
|
|
2020-07-18 18:40:38 +00:00
|
|
|
i, err := os.Stat(sp)
|
2020-07-18 17:54:37 +00:00
|
|
|
// You might end up getting an error about a file or folder not existing if the given path
|
|
|
|
// if it is an invalid symlink. We can safely just skip over these files I believe.
|
|
|
|
if os.IsNotExist(err) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-07-17 05:01:50 +00:00
|
|
|
// Call the user-provided callback for this file or directory. If an error is returned that is
|
|
|
|
// not a SkipDir call, abort the entire process and bubble that error up.
|
|
|
|
if err = w.callback(sp, i, err); err != nil && err != filepath.SkipDir {
|
2020-07-17 04:51:31 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-07-17 05:01:50 +00:00
|
|
|
// If this is a directory, and we didn't get a SkipDir error, continue through by pushing another
|
|
|
|
// job to the pool to handle it. If we requested a skip, don't do anything just continue on to the
|
|
|
|
// next item.
|
|
|
|
if i.IsDir() && err != filepath.SkipDir {
|
2020-07-17 04:51:31 +00:00
|
|
|
w.push(sp)
|
2020-07-17 05:01:50 +00:00
|
|
|
} else if !i.IsDir() && err == filepath.SkipDir {
|
|
|
|
// Per the spec for the callback, if we get a SkipDir error but it is returned for an item
|
|
|
|
// that is _not_ a directory, abort the remaining operations on the directory.
|
|
|
|
return nil
|
2020-04-18 22:31:34 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-07-17 04:51:31 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-07-17 05:01:50 +00:00
|
|
|
// Push a new path into the worker pool and increment the waitgroup so that we do not return too
|
|
|
|
// early and cause panic's as internal directories attempt to submit to the pool.
|
2020-07-17 04:51:31 +00:00
|
|
|
func (w *PooledFileWalker) push(path string) {
|
|
|
|
w.wg.Add(1)
|
|
|
|
w.pool.Submit(func() {
|
2020-07-17 04:53:05 +00:00
|
|
|
defer w.wg.Done()
|
2020-07-17 05:18:47 +00:00
|
|
|
if err := w.process(path); err != nil {
|
|
|
|
w.errOnce.Do(func() {
|
|
|
|
w.err = err
|
|
|
|
if w.cancel != nil {
|
|
|
|
w.cancel()
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
2020-07-17 04:51:31 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// Walks the given directory and executes the callback function for all of the files and directories
|
|
|
|
// that are encountered.
|
|
|
|
func (fs *Filesystem) Walk(dir string, callback filepath.WalkFunc) error {
|
|
|
|
w := newPooledWalker(fs)
|
|
|
|
w.callback = callback
|
|
|
|
|
2020-07-17 05:18:47 +00:00
|
|
|
_, cancel := context.WithCancel(context.Background())
|
|
|
|
w.cancel = cancel
|
|
|
|
|
2020-07-17 04:51:31 +00:00
|
|
|
w.push(dir)
|
|
|
|
|
|
|
|
w.wg.Wait()
|
|
|
|
w.pool.StopWait()
|
|
|
|
|
2020-07-17 05:18:47 +00:00
|
|
|
if w.err != nil {
|
|
|
|
return w.err
|
|
|
|
}
|
|
|
|
|
2020-07-17 04:51:31 +00:00
|
|
|
return nil
|
|
|
|
}
|