Optimizations to the filepath walker function to reduce CPU and I/O issues

This commit is contained in:
Dane Everitt 2020-08-24 19:45:24 -07:00
parent 7aaa51a14f
commit b35ac76720
No known key found for this signature in database
GPG Key ID: EEA66103B3D71F53

View File

@ -3,10 +3,10 @@ package server
import ( import (
"context" "context"
"github.com/gammazero/workerpool" "github.com/gammazero/workerpool"
"github.com/pkg/errors"
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
"runtime"
"sync" "sync"
) )
@ -36,21 +36,14 @@ func (fs *Filesystem) NewWalker() *FileWalker {
func newPooledWalker(fs *Filesystem) *PooledFileWalker { func newPooledWalker(fs *Filesystem) *PooledFileWalker {
return &PooledFileWalker{ return &PooledFileWalker{
Filesystem: fs, Filesystem: fs,
// Create a worker pool that is the same size as the number of processors available on the // Run the walker as a single threaded process to optimize disk I/O and avoid CPU issues.
// system. Going much higher doesn't provide much of a performance boost, and is only more pool: workerpool.New(1),
// likely to lead to resource overloading anyways.
pool: workerpool.New(runtime.NumCPU()),
} }
} }
// Process a given path by calling the callback function for all of the files and directories within // Process a given path by calling the callback function for all of the files and directories within
// the path, and then dropping into any directories that we come across. // the path, and then dropping into any directories that we come across.
func (w *PooledFileWalker) process(path string) error { func (w *PooledFileWalker) process(p string) error {
p, err := w.Filesystem.SafePath(path)
if err != nil {
return err
}
files, err := ioutil.ReadDir(p) files, err := ioutil.ReadDir(p)
if err != nil { if err != nil {
return err return err
@ -74,11 +67,19 @@ func (w *PooledFileWalker) process(path string) error {
continue continue
} }
i, err := os.Stat(sp) var i os.FileInfo
// You might end up getting an error about a file or folder not existing if the given path // Re-stat the file or directory if it is determined to be a symlink by statting the result of the
// if it is an invalid symlink. We can safely just skip over these files I believe. // symlink resolution rather than the initial path we received. Only do this on files we _know_
if os.IsNotExist(err) { // will be returning a different value.
continue if f.Mode()&os.ModeSymlink != 0 {
i, err = os.Stat(sp)
// You might end up getting an error about a file or folder not existing if the given path
// is an invalid symlink. We can safely just skip over these files I believe.
if os.IsNotExist(err) {
continue
}
} else {
i = f
} }
// Call the user-provided callback for this file or directory. If an error is returned that is // Call the user-provided callback for this file or directory. If an error is returned that is
@ -128,8 +129,12 @@ func (fs *Filesystem) Walk(dir string, callback filepath.WalkFunc) error {
_, cancel := context.WithCancel(context.Background()) _, cancel := context.WithCancel(context.Background())
w.cancel = cancel w.cancel = cancel
w.push(dir) p, err := w.Filesystem.SafePath(dir)
if err != nil {
return errors.WithStack(err)
}
w.push(p)
w.wg.Wait() w.wg.Wait()
w.pool.StopWait() w.pool.StopWait()