Optimizations to the filepath walker function to reduce CPU and I/O issues
This commit is contained in:
parent
7aaa51a14f
commit
b35ac76720
|
@ -3,10 +3,10 @@ package server
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"github.com/gammazero/workerpool"
|
"github.com/gammazero/workerpool"
|
||||||
|
"github.com/pkg/errors"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
|
||||||
"sync"
|
"sync"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -36,21 +36,14 @@ func (fs *Filesystem) NewWalker() *FileWalker {
|
||||||
func newPooledWalker(fs *Filesystem) *PooledFileWalker {
|
func newPooledWalker(fs *Filesystem) *PooledFileWalker {
|
||||||
return &PooledFileWalker{
|
return &PooledFileWalker{
|
||||||
Filesystem: fs,
|
Filesystem: fs,
|
||||||
// Create a worker pool that is the same size as the number of processors available on the
|
// Run the walker as a single-threaded process to optimize disk I/O and avoid CPU issues.
|
||||||
// system. Going much higher doesn't provide much of a performance boost, and is only more
|
pool: workerpool.New(1),
|
||||||
// likely to lead to resource overloading anyways.
|
|
||||||
pool: workerpool.New(runtime.NumCPU()),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process a given path by calling the callback function for all of the files and directories within
|
// Process a given path by calling the callback function for all of the files and directories within
|
||||||
// the path, and then dropping into any directories that we come across.
|
// the path, and then dropping into any directories that we come across.
|
||||||
func (w *PooledFileWalker) process(path string) error {
|
func (w *PooledFileWalker) process(p string) error {
|
||||||
p, err := w.Filesystem.SafePath(path)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
files, err := ioutil.ReadDir(p)
|
files, err := ioutil.ReadDir(p)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -74,11 +67,19 @@ func (w *PooledFileWalker) process(path string) error {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
i, err := os.Stat(sp)
|
var i os.FileInfo
|
||||||
// You might end up getting an error about a file or folder not existing if the given path
|
// Re-stat the file or directory if it is determined to be a symlink by statting the result of the
|
||||||
// is an invalid symlink. We can safely just skip over these files I believe.
|
// symlink resolution rather than the initial path we received. Only do this on files we _know_
|
||||||
if os.IsNotExist(err) {
|
// will be returning a different value.
|
||||||
continue
|
if f.Mode()&os.ModeSymlink != 0 {
|
||||||
|
i, err = os.Stat(sp)
|
||||||
|
// You might end up getting an error about a file or folder not existing if the given path
|
||||||
|
// is an invalid symlink. We can safely just skip over these files I believe.
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
i = f
|
||||||
}
|
}
|
||||||
|
|
||||||
// Call the user-provided callback for this file or directory. If an error is returned that is
|
// Call the user-provided callback for this file or directory. If an error is returned that is
|
||||||
|
@ -128,8 +129,12 @@ func (fs *Filesystem) Walk(dir string, callback filepath.WalkFunc) error {
|
||||||
_, cancel := context.WithCancel(context.Background())
|
_, cancel := context.WithCancel(context.Background())
|
||||||
w.cancel = cancel
|
w.cancel = cancel
|
||||||
|
|
||||||
w.push(dir)
|
p, err := w.Filesystem.SafePath(dir)
|
||||||
|
if err != nil {
|
||||||
|
return errors.WithStack(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
w.push(p)
|
||||||
w.wg.Wait()
|
w.wg.Wait()
|
||||||
w.pool.StopWait()
|
w.pool.StopWait()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user