wings/server/power.go

292 lines
9.9 KiB
Go
Raw Normal View History

2020-04-06 01:00:33 +00:00
package server
import (
"context"
2022-01-23 17:49:35 +00:00
"fmt"
2021-01-10 01:22:39 +00:00
"os"
2022-01-23 17:49:35 +00:00
"sync"
2021-01-10 01:22:39 +00:00
"time"
"emperror.dev/errors"
2022-01-23 17:49:35 +00:00
"github.com/google/uuid"
"github.com/pterodactyl/wings/config"
"github.com/pterodactyl/wings/environment"
)
// PowerAction is the string name of a power operation that can be requested
// for a server. The recognized values are the PowerAction* constants.
type PowerAction string
// The power actions that can be performed for a given server.
//
// HandlePowerAction serializes these through the server's power lock so that
// two actions are never processed at the same time. While one action holds the
// lock, a second start/stop/restart request is either rejected immediately or,
// when a wait duration is supplied, blocked until the lock is released or the
// wait expires. The "kill" action is allowed through even when the lock cannot
// be acquired.
const (
	PowerActionStart     = "start"
	PowerActionStop      = "stop"
	PowerActionRestart   = "restart"
	PowerActionTerminate = "kill"
)
// IsValid checks if the power action being received is valid.
func (pa PowerAction) IsValid() bool {
return pa == PowerActionStart ||
pa == PowerActionStop ||
pa == PowerActionTerminate ||
pa == PowerActionRestart
2020-04-06 01:00:33 +00:00
}
// IsStart reports whether pa results in the server being started, which is
// the case for both the start and the restart actions.
func (pa PowerAction) IsStart() bool {
	switch pa {
	case PowerActionStart, PowerActionRestart:
		return true
	}
	return false
}
2022-01-23 17:49:35 +00:00
// powerLocker is a single-holder lock built on a buffered channel: a value
// sitting in ch means the lock is currently held.
type powerLocker struct {
	// mu guards the non-blocking channel operations performed by IsLocked,
	// Acquire, Release and Destroy. TryAcquire does not take it, since it may
	// block on the channel for the lifetime of its context.
	mu sync.RWMutex
	// ch carries the lock token. newPowerLocker creates it with a buffer of
	// one, so at most one token can be present.
	ch chan bool
}
func newPowerLocker() *powerLocker {
return &powerLocker{
ch: make(chan bool, 1),
2020-09-26 03:02:38 +00:00
}
2022-01-23 17:49:35 +00:00
}
// errPowerLockerLocked is the concrete type behind ErrPowerLockerLocked.
type errPowerLockerLocked struct{}

// Error implements the error interface.
func (errPowerLockerLocked) Error() string {
	const msg = "cannot acquire a lock on the power state: already locked"
	return msg
}

// ErrPowerLockerLocked is the error reported when the power lock cannot be
// acquired because it is already held.
var ErrPowerLockerLocked error = errPowerLockerLocked{}
// IsLocked reports whether the lock is currently held. The lock counts as
// held when the locker channel contains a value.
func (pl *powerLocker) IsLocked() bool {
	pl.mu.RLock()
	held := len(pl.ch) == 1
	pl.mu.RUnlock()
	return held
}
2020-09-26 03:02:38 +00:00
2022-01-23 17:49:35 +00:00
// Acquire will acquire the power lock if it is not currently locked. If it is
// already locked, acquire will fail to acquire the lock, and will return false.
func (pl *powerLocker) Acquire() error {
pl.mu.Lock()
defer pl.mu.Unlock()
select {
case pl.ch <- true:
default:
2022-01-23 17:49:35 +00:00
return errors.WithStack(ErrPowerLockerLocked)
2020-09-26 03:02:38 +00:00
}
2022-01-23 17:49:35 +00:00
return nil
}
2020-09-26 03:02:38 +00:00
2022-01-23 17:49:35 +00:00
// TryAcquire blocks until either the power lock has been acquired or ctx is
// done, whichever happens first. It returns nil once the lock is held, and the
// context's error (wrapped with a stack trace) when the context finishes
// first.
func (pl *powerLocker) TryAcquire(ctx context.Context) error {
	select {
	case <-ctx.Done():
		err := ctx.Err()
		if err == nil {
			return nil
		}
		return errors.WithStack(err)
	case pl.ch <- true:
		return nil
	}
}
// Release will drain the locker channel so that we can properly re-acquire it
// at a later time. If the channel is not currently locked this function is a
// no-op and will immediately return.
2022-01-23 17:49:35 +00:00
func (pl *powerLocker) Release() {
pl.mu.Lock()
select {
case <-pl.ch:
default:
2022-01-23 17:49:35 +00:00
}
pl.mu.Unlock()
}
// Destroy cleans up the power locker by closing the channel.
func (pl *powerLocker) Destroy() {
pl.mu.Lock()
if pl.ch != nil {
select {
case <-pl.ch:
default:
2022-01-23 17:49:35 +00:00
}
close(pl.ch)
}
pl.mu.Unlock()
}
// ExecutingPowerAction checks if there is currently a power action being
// processed for the server.
func (s *Server) ExecutingPowerAction() bool {
return s.powerLock.IsLocked()
2020-09-26 03:02:38 +00:00
}
// HandlePowerAction is a helper function that can receive a power action and then process the
// actions that need to occur for it. This guards against someone calling Start() twice at the
// same time, or trying to restart while another restart process is currently running.
//
// However, the code design for the daemon does depend on the user correctly calling this
// function rather than making direct calls to the start/stop/restart functions on the
// environment struct.
func (s *Server) HandlePowerAction(action PowerAction, waitSeconds ...int) error {
2022-01-23 17:49:35 +00:00
if s.IsInstalling() || s.IsTransferring() || s.IsRestoring() {
if s.IsRestoring() {
return ErrServerIsRestoring
} else if s.IsTransferring() {
return ErrServerIsTransferring
}
return ErrServerIsInstalling
}
2022-01-23 17:49:35 +00:00
lockId, _ := uuid.NewUUID()
log := s.Log().WithField("lock_id", lockId.String()).WithField("action", action)
2022-01-23 17:49:35 +00:00
cleanup := func() {
log.Info("releasing exclusive lock for power action")
s.powerLock.Release()
}
2022-01-23 17:49:35 +00:00
var wait int
if len(waitSeconds) > 0 && waitSeconds[0] > 0 {
wait = waitSeconds[0]
}
2022-01-23 17:49:35 +00:00
log.WithField("wait_seconds", wait).Debug("acquiring power action lock for instance")
// Only attempt to acquire a lock on the process if this is not a termination event. We want to
// just allow those events to pass right through for good reason. If a server is currently trying
// to process a power action but has gotten stuck you still should be able to pass through the
// terminate event. The good news here is that doing that oftentimes will get the stuck process to
// move again, and naturally continue through the process.
if action != PowerActionTerminate {
// Determines if we should wait for the lock or not. If a value greater than 0 is passed
// into this function we will wait that long for a lock to be acquired.
2022-01-23 17:49:35 +00:00
if wait > 0 {
ctx, cancel := context.WithTimeout(s.ctx, time.Second*time.Duration(wait))
defer cancel()
// Attempt to acquire a lock on the power action lock for up to 30 seconds. If more
// time than that passes an error will be propagated back up the chain and this
// request will be aborted.
2022-01-23 17:49:35 +00:00
if err := s.powerLock.TryAcquire(ctx); err != nil {
return errors.Wrap(err, fmt.Sprintf("could not acquire lock on power action after %d seconds", wait))
}
} else {
// If no wait duration was provided we will attempt to immediately acquire the lock
// and bail out with a context deadline error if it is not acquired immediately.
2022-01-23 17:49:35 +00:00
if err := s.powerLock.Acquire(); err != nil {
return errors.Wrap(err, "failed to acquire exclusive lock for power actions")
}
}
2022-01-23 17:49:35 +00:00
log.Info("acquired exclusive lock on power actions, processing event...")
defer cleanup()
} else {
2022-01-23 17:49:35 +00:00
// Still try to acquire the lock if terminating, and it is available, just so that
// other power actions are blocked until it has completed. However, if it cannot be
// acquired we won't stop the entire process.
//
// If we did successfully acquire the lock, make sure we release it once we're done
// executiong the power actions.
if err := s.powerLock.Acquire(); err == nil {
log.Info("acquired exclusive lock on power actions, processing event...")
defer cleanup()
} else {
log.Warn("failed to acquire exclusive lock, ignoring failure for termination event")
}
}
switch action {
case PowerActionStart:
if s.Environment.State() != environment.ProcessOfflineState {
return ErrIsRunning
}
// Run the pre-boot logic for the server before processing the environment start.
if err := s.onBeforeStart(); err != nil {
return err
}
return s.Environment.Start(s.Context())
case PowerActionStop:
// We're specifically waiting for the process to be stopped here, otherwise the lock is released
// too soon, and you can rack up all sorts of issues.
return s.Environment.WaitForStop(10*60, true)
case PowerActionRestart:
if err := s.Environment.WaitForStop(10*60, true); err != nil {
// Even timeout errors should be bubbled back up the stack. If the process didn't stop
// nicely, but the terminate argument was passed then the server is stopped without an
// error being returned.
//
// However, if terminate is not passed you'll get a context deadline error. We could
// probably handle that nicely here, but I'd rather just pass it back up the stack for now.
// Either way, any type of error indicates we should not attempt to start the server back
// up.
return err
}
// Now actually try to start the process by executing the normal pre-boot logic.
if err := s.onBeforeStart(); err != nil {
return err
}
return s.Environment.Start(s.Context())
case PowerActionTerminate:
return s.Environment.Terminate(os.Kill)
}
return errors.New("attempting to handle unknown power action")
2020-04-06 01:00:33 +00:00
}
// Execute a few functions before actually calling the environment start commands. This ensures
// that everything is ready to go for environment booting, and that the server can even be started.
func (s *Server) onBeforeStart() error {
s.Log().Info("syncing server configuration with panel")
if err := s.Sync(); err != nil {
return errors.WithMessage(err, "unable to sync server data from Panel instance")
}
// Disallow start & restart if the server is suspended. Do this check after performing a sync
// action with the Panel to ensure that we have the most up-to-date information for that server.
if s.IsSuspended() {
return ErrSuspended
}
// Ensure we sync the server information with the environment so that any new environment variables
// and process resource limits are correctly applied.
s.SyncWithEnvironment()
// If a server has unlimited disk space, we don't care enough to block the startup to check remaining.
// However, we should trigger a size anyway, as it'd be good to kick it off for other processes.
if s.DiskSpace() <= 0 {
s.Filesystem().HasSpaceAvailable(true)
} else {
s.PublishConsoleOutputFromDaemon("Checking server disk space usage, this could take a few seconds...")
if err := s.Filesystem().HasSpaceErr(false); err != nil {
return err
}
}
2020-08-19 03:27:42 +00:00
// Update the configuration files defined for the server before beginning the boot process.
// This process executes a bunch of parallel updates, so we just block until that process
// is complete. Any errors as a result of this will just be bubbled out in the logger,
// we don't need to actively do anything about it at this point, worse comes to worst the
2020-08-19 03:27:42 +00:00
// server starts in a weird state and the user can manually adjust.
s.PublishConsoleOutputFromDaemon("Updating process configuration files...")
s.UpdateConfigurationFiles()
if config.Get().System.CheckPermissionsOnBoot {
s.PublishConsoleOutputFromDaemon("Ensuring file permissions are set correctly, this could take a few seconds...")
// Ensure all the server file permissions are set correctly before booting the process.
if err := s.Filesystem().Chown("/"); err != nil {
return errors.WithMessage(err, "failed to chown root server directory during pre-boot process")
}
2020-08-19 03:27:42 +00:00
}
return nil
}