Fix logic for context based environment stopping
Uses dual contexts to handle stopping using a timed context, and also terminating the entire process loop if the parent context gets canceled.
This commit is contained in:
parent
84bbefdadc
commit
cd67e5fdb9
|
@ -177,7 +177,9 @@ func (e *Environment) Stop(ctx context.Context) error {
|
||||||
return e.SendCommand(s.Value)
|
return e.SendCommand(s.Value)
|
||||||
}
|
}
|
||||||
|
|
||||||
t := time.Second * 30
|
// Allow the stop action to run for however long it takes, similar to executing a command
|
||||||
|
// and using a different logic pathway to wait for the container to stop successfully.
|
||||||
|
t := time.Duration(-1)
|
||||||
if err := e.client.ContainerStop(ctx, e.Id, &t); err != nil {
|
if err := e.client.ContainerStop(ctx, e.Id, &t); err != nil {
|
||||||
// If the container does not exist just mark the process as stopped and return without
|
// If the container does not exist just mark the process as stopped and return without
|
||||||
// an error.
|
// an error.
|
||||||
|
@ -196,48 +198,66 @@ func (e *Environment) Stop(ctx context.Context) error {
|
||||||
// command. If the server does not stop after seconds have passed, an error will
|
// command. If the server does not stop after seconds have passed, an error will
|
||||||
// be returned, or the instance will be terminated forcefully depending on the
|
// be returned, or the instance will be terminated forcefully depending on the
|
||||||
// value of the second argument.
|
// value of the second argument.
|
||||||
func (e *Environment) WaitForStop(seconds uint, terminate bool) error {
|
//
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second * time.Duration(seconds))
|
// Calls to Environment.Terminate() in this function use the context passed
|
||||||
|
// through since we don't want to prevent termination of the server instance
|
||||||
|
// just because the context.WithTimeout() has expired.
|
||||||
|
func (e *Environment) WaitForStop(ctx context.Context, duration time.Duration, terminate bool) error {
|
||||||
|
tctx, cancel := context.WithTimeout(context.Background(), duration)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
return e.WaitForStopWithContext(ctx, terminate)
|
// If the parent context is canceled, abort the timed context for termination.
|
||||||
}
|
go func() {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
cancel()
|
||||||
|
case <-tctx.Done():
|
||||||
|
// When the timed context is canceled, terminate this routine since we no longer
|
||||||
|
// need to worry about the parent routine being canceled.
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
func (e *Environment) WaitForStopWithContext(ctx context.Context, terminate bool) error {
|
doTermination := func (s string) error {
|
||||||
if err := e.Stop(ctx); err != nil {
|
e.log().WithField("step", s).WithField("duration", duration).Warn("container stop did not complete in time, terminating process...")
|
||||||
|
return e.Terminate(ctx, os.Kill)
|
||||||
|
}
|
||||||
|
|
||||||
|
// We pass through the timed context for this stop action so that if one of the
|
||||||
|
// internal docker calls fails to ever finish before we've exhausted the time limit
|
||||||
|
// the resources get cleaned up, and the execution is stopped.
|
||||||
|
if err := e.Stop(tctx); err != nil {
|
||||||
|
if terminate && errors.Is(err, context.DeadlineExceeded) {
|
||||||
|
return doTermination("stop")
|
||||||
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Block the return of this function until the container has been marked as no
|
// Block the return of this function until the container has been marked as no
|
||||||
// longer running. If this wait does not end by the time seconds have passed,
|
// longer running. If this wait does not end by the time seconds have passed,
|
||||||
// attempt to terminate the container, or return an error.
|
// attempt to terminate the container, or return an error.
|
||||||
ok, errChan := e.client.ContainerWait(ctx, e.Id, container.WaitConditionNotRunning)
|
ok, errChan := e.client.ContainerWait(tctx, e.Id, container.WaitConditionNotRunning)
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
if err := ctx.Err(); err != nil {
|
if err := ctx.Err(); err != nil {
|
||||||
if terminate {
|
if terminate {
|
||||||
log.WithField("container_id", e.Id).Info("server did not stop in time, executing process termination")
|
return doTermination("parent-context")
|
||||||
|
|
||||||
return e.Terminate(ctx, os.Kill)
|
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
case err := <-errChan:
|
case err := <-errChan:
|
||||||
// If the error stems from the container not existing there is no point in wasting
|
// If the error stems from the container not existing there is no point in wasting
|
||||||
// CPU time to then try and terminate it.
|
// CPU time to then try and terminate it.
|
||||||
if err != nil && !client.IsErrNotFound(err) {
|
if err == nil || client.IsErrNotFound(err) {
|
||||||
if terminate {
|
return nil
|
||||||
l := log.WithField("container_id", e.Id)
|
|
||||||
if errors.Is(err, context.DeadlineExceeded) {
|
|
||||||
l.Warn("deadline exceeded for container stop; terminating process")
|
|
||||||
} else {
|
|
||||||
l.WithField("error", err).Warn("error while waiting for container stop; terminating process")
|
|
||||||
}
|
}
|
||||||
|
if terminate {
|
||||||
return e.Terminate(ctx, os.Kill)
|
if !errors.Is(err, context.DeadlineExceeded) {
|
||||||
|
e.log().WithField("error", err).Warn("error while waiting for container stop; terminating process")
|
||||||
|
}
|
||||||
|
return doTermination("wait")
|
||||||
}
|
}
|
||||||
return errors.WrapIf(err, "environment/docker: error waiting on container to enter \"not-running\" state")
|
return errors.WrapIf(err, "environment/docker: error waiting on container to enter \"not-running\" state")
|
||||||
}
|
|
||||||
case <-ok:
|
case <-ok:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,7 @@ package environment
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"os"
|
"os"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/pterodactyl/wings/events"
|
"github.com/pterodactyl/wings/events"
|
||||||
)
|
)
|
||||||
|
@ -63,15 +64,11 @@ type ProcessEnvironment interface {
|
||||||
Stop(ctx context.Context) error
|
Stop(ctx context.Context) error
|
||||||
|
|
||||||
// WaitForStop waits for a server instance to stop gracefully. If the server is
|
// WaitForStop waits for a server instance to stop gracefully. If the server is
|
||||||
// still detected as running after seconds, an error will be returned, or the server
|
// still detected as running after "duration", an error will be returned, or the server
|
||||||
// will be terminated depending on the value of the second argument.
|
// will be terminated depending on the value of the second argument. If the context
|
||||||
WaitForStop(seconds uint, terminate bool) error
|
// provided is canceled the underlying wait conditions will be stopped and the
|
||||||
|
// entire loop will be ended (potentially without stopping or terminating).
|
||||||
// WaitForStopWithContext works in the same fashion as WaitForStop, but accepts a
|
WaitForStop(ctx context.Context, duration time.Duration, terminate bool) error
|
||||||
// context value and will stop waiting when the context is canceled. If the terminate
|
|
||||||
// option is true, the server will be terminated when the context is canceled if the
|
|
||||||
// server has not stopped at that point.
|
|
||||||
WaitForStopWithContext(ctx context.Context, terminate bool) error
|
|
||||||
|
|
||||||
// Terminate stops a running server instance using the provided signal. This function
|
// Terminate stops a running server instance using the provided signal. This function
|
||||||
// is a no-op if the server is already stopped.
|
// is a no-op if the server is already stopped.
|
||||||
|
|
|
@ -178,7 +178,7 @@ func postServerArchive(c *gin.Context) {
|
||||||
|
|
||||||
// Ensure the server is offline. Sometimes a "No such container" error gets through
|
// Ensure the server is offline. Sometimes a "No such container" error gets through
|
||||||
// which means the server is already stopped. We can ignore that.
|
// which means the server is already stopped. We can ignore that.
|
||||||
if err := s.Environment.WaitForStop(60, false); err != nil && !strings.Contains(strings.ToLower(err.Error()), "no such container") {
|
if err := s.Environment.WaitForStop(s.Context(), time.Minute, false); err != nil && !strings.Contains(strings.ToLower(err.Error()), "no such container") {
|
||||||
sendTransferLog("Failed to stop server, aborting transfer..")
|
sendTransferLog("Failed to stop server, aborting transfer..")
|
||||||
l.WithField("error", err).Error("failed to stop server")
|
l.WithField("error", err).Error("failed to stop server")
|
||||||
return
|
return
|
||||||
|
|
|
@ -11,9 +11,10 @@ import (
|
||||||
"emperror.dev/errors"
|
"emperror.dev/errors"
|
||||||
"github.com/apex/log"
|
"github.com/apex/log"
|
||||||
"github.com/gbrlsnchs/jwt/v3"
|
"github.com/gbrlsnchs/jwt/v3"
|
||||||
|
"github.com/goccy/go-json"
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
"github.com/gorilla/websocket"
|
"github.com/gorilla/websocket"
|
||||||
"github.com/goccy/go-json"
|
"github.com/pterodactyl/wings/system"
|
||||||
|
|
||||||
"github.com/pterodactyl/wings/config"
|
"github.com/pterodactyl/wings/config"
|
||||||
"github.com/pterodactyl/wings/environment"
|
"github.com/pterodactyl/wings/environment"
|
||||||
|
@ -353,7 +354,7 @@ func (h *Handler) HandleInbound(ctx context.Context, m Message) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
err := h.server.HandlePowerAction(action)
|
err := h.server.HandlePowerAction(action)
|
||||||
if errors.Is(err, context.DeadlineExceeded) {
|
if errors.Is(err, system.ErrLockerLocked) {
|
||||||
m, _ := h.GetErrorMessage("another power action is currently being processed for this server, please try again later")
|
m, _ := h.GetErrorMessage("another power action is currently being processed for this server, please try again later")
|
||||||
|
|
||||||
_ = h.SendJson(Message{
|
_ = h.SendJson(Message{
|
||||||
|
|
|
@ -142,7 +142,7 @@ func (s *Server) RestoreBackup(b backup.BackupInterface, reader io.ReadCloser) (
|
||||||
// instance, otherwise you'll likely hit all types of write errors due to the
|
// instance, otherwise you'll likely hit all types of write errors due to the
|
||||||
// server being suspended.
|
// server being suspended.
|
||||||
if s.Environment.State() != environment.ProcessOfflineState {
|
if s.Environment.State() != environment.ProcessOfflineState {
|
||||||
if err = s.Environment.WaitForStop(120, false); err != nil {
|
if err = s.Environment.WaitForStop(s.Context(), time.Minute*2, false); err != nil {
|
||||||
if !client.IsErrNotFound(err) {
|
if !client.IsErrNotFound(err) {
|
||||||
return errors.WrapIf(err, "server/backup: restore: failed to wait for container stop")
|
return errors.WrapIf(err, "server/backup: restore: failed to wait for container stop")
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,6 +10,7 @@ import (
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"emperror.dev/errors"
|
"emperror.dev/errors"
|
||||||
"github.com/apex/log"
|
"github.com/apex/log"
|
||||||
|
@ -79,7 +80,7 @@ func (s *Server) Install(sync bool) error {
|
||||||
func (s *Server) Reinstall() error {
|
func (s *Server) Reinstall() error {
|
||||||
if s.Environment.State() != environment.ProcessOfflineState {
|
if s.Environment.State() != environment.ProcessOfflineState {
|
||||||
s.Log().Debug("waiting for server instance to enter a stopped state")
|
s.Log().Debug("waiting for server instance to enter a stopped state")
|
||||||
if err := s.Environment.WaitForStop(10, true); err != nil {
|
if err := s.Environment.WaitForStop(s.Context(), time.Second*10, true); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,7 @@ import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/apex/log"
|
"github.com/apex/log"
|
||||||
|
|
||||||
|
@ -44,7 +45,7 @@ func (dsl *diskSpaceLimiter) Reset() {
|
||||||
func (dsl *diskSpaceLimiter) Trigger() {
|
func (dsl *diskSpaceLimiter) Trigger() {
|
||||||
dsl.o.Do(func() {
|
dsl.o.Do(func() {
|
||||||
dsl.server.PublishConsoleOutputFromDaemon("Server is exceeding the assigned disk space limit, stopping process now.")
|
dsl.server.PublishConsoleOutputFromDaemon("Server is exceeding the assigned disk space limit, stopping process now.")
|
||||||
if err := dsl.server.Environment.WaitForStop(60, true); err != nil {
|
if err := dsl.server.Environment.WaitForStop(dsl.server.Context(), time.Minute, true); err != nil {
|
||||||
dsl.server.Log().WithField("error", err).Error("failed to stop server after exceeding space limit!")
|
dsl.server.Log().WithField("error", err).Error("failed to stop server after exceeding space limit!")
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
|
@ -135,12 +135,9 @@ func (s *Server) HandlePowerAction(action PowerAction, waitSeconds ...int) error
|
||||||
case PowerActionStop:
|
case PowerActionStop:
|
||||||
fallthrough
|
fallthrough
|
||||||
case PowerActionRestart:
|
case PowerActionRestart:
|
||||||
ctx, cancel := context.WithTimeout(s.Context(), time.Second)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
// We're specifically waiting for the process to be stopped here, otherwise the lock is
|
// We're specifically waiting for the process to be stopped here, otherwise the lock is
|
||||||
// released too soon, and you can rack up all sorts of issues.
|
// released too soon, and you can rack up all sorts of issues.
|
||||||
if err := s.Environment.WaitForStopWithContext(ctx, true); err != nil {
|
if err := s.Environment.WaitForStop(s.Context(), time.Minute*10, true); err != nil {
|
||||||
// Even timeout errors should be bubbled back up the stack. If the process didn't stop
|
// Even timeout errors should be bubbled back up the stack. If the process didn't stop
|
||||||
// nicely, but the terminate argument was passed then the server is stopped without an
|
// nicely, but the terminate argument was passed then the server is stopped without an
|
||||||
// error being returned.
|
// error being returned.
|
||||||
|
@ -156,11 +153,6 @@ func (s *Server) HandlePowerAction(action PowerAction, waitSeconds ...int) error
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Release the resources we acquired for the initial timer context since we don't
|
|
||||||
// need them anymore at this point, and the start process can take quite awhile to
|
|
||||||
// complete.
|
|
||||||
cancel()
|
|
||||||
|
|
||||||
// Now actually try to start the process by executing the normal pre-boot logic.
|
// Now actually try to start the process by executing the normal pre-boot logic.
|
||||||
if err := s.onBeforeStart(); err != nil {
|
if err := s.onBeforeStart(); err != nil {
|
||||||
return err
|
return err
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
package server
|
package server
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/pterodactyl/wings/environment/docker"
|
"github.com/pterodactyl/wings/environment/docker"
|
||||||
|
|
||||||
"github.com/pterodactyl/wings/environment"
|
"github.com/pterodactyl/wings/environment"
|
||||||
|
@ -58,7 +60,7 @@ func (s *Server) SyncWithEnvironment() {
|
||||||
s.Log().Info("server suspended with running process state, terminating now")
|
s.Log().Info("server suspended with running process state, terminating now")
|
||||||
|
|
||||||
go func(s *Server) {
|
go func(s *Server) {
|
||||||
if err := s.Environment.WaitForStop(60, true); err != nil {
|
if err := s.Environment.WaitForStop(s.Context(), time.Minute, true); err != nil {
|
||||||
s.Log().WithField("error", err).Warn("failed to terminate server environment after suspension")
|
s.Log().WithField("error", err).Warn("failed to terminate server environment after suspension")
|
||||||
}
|
}
|
||||||
}(s)
|
}(s)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user