add context timeouts to avoid hanging wings boot process if docker has a hiccup; closes pterodactyl/panel#3358
This commit is contained in:
parent
5cd43dd4c9
commit
ee91224eb6
15
cmd/root.go
15
cmd/root.go
|
@ -1,6 +1,7 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
@ -206,7 +207,17 @@ func rootCmdRun(cmd *cobra.Command, _ []string) {
|
|||
st = state
|
||||
}
|
||||
|
||||
r, err := s.Environment.IsRunning()
|
||||
// Use a timed context here to avoid booting issues where Docker hangs for a
|
||||
// specific container that would cause Wings to be un-bootable until the entire
|
||||
// machine is rebooted. It is much better for us to just have a single failed
|
||||
// server instance than an entire offline node.
|
||||
//
|
||||
// @see https://github.com/pterodactyl/panel/issues/2475
|
||||
// @see https://github.com/pterodactyl/panel/issues/3358
|
||||
ctx, cancel := context.WithTimeout(cmd.Context(), time.Second * 30)
|
||||
defer cancel()
|
||||
|
||||
r, err := s.Environment.IsRunning(ctx)
|
||||
// We ignore missing containers because we don't want to actually block booting of wings at this
|
||||
// point. If we didn't do this, and you pruned all the images and then started wings you could
|
||||
// end up waiting a long period of time for all the images to be re-pulled on Wings boot rather
|
||||
|
@ -235,7 +246,7 @@ func rootCmdRun(cmd *cobra.Command, _ []string) {
|
|||
s.Log().Info("detected server is running, re-attaching to process...")
|
||||
|
||||
s.Environment.SetState(environment.ProcessRunningState)
|
||||
if err := s.Environment.Attach(); err != nil {
|
||||
if err := s.Environment.Attach(ctx); err != nil {
|
||||
s.Log().WithField("error", err).Warn("failed to attach to running server environment")
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -45,7 +45,7 @@ func (nw noopWriter) Write(b []byte) (int, error) {
|
|||
// Calling this function will poll resources for the container in the background
|
||||
// until the provided context is canceled by the caller. Failure to cancel said
|
||||
// context will cause background memory leaks as the goroutine will not exit.
|
||||
func (e *Environment) Attach() error {
|
||||
func (e *Environment) Attach(ctx context.Context) error {
|
||||
if e.IsAttached() {
|
||||
return nil
|
||||
}
|
||||
|
@ -62,14 +62,17 @@ func (e *Environment) Attach() error {
|
|||
}
|
||||
|
||||
// Set the stream again with the container.
|
||||
if st, err := e.client.ContainerAttach(context.Background(), e.Id, opts); err != nil {
|
||||
if st, err := e.client.ContainerAttach(ctx, e.Id, opts); err != nil {
|
||||
return err
|
||||
} else {
|
||||
e.SetStream(&st)
|
||||
}
|
||||
|
||||
go func() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
// Don't use the context provided to the function, that'll cause the polling to
|
||||
// exit unexpectedly. We want a custom context for this, the one passed to the
|
||||
// function is to avoid a hang situation when trying to attach to a container.
|
||||
pollCtx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
defer e.stream.Close()
|
||||
defer func() {
|
||||
|
@ -78,7 +81,7 @@ func (e *Environment) Attach() error {
|
|||
}()
|
||||
|
||||
go func() {
|
||||
if err := e.pollResources(ctx); err != nil {
|
||||
if err := e.pollResources(pollCtx); err != nil {
|
||||
if !errors.Is(err, context.Canceled) {
|
||||
e.log().WithField("error", err).Error("error during environment resource polling")
|
||||
} else {
|
||||
|
|
|
@ -128,20 +128,20 @@ func (e *Environment) Exists() (bool, error) {
|
|||
return true, nil
|
||||
}
|
||||
|
||||
// Determines if the server's docker container is currently running. If there is no container
|
||||
// present, an error will be raised (since this shouldn't be a case that ever happens under
|
||||
// correctly developed circumstances).
|
||||
// IsRunning determines if the server's docker container is currently running.
|
||||
// If there is no container present, an error will be raised (since this
|
||||
// shouldn't be a case that ever happens under correctly developed
|
||||
// circumstances).
|
||||
//
|
||||
// You can confirm if the instance wasn't found by using client.IsErrNotFound from the Docker
|
||||
// API.
|
||||
// You can confirm if the instance wasn't found by using client.IsErrNotFound
|
||||
// from the Docker API.
|
||||
//
|
||||
// @see docker/client/errors.go
|
||||
func (e *Environment) IsRunning() (bool, error) {
|
||||
c, err := e.client.ContainerInspect(context.Background(), e.Id)
|
||||
func (e *Environment) IsRunning(ctx context.Context) (bool, error) {
|
||||
c, err := e.client.ContainerInspect(ctx, e.Id)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return c.State.Running, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -17,16 +17,17 @@ import (
|
|||
"github.com/pterodactyl/wings/remote"
|
||||
)
|
||||
|
||||
// Run before the container starts and get the process configuration from the Panel.
|
||||
// This is important since we use this to check configuration files as well as ensure
|
||||
// we always have the latest version of an egg available for server processes.
|
||||
// OnBeforeStart run before the container starts and get the process
|
||||
// configuration from the Panel. This is important since we use this to check
|
||||
// configuration files as well as ensure we always have the latest version of
|
||||
// an egg available for server processes.
|
||||
//
|
||||
// This process will also confirm that the server environment exists and is in a bootable
|
||||
// state. This ensures that unexpected container deletion while Wings is running does
|
||||
// not result in the server becoming un-bootable.
|
||||
func (e *Environment) OnBeforeStart() error {
|
||||
// This process will also confirm that the server environment exists and is in
|
||||
// a bootable state. This ensures that unexpected container deletion while Wings
|
||||
// is running does not result in the server becoming un-bootable.
|
||||
func (e *Environment) OnBeforeStart(ctx context.Context) error {
|
||||
// Always destroy and re-create the server container to ensure that synced data from the Panel is used.
|
||||
if err := e.client.ContainerRemove(context.Background(), e.Id, types.ContainerRemoveOptions{RemoveVolumes: true}); err != nil {
|
||||
if err := e.client.ContainerRemove(ctx, e.Id, types.ContainerRemoveOptions{RemoveVolumes: true}); err != nil {
|
||||
if !client.IsErrNotFound(err) {
|
||||
return errors.WrapIf(err, "environment/docker: failed to remove container during pre-boot")
|
||||
}
|
||||
|
@ -46,10 +47,10 @@ func (e *Environment) OnBeforeStart() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// Starts the server environment and begins piping output to the event listeners for the
|
||||
// console. If a container does not exist, or needs to be rebuilt that will happen in the
|
||||
// call to OnBeforeStart().
|
||||
func (e *Environment) Start() error {
|
||||
// Start will start the server environment and begins piping output to the event
|
||||
// listeners for the console. If a container does not exist, or needs to be
|
||||
// rebuilt that will happen in the call to OnBeforeStart().
|
||||
func (e *Environment) Start(ctx context.Context) error {
|
||||
sawError := false
|
||||
|
||||
// If sawError is set to true there was an error somewhere in the pipeline that
|
||||
|
@ -65,7 +66,7 @@ func (e *Environment) Start() error {
|
|||
}
|
||||
}()
|
||||
|
||||
if c, err := e.client.ContainerInspect(context.Background(), e.Id); err != nil {
|
||||
if c, err := e.client.ContainerInspect(ctx, e.Id); err != nil {
|
||||
// Do nothing if the container is not found, we just don't want to continue
|
||||
// to the next block of code here. This check was inlined here to guard against
|
||||
// a nil-pointer when checking c.State below.
|
||||
|
@ -79,7 +80,7 @@ func (e *Environment) Start() error {
|
|||
if c.State.Running {
|
||||
e.SetState(environment.ProcessRunningState)
|
||||
|
||||
return e.Attach()
|
||||
return e.Attach(ctx)
|
||||
}
|
||||
|
||||
// Truncate the log file, so we don't end up outputting a bunch of useless log information
|
||||
|
@ -101,21 +102,23 @@ func (e *Environment) Start() error {
|
|||
// Run the before start function and wait for it to finish. This will validate that the container
|
||||
// exists on the system, and rebuild the container if that is required for server booting to
|
||||
// occur.
|
||||
if err := e.OnBeforeStart(); err != nil {
|
||||
if err := e.OnBeforeStart(ctx); err != nil {
|
||||
return errors.WithStackIf(err)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
|
||||
// If we cannot start & attach to the container in 30 seconds something has gone
|
||||
// quite sideways and we should stop trying to avoid a hanging situation.
|
||||
actx, cancel := context.WithTimeout(ctx, time.Second*30)
|
||||
defer cancel()
|
||||
|
||||
if err := e.client.ContainerStart(ctx, e.Id, types.ContainerStartOptions{}); err != nil {
|
||||
if err := e.client.ContainerStart(actx, e.Id, types.ContainerStartOptions{}); err != nil {
|
||||
return errors.WrapIf(err, "environment/docker: failed to start container")
|
||||
}
|
||||
|
||||
// No errors, good to continue through.
|
||||
sawError = false
|
||||
|
||||
return e.Attach()
|
||||
return e.Attach(actx)
|
||||
}
|
||||
|
||||
// Stop stops the container that the server is running in. This will allow up to
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package environment
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
|
||||
"github.com/pterodactyl/wings/events"
|
||||
|
@ -41,9 +42,9 @@ type ProcessEnvironment interface {
|
|||
// a basic CLI environment this can probably just return true right away.
|
||||
Exists() (bool, error)
|
||||
|
||||
// Determines if the environment is currently active and running a server process
|
||||
// for this specific server instance.
|
||||
IsRunning() (bool, error)
|
||||
// IsRunning determines if the environment is currently active and running
|
||||
// a server process for this specific server instance.
|
||||
IsRunning(ctx context.Context) (bool, error)
|
||||
|
||||
// Performs an update of server resource limits without actually stopping the server
|
||||
// process. This only executes if the environment supports it, otherwise it is
|
||||
|
@ -52,11 +53,11 @@ type ProcessEnvironment interface {
|
|||
|
||||
// Runs before the environment is started. If an error is returned starting will
|
||||
// not occur, otherwise proceeds as normal.
|
||||
OnBeforeStart() error
|
||||
OnBeforeStart(ctx context.Context) error
|
||||
|
||||
// Starts a server instance. If the server instance is not in a state where it
|
||||
// can be started an error should be returned.
|
||||
Start() error
|
||||
Start(ctx context.Context) error
|
||||
|
||||
// Stops a server instance. If the server is already stopped an error should
|
||||
// not be returned.
|
||||
|
@ -84,10 +85,10 @@ type ProcessEnvironment interface {
|
|||
// server.
|
||||
Create() error
|
||||
|
||||
// Attaches to the server console environment and allows piping the output to a
|
||||
// websocket or other internal tool to monitor output. Also allows you to later
|
||||
// Attach attaches to the server console environment and allows piping the output
|
||||
// to a websocket or other internal tool to monitor output. Also allows you to later
|
||||
// send data into the environment's stdin.
|
||||
Attach() error
|
||||
Attach(ctx context.Context) error
|
||||
|
||||
// Sends the provided command to the running server instance.
|
||||
SendCommand(string) error
|
||||
|
|
|
@ -101,7 +101,7 @@ func postServerPower(c *gin.Context) {
|
|||
func postServerCommands(c *gin.Context) {
|
||||
s := ExtractServer(c)
|
||||
|
||||
if running, err := s.Environment.IsRunning(); err != nil {
|
||||
if running, err := s.Environment.IsRunning(c.Request.Context()); err != nil {
|
||||
NewServerError(err, s).Abort(c)
|
||||
return
|
||||
} else if !running {
|
||||
|
|
|
@ -368,7 +368,9 @@ func (h *Handler) HandleInbound(m Message) error {
|
|||
}
|
||||
case SendServerLogsEvent:
|
||||
{
|
||||
if running, _ := h.server.Environment.IsRunning(); !running {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second * 5)
|
||||
defer cancel()
|
||||
if running, _ := h.server.Environment.IsRunning(ctx); !running {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -128,7 +128,7 @@ func (s *Server) HandlePowerAction(action PowerAction, waitSeconds ...int) error
|
|||
return err
|
||||
}
|
||||
|
||||
return s.Environment.Start()
|
||||
return s.Environment.Start(s.Context())
|
||||
case PowerActionStop:
|
||||
// We're specifically waiting for the process to be stopped here, otherwise the lock is released
|
||||
// too soon, and you can rack up all sorts of issues.
|
||||
|
@ -151,7 +151,7 @@ func (s *Server) HandlePowerAction(action PowerAction, waitSeconds ...int) error
|
|||
return err
|
||||
}
|
||||
|
||||
return s.Environment.Start()
|
||||
return s.Environment.Start(s.Context())
|
||||
case PowerActionTerminate:
|
||||
return s.Environment.Terminate(os.Kill)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user