add context timeouts to avoid hanging wings boot process if docker has a hiccup; closes pterodactyl/panel#3358

This commit is contained in:
Dane Everitt 2021-09-11 14:13:19 -07:00
parent 5cd43dd4c9
commit ee91224eb6
8 changed files with 64 additions and 44 deletions

View File

@ -1,6 +1,7 @@
package cmd package cmd
import ( import (
"context"
"crypto/tls" "crypto/tls"
"errors" "errors"
"fmt" "fmt"
@ -206,7 +207,17 @@ func rootCmdRun(cmd *cobra.Command, _ []string) {
st = state st = state
} }
r, err := s.Environment.IsRunning() // Use a timed context here to avoid booting issues where Docker hangs for a
// specific container that would cause Wings to be un-bootable until the entire
// machine is rebooted. It is much better for us to just have a single failed
// server instance than an entire offline node.
//
// @see https://github.com/pterodactyl/panel/issues/2475
// @see https://github.com/pterodactyl/panel/issues/3358
ctx, cancel := context.WithTimeout(cmd.Context(), time.Second * 30)
defer cancel()
r, err := s.Environment.IsRunning(ctx)
// We ignore missing containers because we don't want to actually block booting of wings at this // We ignore missing containers because we don't want to actually block booting of wings at this
// point. If we didn't do this, and you pruned all the images and then started wings you could // point. If we didn't do this, and you pruned all the images and then started wings you could
// end up waiting a long period of time for all the images to be re-pulled on Wings boot rather // end up waiting a long period of time for all the images to be re-pulled on Wings boot rather
@ -235,7 +246,7 @@ func rootCmdRun(cmd *cobra.Command, _ []string) {
s.Log().Info("detected server is running, re-attaching to process...") s.Log().Info("detected server is running, re-attaching to process...")
s.Environment.SetState(environment.ProcessRunningState) s.Environment.SetState(environment.ProcessRunningState)
if err := s.Environment.Attach(); err != nil { if err := s.Environment.Attach(ctx); err != nil {
s.Log().WithField("error", err).Warn("failed to attach to running server environment") s.Log().WithField("error", err).Warn("failed to attach to running server environment")
} }
} else { } else {

View File

@ -45,7 +45,7 @@ func (nw noopWriter) Write(b []byte) (int, error) {
// Calling this function will poll resources for the container in the background // Calling this function will poll resources for the container in the background
// until the provided context is canceled by the caller. Failure to cancel said // until the provided context is canceled by the caller. Failure to cancel said
// context will cause background memory leaks as the goroutine will not exit. // context will cause background memory leaks as the goroutine will not exit.
func (e *Environment) Attach() error { func (e *Environment) Attach(ctx context.Context) error {
if e.IsAttached() { if e.IsAttached() {
return nil return nil
} }
@ -62,14 +62,17 @@ func (e *Environment) Attach() error {
} }
// Set the stream again with the container. // Set the stream again with the container.
if st, err := e.client.ContainerAttach(context.Background(), e.Id, opts); err != nil { if st, err := e.client.ContainerAttach(ctx, e.Id, opts); err != nil {
return err return err
} else { } else {
e.SetStream(&st) e.SetStream(&st)
} }
go func() { go func() {
ctx, cancel := context.WithCancel(context.Background()) // Don't use the context provided to the function, that'll cause the polling to
// exit unexpectedly. We want a custom context for this, the one passed to the
// function is to avoid a hang situation when trying to attach to a container.
pollCtx, cancel := context.WithCancel(context.Background())
defer cancel() defer cancel()
defer e.stream.Close() defer e.stream.Close()
defer func() { defer func() {
@ -78,7 +81,7 @@ func (e *Environment) Attach() error {
}() }()
go func() { go func() {
if err := e.pollResources(ctx); err != nil { if err := e.pollResources(pollCtx); err != nil {
if !errors.Is(err, context.Canceled) { if !errors.Is(err, context.Canceled) {
e.log().WithField("error", err).Error("error during environment resource polling") e.log().WithField("error", err).Error("error during environment resource polling")
} else { } else {

View File

@ -128,20 +128,20 @@ func (e *Environment) Exists() (bool, error) {
return true, nil return true, nil
} }
// Determines if the server's docker container is currently running. If there is no container // IsRunning determines if the server's docker container is currently running.
// present, an error will be raised (since this shouldn't be a case that ever happens under // If there is no container present, an error will be raised (since this
// correctly developed circumstances). // shouldn't be a case that ever happens under correctly developed
// circumstances).
// //
// You can confirm if the instance wasn't found by using client.IsErrNotFound from the Docker // You can confirm if the instance wasn't found by using client.IsErrNotFound
// API. // from the Docker API.
// //
// @see docker/client/errors.go // @see docker/client/errors.go
func (e *Environment) IsRunning() (bool, error) { func (e *Environment) IsRunning(ctx context.Context) (bool, error) {
c, err := e.client.ContainerInspect(context.Background(), e.Id) c, err := e.client.ContainerInspect(ctx, e.Id)
if err != nil { if err != nil {
return false, err return false, err
} }
return c.State.Running, nil return c.State.Running, nil
} }

View File

@ -17,16 +17,17 @@ import (
"github.com/pterodactyl/wings/remote" "github.com/pterodactyl/wings/remote"
) )
// Run before the container starts and get the process configuration from the Panel. // OnBeforeStart runs before the container starts and gets the process
// This is important since we use this to check configuration files as well as ensure // configuration from the Panel. This is important since we use this to check
// we always have the latest version of an egg available for server processes. // configuration files as well as ensure we always have the latest version of
// an egg available for server processes.
// //
// This process will also confirm that the server environment exists and is in a bootable // This process will also confirm that the server environment exists and is in
// state. This ensures that unexpected container deletion while Wings is running does // a bootable state. This ensures that unexpected container deletion while Wings
// not result in the server becoming un-bootable. // is running does not result in the server becoming un-bootable.
func (e *Environment) OnBeforeStart() error { func (e *Environment) OnBeforeStart(ctx context.Context) error {
// Always destroy and re-create the server container to ensure that synced data from the Panel is used. // Always destroy and re-create the server container to ensure that synced data from the Panel is used.
if err := e.client.ContainerRemove(context.Background(), e.Id, types.ContainerRemoveOptions{RemoveVolumes: true}); err != nil { if err := e.client.ContainerRemove(ctx, e.Id, types.ContainerRemoveOptions{RemoveVolumes: true}); err != nil {
if !client.IsErrNotFound(err) { if !client.IsErrNotFound(err) {
return errors.WrapIf(err, "environment/docker: failed to remove container during pre-boot") return errors.WrapIf(err, "environment/docker: failed to remove container during pre-boot")
} }
@ -46,10 +47,10 @@ func (e *Environment) OnBeforeStart() error {
return nil return nil
} }
// Starts the server environment and begins piping output to the event listeners for the // Start will start the server environment and begin piping output to the event
// console. If a container does not exist, or needs to be rebuilt that will happen in the // listeners for the console. If a container does not exist, or needs to be
// call to OnBeforeStart(). // rebuilt that will happen in the call to OnBeforeStart().
func (e *Environment) Start() error { func (e *Environment) Start(ctx context.Context) error {
sawError := false sawError := false
// If sawError is set to true there was an error somewhere in the pipeline that // If sawError is set to true there was an error somewhere in the pipeline that
@ -65,7 +66,7 @@ func (e *Environment) Start() error {
} }
}() }()
if c, err := e.client.ContainerInspect(context.Background(), e.Id); err != nil { if c, err := e.client.ContainerInspect(ctx, e.Id); err != nil {
// Do nothing if the container is not found, we just don't want to continue // Do nothing if the container is not found, we just don't want to continue
// to the next block of code here. This check was inlined here to guard against // to the next block of code here. This check was inlined here to guard against
// a nil-pointer when checking c.State below. // a nil-pointer when checking c.State below.
@ -79,7 +80,7 @@ func (e *Environment) Start() error {
if c.State.Running { if c.State.Running {
e.SetState(environment.ProcessRunningState) e.SetState(environment.ProcessRunningState)
return e.Attach() return e.Attach(ctx)
} }
// Truncate the log file, so we don't end up outputting a bunch of useless log information // Truncate the log file, so we don't end up outputting a bunch of useless log information
@ -101,21 +102,23 @@ func (e *Environment) Start() error {
// Run the before start function and wait for it to finish. This will validate that the container // Run the before start function and wait for it to finish. This will validate that the container
// exists on the system, and rebuild the container if that is required for server booting to // exists on the system, and rebuild the container if that is required for server booting to
// occur. // occur.
if err := e.OnBeforeStart(); err != nil { if err := e.OnBeforeStart(ctx); err != nil {
return errors.WithStackIf(err) return errors.WithStackIf(err)
} }
ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) // If we cannot start & attach to the container in 30 seconds something has gone
// quite sideways and we should stop trying in order to avoid a hanging situation.
actx, cancel := context.WithTimeout(ctx, time.Second*30)
defer cancel() defer cancel()
if err := e.client.ContainerStart(ctx, e.Id, types.ContainerStartOptions{}); err != nil { if err := e.client.ContainerStart(actx, e.Id, types.ContainerStartOptions{}); err != nil {
return errors.WrapIf(err, "environment/docker: failed to start container") return errors.WrapIf(err, "environment/docker: failed to start container")
} }
// No errors, good to continue through. // No errors, good to continue through.
sawError = false sawError = false
return e.Attach() return e.Attach(actx)
} }
// Stop stops the container that the server is running in. This will allow up to // Stop stops the container that the server is running in. This will allow up to

View File

@ -1,6 +1,7 @@
package environment package environment
import ( import (
"context"
"os" "os"
"github.com/pterodactyl/wings/events" "github.com/pterodactyl/wings/events"
@ -41,9 +42,9 @@ type ProcessEnvironment interface {
// a basic CLI environment this can probably just return true right away. // a basic CLI environment this can probably just return true right away.
Exists() (bool, error) Exists() (bool, error)
// Determines if the environment is currently active and running a server process // IsRunning determines if the environment is currently active and running
// for this specific server instance. // a server process for this specific server instance.
IsRunning() (bool, error) IsRunning(ctx context.Context) (bool, error)
// Performs an update of server resource limits without actually stopping the server // Performs an update of server resource limits without actually stopping the server
// process. This only executes if the environment supports it, otherwise it is // process. This only executes if the environment supports it, otherwise it is
@ -52,11 +53,11 @@ type ProcessEnvironment interface {
// Runs before the environment is started. If an error is returned starting will // Runs before the environment is started. If an error is returned starting will
// not occur, otherwise proceeds as normal. // not occur, otherwise proceeds as normal.
OnBeforeStart() error OnBeforeStart(ctx context.Context) error
// Starts a server instance. If the server instance is not in a state where it // Starts a server instance. If the server instance is not in a state where it
// can be started an error should be returned. // can be started an error should be returned.
Start() error Start(ctx context.Context) error
// Stops a server instance. If the server is already stopped an error should // Stops a server instance. If the server is already stopped an error should
// not be returned. // not be returned.
@ -84,10 +85,10 @@ type ProcessEnvironment interface {
// server. // server.
Create() error Create() error
// Attaches to the server console environment and allows piping the output to a // Attach attaches to the server console environment and allows piping the output
// websocket or other internal tool to monitor output. Also allows you to later // to a websocket or other internal tool to monitor output. Also allows you to later
// send data into the environment's stdin. // send data into the environment's stdin.
Attach() error Attach(ctx context.Context) error
// Sends the provided command to the running server instance. // Sends the provided command to the running server instance.
SendCommand(string) error SendCommand(string) error

View File

@ -101,7 +101,7 @@ func postServerPower(c *gin.Context) {
func postServerCommands(c *gin.Context) { func postServerCommands(c *gin.Context) {
s := ExtractServer(c) s := ExtractServer(c)
if running, err := s.Environment.IsRunning(); err != nil { if running, err := s.Environment.IsRunning(c.Request.Context()); err != nil {
NewServerError(err, s).Abort(c) NewServerError(err, s).Abort(c)
return return
} else if !running { } else if !running {

View File

@ -368,7 +368,9 @@ func (h *Handler) HandleInbound(m Message) error {
} }
case SendServerLogsEvent: case SendServerLogsEvent:
{ {
if running, _ := h.server.Environment.IsRunning(); !running { ctx, cancel := context.WithTimeout(context.Background(), time.Second * 5)
defer cancel()
if running, _ := h.server.Environment.IsRunning(ctx); !running {
return nil return nil
} }

View File

@ -128,7 +128,7 @@ func (s *Server) HandlePowerAction(action PowerAction, waitSeconds ...int) error
return err return err
} }
return s.Environment.Start() return s.Environment.Start(s.Context())
case PowerActionStop: case PowerActionStop:
// We're specifically waiting for the process to be stopped here, otherwise the lock is released // We're specifically waiting for the process to be stopped here, otherwise the lock is released
// too soon, and you can rack up all sorts of issues. // too soon, and you can rack up all sorts of issues.
@ -151,7 +151,7 @@ func (s *Server) HandlePowerAction(action PowerAction, waitSeconds ...int) error
return err return err
} }
return s.Environment.Start() return s.Environment.Start(s.Context())
case PowerActionTerminate: case PowerActionTerminate:
return s.Environment.Terminate(os.Kill) return s.Environment.Terminate(os.Kill)
} }