From 177aa8e436c01574bfe74bde0ce1246dba67f0b8 Mon Sep 17 00:00:00 2001 From: Dane Everitt Date: Sat, 1 Aug 2020 15:20:39 -0700 Subject: [PATCH] Refactor power handling logic to be more robust and able to handle spam clicking and duplicate power actions --- cmd/root.go | 2 +- router/router_server.go | 24 ++++++---- router/websocket/websocket.go | 63 +++++++++++++-------------- server/crash.go | 2 +- server/environment.go | 1 - server/environment_docker.go | 6 ++- server/power.go | 82 ++++++++++++++++++++++++++++++++--- server/server.go | 19 +------- 8 files changed, 129 insertions(+), 70 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 75f2d63..1c298d9 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -205,7 +205,7 @@ func rootCmdRun(*cobra.Command, []string) { // is that it was running, but we see that the container process is not currently running. if r || (!r && s.IsRunning()) { s.Log().Info("detected server is running, re-attaching to process...") - if err := s.Environment.Start(); err != nil { + if err := s.HandlePowerAction(server.PowerActionStart); err != nil { s.Log().WithField("error", errors.WithStack(err)).Warn("failed to properly start server detected as already running") } diff --git a/router/router_server.go b/router/router_server.go index 6af62d0..2d090f3 100644 --- a/router/router_server.go +++ b/router/router_server.go @@ -2,6 +2,7 @@ package router import ( "bytes" + "context" "github.com/apex/log" "github.com/gin-gonic/gin" "github.com/pkg/errors" @@ -47,13 +48,15 @@ func getServerLogs(c *gin.Context) { func postServerPower(c *gin.Context) { s := GetServer(c.Param("server")) - var data server.PowerAction - // BindJSON sends 400 if the request fails, all we need to do is return + var data struct{ + Action server.PowerAction `json:"action"` + } + if err := c.BindJSON(&data); err != nil { return } - if !data.IsValid() { + if !data.Action.IsValid() { c.AbortWithStatusJSON(http.StatusUnprocessableEntity, gin.H{ "error": "The power action provided was not valid, should be one of \"stop\", \"start\", \"restart\", \"kill\"", }) @@ -66,7 +69,7 @@ func postServerPower(c *gin.Context) { // // We don't really care about any of the other actions at this point, they'll all result // in the process being stopped, which should have happened anyways if the server is suspended. - if (data.Action == "start" || data.Action == "restart") && s.IsSuspended() { + if (data.Action == server.PowerActionStart || data.Action == server.PowerActionRestart) && s.IsSuspended() { c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{ "error": "Cannot start or restart a server that is suspended.", }) @@ -76,10 +79,15 @@ func postServerPower(c *gin.Context) { // Pass the actual heavy processing off to a seperate thread to handle so that // we can immediately return a response from the server. Some of these actions // can take quite some time, especially stopping or restarting. - go func(server *server.Server) { - if err := server.HandlePowerAction(data); err != nil { - server.Log().WithFields(log.Fields{"action": data, "error": err}). - Error("encountered error processing a server power action in the background") + go func(s *server.Server) { + if err := s.HandlePowerAction(data.Action, 30); err != nil { + if errors.Is(err, context.DeadlineExceeded) { + s.Log().WithField("action", data.Action). + Warn("could not acquire a lock while attempting to perform a power action") + } else { + s.Log().WithFields(log.Fields{"action": data, "error": err}). + Error("encountered error processing a server power action in the background") + } } }(s) diff --git a/router/websocket/websocket.go b/router/websocket/websocket.go index d14caa6..7bca972 100644 --- a/router/websocket/websocket.go +++ b/router/websocket/websocket.go @@ -1,6 +1,7 @@ package websocket import ( + "context" "encoding/json" "fmt" "github.com/apex/log" @@ -12,7 +13,6 @@ import ( "github.com/pterodactyl/wings/router/tokens" "github.com/pterodactyl/wings/server" "net/http" - "os" "strings" "sync" "time" @@ -150,7 +150,7 @@ func (h *Handler) TokenValid() error { // Sends an error back to the connected websocket instance by checking the permissions // of the token. If the user has the "receive-errors" grant we will send back the actual // error message, otherwise we just send back a standard error message. -func (h *Handler) SendErrorJson(msg Message, err error) error { +func (h *Handler) SendErrorJson(msg Message, err error, shouldLog ...bool) error { j := h.GetJwt() message := "an unexpected error was encountered while handling this request" @@ -163,9 +163,11 @@ func (h *Handler) SendErrorJson(msg Message, err error) error { wsm := Message{Event: ErrorEvent} wsm.Args = []string{m} - if !server.IsSuspendedError(err) { - h.server.Log().WithFields(log.Fields{"event": msg.Event, "error_identifier": u.String(), "error": err}). - Error("failed to handle websocket process; an error was encountered processing an event") + if len(shouldLog) == 0 || (len(shouldLog) == 1 && shouldLog[0] == true) { + if !server.IsSuspendedError(err) { + h.server.Log().WithFields(log.Fields{"event": msg.Event, "error_identifier": u.String(), "error": err}). + Error("failed to handle websocket process; an error was encountered processing an event") + } } return h.unsafeSendJson(wsm) @@ -271,37 +273,34 @@ func (h *Handler) HandleInbound(m Message) error { } case SetStateEvent: { - switch strings.Join(m.Args, "") { - case "start": - if h.GetJwt().HasPermission(PermissionSendPowerStart) { - return h.server.Environment.Start() - } - break - case "stop": - if h.GetJwt().HasPermission(PermissionSendPowerStop) { - return h.server.Environment.Stop() - } - break - case "restart": - if h.GetJwt().HasPermission(PermissionSendPowerRestart) { - // If the server is alreay restarting don't do anything. Perhaps we send back an event - // in the future for this? For now no reason to knowingly trigger an error by trying to - // restart a process already restarting. - if h.server.Environment.IsRestarting() { - return nil - } + action := server.PowerAction(strings.Join(m.Args, "")) - return h.server.Environment.Restart() + actions := make(map[server.PowerAction]string) + actions[server.PowerActionStart] = PermissionSendPowerStart + actions[server.PowerActionStop] = PermissionSendPowerStop + actions[server.PowerActionRestart] = PermissionSendPowerRestart + actions[server.PowerActionTerminate] = PermissionSendPowerStop + + // Check that they have permission to perform this action if it is needed. + if permission, exists := actions[action]; exists { + if !h.GetJwt().HasPermission(permission) { + return nil } - break - case "kill": - if h.GetJwt().HasPermission(PermissionSendPowerStop) { - return h.server.Environment.Terminate(os.Kill) - } - break } - return nil + err := h.server.HandlePowerAction(action) + if errors.Is(err, context.DeadlineExceeded) { + m, _ := h.GetErrorMessage("another power action is currently being processed for this server, please try again later") + + h.SendJson(&Message{ + Event: ErrorEvent, + Args: []string{m}, + }) + + return nil + } + + return err } case SendServerLogsEvent: { diff --git a/server/crash.go b/server/crash.go index 3eb2ba4..66ec98a 100644 --- a/server/crash.go +++ b/server/crash.go @@ -82,5 +82,5 @@ func (s *Server) handleServerCrash() error { s.crasher.SetLastCrash(time.Now()) - return s.Environment.Start() + return s.HandlePowerAction(PowerActionStart) } \ No newline at end of file diff --git a/server/environment.go b/server/environment.go index ce6eed0..e97abcf 100644 --- a/server/environment.go +++ b/server/environment.go @@ -36,7 +36,6 @@ type Environment interface { // unnecessary double/triple/quad looping issues if multiple people press restart or spam the // button to restart. Restart() error - IsRestarting() bool // Waits for a server instance to stop gracefully. If the server is still detected // as running after seconds, an error will be returned, or the server will be terminated diff --git a/server/environment_docker.go b/server/environment_docker.go index b9cd0e6..663070f 100644 --- a/server/environment_docker.go +++ b/server/environment_docker.go @@ -342,13 +342,13 @@ func (d *DockerEnvironment) acquireRestartLock() error { // start command. This will return an error if there is already a restart process executing for the // server. The lock is released when the process is stopped and a start has begun. func (d *DockerEnvironment) Restart() error { - d.Server.Log().Debug("attempting to acquire restart lock...") + d.Server.Log().Debug("acquiring process restart lock...") if err := d.acquireRestartLock(); err != nil { d.Server.Log().Warn("failed to acquire restart lock; already acquired by a different process") return err } - d.Server.Log().Debug("acquired restart lock") + d.Server.Log().Info("acquired process lock; beginning restart process...") err := d.WaitForStop(60, false) if err != nil { @@ -496,12 +496,14 @@ func (d *DockerEnvironment) Attach() error { } var err error + d.Lock() d.stream, err = d.Client.ContainerAttach(context.Background(), d.Server.Id(), types.ContainerAttachOptions{ Stdin: true, Stdout: true, Stderr: true, Stream: true, }) + d.Unlock() if err != nil { return errors.WithStack(err) diff --git a/server/power.go b/server/power.go index 454ac5b..9435230 100644 --- a/server/power.go +++ b/server/power.go @@ -1,12 +1,80 @@ package server -type PowerAction struct { - Action string `json:"action"` +import ( + "context" + "github.com/pkg/errors" + "golang.org/x/sync/semaphore" + "os" + "time" +) + +type PowerAction string + +// The power actions that can be performed for a given server. This taps into the given server +// environment and performs them in a way that prevents a race condition from occurring. For +// example, sending two "start" actions back to back will not process the second action until +// the first action has been completed. +// +// This utilizes a workerpool with a limit of one worker so that all of the actions execute +// in a sync manner. +const ( + PowerActionStart = "start" + PowerActionStop = "stop" + PowerActionRestart = "restart" + PowerActionTerminate = "kill" +) + +// Checks if the power action being received is valid. +func (pa PowerAction) IsValid() bool { + return pa == PowerActionStart || + pa == PowerActionStop || + pa == PowerActionTerminate || + pa == PowerActionRestart } -func (pr *PowerAction) IsValid() bool { - return pr.Action == "start" || - pr.Action == "stop" || - pr.Action == "kill" || - pr.Action == "restart" +// Helper function that can receive a power action and then process the actions that need +// to occur for it. This guards against someone calling Start() twice at the same time, or +// trying to restart while another restart process is currently running. +// +// However, the code design for the daemon does depend on the user correctly calling this +// function rather than making direct calls to the start/stop/restart functions on the +// environment struct. +func (s *Server) HandlePowerAction(action PowerAction, waitSeconds ...int) error { + if s.powerLock == nil { + s.powerLock = semaphore.NewWeighted(1) + } + + // Determines if we should wait for the lock or not. If a value greater than 0 is passed + // into this function we will wait that long for a lock to be acquired. + if len(waitSeconds) > 0 && waitSeconds[0] != 0 { + ctx, _ := context.WithTimeout(context.Background(), time.Second*time.Duration(waitSeconds[0])) + // Attempt to acquire a lock on the power action lock for up to 30 seconds. If more + // time than that passes an error will be propagated back up the chain and this + // request will be aborted. + if err := s.powerLock.Acquire(ctx, 1); err != nil { + return errors.WithMessage(err, "could not acquire lock on power state") + } + } else { + // If no wait duration was provided we will attempt to immediately acquire the lock + // and bail out with a context deadline error if it is not acquired immediately. + if ok := s.powerLock.TryAcquire(1); !ok { + return errors.WithMessage(context.DeadlineExceeded, "could not acquire lock on power state") + } + } + + // Release the lock once the process being requested has finished executing. + defer s.powerLock.Release(1) + + switch action { + case PowerActionStart: + return s.Environment.Start() + case PowerActionStop: + return s.Environment.Stop() + case PowerActionRestart: + return s.Environment.Restart() + case PowerActionTerminate: + return s.Environment.Terminate(os.Kill) + } + + return errors.New("attempting to handle unknown power action") } diff --git a/server/server.go b/server/server.go index 78d819b..c95b008 100644 --- a/server/server.go +++ b/server/server.go @@ -8,7 +8,6 @@ import ( "github.com/pkg/errors" "github.com/pterodactyl/wings/api" "golang.org/x/sync/semaphore" - "os" "strings" "sync" "time" @@ -20,6 +19,7 @@ type Server struct { // writing the configuration to the disk. sync.RWMutex emitterLock sync.Mutex + powerLock *semaphore.Weighted // Maintains the configuration for the server. This is the data that gets returned by the Panel // such as build settings and container images. @@ -158,23 +158,6 @@ func (s *Server) GetProcessConfiguration() (*api.ServerConfigurationResponse, *a return api.NewRequester().GetServerConfiguration(s.Id()) } -// Helper function that can receive a power action and then process the -// actions that need to occur for it. -func (s *Server) HandlePowerAction(action PowerAction) error { - switch action.Action { - case "start": - return s.Environment.Start() - case "restart": - return s.Environment.Restart() - case "stop": - return s.Environment.Stop() - case "kill": - return s.Environment.Terminate(os.Kill) - default: - return errors.New("an invalid power action was provided") - } -} - // Checks if the server is marked as being suspended or not on the system. func (s *Server) IsSuspended() bool { return s.Config().Suspended