From ee08829a284aa3a7e011b0eece7fd7badcb5f752 Mon Sep 17 00:00:00 2001 From: Matthew Penner Date: Sun, 6 Dec 2020 13:56:17 -0700 Subject: [PATCH] s3 backups: handle CompleteMultipartUpload and AbortMultipartUpload on the panel --- api/backup_endpoints.go | 26 +++++++++++--- config/config_system.go | 19 +++++++--- loggers/cli/cli.go | 2 +- server/backup.go | 3 +- server/backup/backup_s3.go | 73 +++++--------------------------------- server/crash.go | 18 +++++----- 6 files changed, 56 insertions(+), 85 deletions(-) diff --git a/api/backup_endpoints.go b/api/backup_endpoints.go index 44e6b93..c3d2df1 100644 --- a/api/backup_endpoints.go +++ b/api/backup_endpoints.go @@ -3,13 +3,18 @@ package api import ( "fmt" "strconv" + "sync" +) + +var ( + backupUploadIDsMx sync.Mutex + backupUploadIDs = map[string]string{} ) type BackupRemoteUploadResponse struct { - CompleteMultipartUpload string `json:"complete_multipart_upload"` - AbortMultipartUpload string `json:"abort_multipart_upload"` - Parts []string `json:"parts"` - PartSize int64 `json:"part_size"` + UploadID string `json:"upload_id"` + Parts []string `json:"parts"` + PartSize int64 `json:"part_size"` } func (r *Request) GetBackupRemoteUploadURLs(backup string, size int64) (*BackupRemoteUploadResponse, error) { @@ -28,10 +33,16 @@ func (r *Request) GetBackupRemoteUploadURLs(backup string, size int64) (*BackupR return nil, err } + // Store the backup upload id for later use, this is a janky way to be able to use it later with SendBackupStatus. + backupUploadIDsMx.Lock() + backupUploadIDs[backup] = res.UploadID + backupUploadIDsMx.Unlock() + return &res, nil } type BackupRequest struct { + UploadID string `json:"upload_id"` Checksum string `json:"checksum"` ChecksumType string `json:"checksum_type"` Size int64 `json:"size"` @@ -41,6 +52,13 @@ type BackupRequest struct { // Notifies the panel that a specific backup has been completed and is now // available for a user to view and download. func (r *Request) SendBackupStatus(backup string, data BackupRequest) error { + // Set the UploadID on the data. + backupUploadIDsMx.Lock() + if v, ok := backupUploadIDs[backup]; ok { + data.UploadID = v + } + backupUploadIDsMx.Unlock() + resp, err := r.Post(fmt.Sprintf("/backups/%s", backup), data) if err != nil { return err diff --git a/config/config_system.go b/config/config_system.go index be540a3..848ddca 100644 --- a/config/config_system.go +++ b/config/config_system.go @@ -59,11 +59,6 @@ type SystemConfiguration struct { // disk usage is not a concern. DiskCheckInterval int64 `default:"150" yaml:"disk_check_interval"` - // Determines if Wings should detect a server that stops with a normal exit code of - // "0" as being crashed if the process stopped without any Wings interaction. E.g. - // the user did not press the stop button, but the process stopped cleanly. - DetectCleanExitAsCrash bool `default:"true" yaml:"detect_clean_exit_as_crash"` - // If set to true, file permissions for a server will be checked when the process is // booted. This can cause boot delays if the server has a large amount of files. In most // cases disabling this should not have any major impact unless external processes are @@ -78,6 +73,20 @@ type SystemConfiguration struct { WebsocketLogCount int `default:"150" yaml:"websocket_log_count"` Sftp SftpConfiguration `yaml:"sftp"` + + CrashDetection CrashDetection `yaml:"crash_detection"` +} + +type CrashDetection struct { + // Determines if Wings should detect a server that stops with a normal exit code of + // "0" as being crashed if the process stopped without any Wings interaction. E.g. + // the user did not press the stop button, but the process stopped cleanly. + DetectCleanExitAsCrash bool `default:"true" yaml:"detect_clean_exit_as_crash"` + + // Timeout specifies the timeout between crashes that will not cause the server + // to be automatically restarted, this value is used to prevent servers from + // becoming stuck in a boot-loop after multiple consecutive crashes. + Timeout int `default:"60" json:"timeout"` } // Ensures that all of the system directories exist on the system. These directories are diff --git a/loggers/cli/cli.go b/loggers/cli/cli.go index 0710cdf..46fc403 100644 --- a/loggers/cli/cli.go +++ b/loggers/cli/cli.go @@ -95,7 +95,7 @@ func getErrorStack(err error, i bool) errors.StackTrace { // The errors.WrapIf did not return a interface compatible with `tracer`, so // we don't have an easy way to get the stacktrace, this should probably be changed // at some point, but without this the application may panic when handling some errors. - return nil + return errors.WithStack(err).(tracer).StackTrace() } return getErrorStack(errors.WithMessage(err, err.Error()), true) diff --git a/server/backup.go b/server/backup.go index ad0f6c8..ea353c5 100644 --- a/server/backup.go +++ b/server/backup.go @@ -13,8 +13,7 @@ import ( // Notifies the panel of a backup's state and returns an error if one is encountered // while performing this action. func (s *Server) notifyPanelOfBackup(uuid string, ad *backup.ArchiveDetails, successful bool) error { - err := api.New().SendBackupStatus(uuid, ad.ToRequest(successful)) - if err != nil { + if err := api.New().SendBackupStatus(uuid, ad.ToRequest(successful)); err != nil { if !api.IsRequestError(err) { s.Log().WithFields(log.Fields{ "backup": uuid, diff --git a/server/backup/backup_s3.go b/server/backup/backup_s3.go index a9e7799..a5b3d51 100644 --- a/server/backup/backup_s3.go +++ b/server/backup/backup_s3.go @@ -1,7 +1,6 @@ package backup import ( - "bytes" "context" "fmt" "github.com/apex/log" @@ -10,7 +9,6 @@ import ( "net/http" "os" "strconv" - "time" ) type S3Backup struct { @@ -75,10 +73,12 @@ func (s *S3Backup) generateRemoteRequest(rc io.ReadCloser) error { return err } - log.WithFields(log.Fields{ + l := log.WithFields(log.Fields{ "backup_id": s.Uuid, "adapter": "s3", - }).Info("attempting to upload backup..") + }) + + l.Info("attempting to upload backup..") handlePart := func(part string, size int64) (string, error) { r, err := http.NewRequest(http.MethodPut, part, nil) @@ -91,7 +91,7 @@ func (s *S3Backup) generateRemoteRequest(rc io.ReadCloser) error { r.Header.Add("Content-Type", "application/x-gzip") // Limit the reader to the size of the part. - r.Body = Reader{io.LimitReader(rc, size)} + r.Body = Reader{Reader: io.LimitReader(rc, size)} // This http request can block forever due to it not having a timeout, // but we are uploading up to 5GB of data, so there is not really @@ -111,10 +111,6 @@ func (s *S3Backup) generateRemoteRequest(rc io.ReadCloser) error { return res.Header.Get("ETag"), nil } - // Start assembling the body that will be sent as apart of the CompleteMultipartUpload request. - var completeUploadBody bytes.Buffer - completeUploadBody.WriteString("\n") - partCount := len(urls.Parts) for i, part := range urls.Parts { // Get the size for the current part. @@ -128,67 +124,14 @@ func (s *S3Backup) generateRemoteRequest(rc io.ReadCloser) error { } // Attempt to upload the part. - etag, err := handlePart(part, partSize) + _, err := handlePart(part, partSize) if err != nil { - log.WithError(err).Warn("failed to upload part") - - // Send an AbortMultipartUpload request. - if err := s.finishUpload(urls.AbortMultipartUpload, nil); err != nil { - log.WithError(err).Warn("failed to abort multipart backup upload") - } - + l.WithField("part_id", part).WithError(err).Warn("failed to upload part") return err } - - // Add the part to the CompleteMultipartUpload body. - completeUploadBody.WriteString("\t\n") - completeUploadBody.WriteString("\t\t\"" + etag + "\"\n") - completeUploadBody.WriteString("\t\t" + strconv.Itoa(i+1) + "\n") - completeUploadBody.WriteString("\t\n") - } - completeUploadBody.WriteString("") - - // Send a CompleteMultipartUpload request. - if err := s.finishUpload(urls.CompleteMultipartUpload, &completeUploadBody); err != nil { - return err } - log.WithFields(log.Fields{ - "backup_id": s.Uuid, - "adapter": "s3", - }).Info("backup has been successfully uploaded") - return nil -} - -// finishUpload sends a requests to the specified url to either complete or abort the upload. -func (s *S3Backup) finishUpload(url string, body io.Reader) error { - r, err := http.NewRequest(http.MethodPost, url, body) - if err != nil { - return err - } - - // Create a new http client with a 10 second timeout. - c := &http.Client{ - Timeout: 10 * time.Second, - } - - res, err := c.Do(r) - if err != nil { - return err - } - defer res.Body.Close() - - // Handle non-200 status codes. - if res.StatusCode != http.StatusOK { - // If no body was sent, we were aborting the upload. - if body == nil { - return fmt.Errorf("failed to abort S3 multipart upload, %d:%s", res.StatusCode, res.Status) - } - - // If a body was sent we were completing the upload. - // TODO: Attempt to send abort request? - return fmt.Errorf("failed to complete S3 multipart upload, %d:%s", res.StatusCode, res.Status) - } + l.Info("backup has been successfully uploaded") return nil } diff --git a/server/crash.go b/server/crash.go index f945bea..ddb8ecb 100644 --- a/server/crash.go +++ b/server/crash.go @@ -4,6 +4,7 @@ import ( "fmt" "github.com/pterodactyl/wings/config" "github.com/pterodactyl/wings/environment" + "strconv" "sync" "time" ) @@ -47,8 +48,7 @@ func (s *Server) handleServerCrash() error { if s.Environment.State() != environment.ProcessOfflineState || !s.Config().CrashDetectionEnabled { if !s.Config().CrashDetectionEnabled { s.Log().Debug("server triggered crash detection but handler is disabled for server process") - - s.PublishConsoleOutputFromDaemon("Server detected as crashed; crash detection is disabled for this instance.") + s.PublishConsoleOutputFromDaemon("Aborting automatic restart, crash detection is disabled for this instance.") } return nil @@ -61,9 +61,8 @@ func (s *Server) handleServerCrash() error { // If the system is not configured to detect a clean exit code as a crash, and the // crash is not the result of the program running out of memory, do nothing. - if exitCode == 0 && !oomKilled && !config.Get().System.DetectCleanExitAsCrash { + if exitCode == 0 && !oomKilled && !config.Get().System.CrashDetection.DetectCleanExitAsCrash { s.Log().Debug("server exited with successful exit code; system is configured to not detect this as a crash") - return nil } @@ -72,11 +71,14 @@ func (s *Server) handleServerCrash() error { s.PublishConsoleOutputFromDaemon(fmt.Sprintf("Out of memory: %t", oomKilled)) c := s.crasher.LastCrashTime() - // If the last crash time was within the last 60 seconds we do not want to perform - // an automatic reboot of the process. Return an error that can be handled. - if !c.IsZero() && c.Add(time.Second*60).After(time.Now()) { - s.PublishConsoleOutputFromDaemon("Aborting automatic reboot: last crash occurred less than 60 seconds ago.") + timeout := config.Get().System.CrashDetection.Timeout + // If the last crash time was within the last `timeout` seconds we do not want to perform + // an automatic reboot of the process. Return an error that can be handled. + // + // If timeout is set to 0, always reboot the server (this is probably a terrible idea, but some people want it) + if timeout != 0 && !c.IsZero() && c.Add(time.Second*time.Duration(config.Get().System.CrashDetection.Timeout)).After(time.Now()) { + s.PublishConsoleOutputFromDaemon("Aborting automatic restart, last crash occurred less than " + strconv.Itoa(timeout) + " seconds ago.") return &crashTooFrequent{} }