Compare commits


1 Commit

Author           SHA1        Message                   Date
Matthew Penner   dafbbab2ed  metrics: initial commit   2021-06-22 08:17:02 -06:00
10 changed files with 172 additions and 8 deletions

View File

@@ -1,9 +1,5 @@
# Changelog
## v1.4.5
### Changed
* Upped the process limit for a container from `256` to `512` to address edge cases with games that spawn a large number of processes.
## v1.4.4
### Added
* **[security]** Adds support for limiting the total number of pids any one container can have active at once to prevent malicious users from impacting other instances on the same node.

View File

@@ -4,6 +4,7 @@ import (
"crypto/tls"
"errors"
"fmt"
"github.com/pterodactyl/wings/metrics"
log2 "log"
"net/http"
"os"
@@ -137,6 +138,9 @@ func rootCmdRun(cmd *cobra.Command, _ []string) {
"gid": config.Get().System.User.Gid,
}).Info("configured system user successfully")
done := make(chan bool)
go metrics.Initialize(done)
pclient := remote.New(
config.Get().PanelLocation,
remote.WithCredentials(config.Get().AuthenticationTokenId, config.Get().AuthenticationToken),
@@ -199,6 +203,12 @@ func rootCmdRun(cmd *cobra.Command, _ []string) {
continue
}
if states[s.Id()] == environment.ProcessRunningState {
metrics.ServerStatus.WithLabelValues(s.Id()).Set(1)
} else {
metrics.ServerStatus.WithLabelValues(s.Id()).Set(0)
}
pool.Submit(func() {
s.Log().Info("configuring server environment and restoring to previous state")
var st string
@@ -346,6 +356,7 @@ func rootCmdRun(cmd *cobra.Command, _ []string) {
if err := s.ListenAndServe(); err != nil {
log.WithField("error", err).Fatal("failed to configure HTTP server")
}
<-done
}
// Reads the configuration from the disk and then sets up the global singleton

View File

@@ -91,6 +91,12 @@ type ApiConfiguration struct {
UploadLimit int `default:"100" json:"upload_limit" yaml:"upload_limit"`
}
// MetricsConfiguration defines the settings for the Prometheus metrics endpoint exposed by Wings.
type MetricsConfiguration struct {
// Bind is the address and port the metrics HTTP server listens on.
Bind string `default:":9000" yaml:"bind"`
}
// RemoteQueryConfiguration defines the configuration settings for remote requests
// from Wings to the Panel.
type RemoteQueryConfiguration struct {
@@ -260,9 +266,10 @@ type Configuration struct {
// validate against it.
AuthenticationToken string `json:"token" yaml:"token"`
Api ApiConfiguration `json:"api" yaml:"api"`
System SystemConfiguration `json:"system" yaml:"system"`
Docker DockerConfiguration `json:"docker" yaml:"docker"`
Api ApiConfiguration `json:"api" yaml:"api"`
System SystemConfiguration `json:"system" yaml:"system"`
Docker DockerConfiguration `json:"docker" yaml:"docker"`
Metrics MetricsConfiguration `json:"metrics" yaml:"metrics"`
// Defines internal throttling configurations for server processes to prevent
// someone from running an endless loop that spams data to logs.
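The new struct wires a `metrics` block into Wings' YAML configuration through the tags above (`yaml:"metrics"` on the Configuration field, `yaml:"bind"` inside it). A minimal sketch of that mapping, with the structs trimmed to just the new fields; the standalone program and sample YAML document below are illustrative assumptions, not part of this commit:

```go
// Sketch only: shows how a "metrics" block in config.yml would populate the
// new MetricsConfiguration struct. The structs are trimmed copies of the diff above.
package main

import (
	"fmt"

	"gopkg.in/yaml.v2"
)

type MetricsConfiguration struct {
	// Bind is the address the metrics HTTP server listens on.
	Bind string `yaml:"bind"`
}

type Configuration struct {
	Metrics MetricsConfiguration `yaml:"metrics"`
}

func main() {
	raw := []byte("metrics:\n  bind: \":9000\"\n")

	var cfg Configuration
	if err := yaml.Unmarshal(raw, &cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.Metrics.Bind) // prints ":9000"
}
```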

View File

@@ -60,7 +60,7 @@ type DockerConfiguration struct {
// at any given moment. This is a security concern in shared-hosting environments where a
// malicious process could create enough processes to cause the host node to run out of
// available pids and crash.
ContainerPidLimit int64 `default:"512" json:"container_pid_limit" yaml:"container_pid_limit"`
ContainerPidLimit int64 `default:"256" json:"container_pid_limit" yaml:"container_pid_limit"`
// InstallLimits defines the limits on the installer containers that prevents a server's
// installation process from unintentionally consuming more resources than expected. This

View File

@@ -3,6 +3,7 @@ package docker
import (
"context"
"fmt"
"github.com/pterodactyl/wings/metrics"
"io"
"sync"
@@ -212,5 +213,15 @@ func (e *Environment) SetState(state string) {
// If the state changed make sure we update the internal tracking to note that.
e.st.Store(state)
e.Events().Publish(environment.StateChangeEvent, state)
if state == environment.ProcessRunningState || state == environment.ProcessOfflineState {
val := 0
if state == environment.ProcessRunningState {
val = 1
} else {
metrics.ResetServer(e.Id)
}
metrics.ServerStatus.WithLabelValues(e.Id).Set(float64(val))
}
}
}

View File

@@ -6,6 +6,7 @@ import (
"encoding/json"
"github.com/docker/docker/api/types"
"github.com/pterodactyl/wings/environment"
"github.com/pterodactyl/wings/metrics"
"io"
"math"
)
@@ -60,6 +61,11 @@ func (e *Environment) pollResources(ctx context.Context) error {
st.Network.TxBytes += nw.TxBytes
}
metrics.ServerCPU.WithLabelValues(e.Id).Set(st.CpuAbsolute)
metrics.ServerMemory.WithLabelValues(e.Id).Set(float64(st.Memory))
metrics.ServerNetworkRx.WithLabelValues(e.Id).Set(float64(st.Network.RxBytes))
metrics.ServerNetworkTx.WithLabelValues(e.Id).Set(float64(st.Network.TxBytes))
if b, err := json.Marshal(st); err != nil {
e.log().WithField("error", err).Warn("error while marshaling stats object for environment")
} else {

metrics/metrics.go (new file, 107 lines)
View File

@@ -0,0 +1,107 @@
package metrics
import (
"github.com/apex/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/pterodactyl/wings/config"
"net/http"
"time"
)
type Metrics struct {
handler http.Handler
}
const (
namespace = "pterodactyl"
subsystem = "wings"
)
var (
bootTimeSeconds = promauto.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "boot_time_seconds",
Help: "Boot time of this instance since epoch (1970)",
})
timeSeconds = promauto.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "time_seconds",
Help: "System time in seconds since epoch (1970)",
})
ServerStatus = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "server_status",
}, []string{"server_id"})
ServerCPU = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "server_cpu",
}, []string{"server_id"})
ServerMemory = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "server_memory",
}, []string{"server_id"})
ServerNetworkRx = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "server_network_rx",
}, []string{"server_id"})
ServerNetworkTx = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "server_network_tx",
}, []string{"server_id"})
HTTPRequestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "http_requests_total",
}, []string{"method", "route_path", "raw_path", "raw_query", "code"})
)
func Initialize(done chan bool) {
bootTimeSeconds.Set(float64(time.Now().UnixNano()) / 1e9)
ticker := time.NewTicker(time.Second)
go func() {
defer ticker.Stop()
for {
select {
case <-done:
// Received a "signal" on the done channel.
log.Debug("metrics: done")
return
case t := <-ticker.C:
// Update the current time.
timeSeconds.Set(float64(t.UnixNano()) / 1e9)
}
}
}()
if err := http.ListenAndServe(config.Get().Metrics.Bind, promhttp.Handler()); err != nil && err != http.ErrServerClosed {
log.WithField("error", err).Error("failed to start metrics server")
}
}
// DeleteServer removes a server's labelled metric series so they are no longer exposed for scraping.
// Any previously scraped data will still be retained by Prometheus.
func DeleteServer(sID string) {
ServerStatus.DeleteLabelValues(sID)
ServerCPU.DeleteLabelValues(sID)
ServerMemory.DeleteLabelValues(sID)
ServerNetworkRx.DeleteLabelValues(sID)
ServerNetworkTx.DeleteLabelValues(sID)
}
// ResetServer will reset a server's metrics to their default values except the status.
func ResetServer(sID string) {
ServerCPU.WithLabelValues(sID).Set(0)
ServerMemory.WithLabelValues(sID).Set(0)
ServerNetworkRx.WithLabelValues(sID).Set(0)
ServerNetworkTx.WithLabelValues(sID).Set(0)
}
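Once `Initialize` is running, the new endpoint can be scraped like any other Prometheus target. A minimal sketch of inspecting it by hand, assuming a local Wings instance and the default `:9000` bind; the program below is illustrative and not part of this commit:

```go
// Sketch only: fetch the exposition endpoint and print the samples registered
// by the metrics package (they all share the "pterodactyl_wings_" prefix).
package main

import (
	"bufio"
	"fmt"
	"net/http"
	"strings"
)

func main() {
	resp, err := http.Get("http://localhost:9000/metrics")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	sc := bufio.NewScanner(resp.Body)
	for sc.Scan() {
		line := sc.Text()
		if strings.HasPrefix(line, "pterodactyl_wings_") {
			fmt.Println(line)
		}
	}
}
```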

View File

@@ -3,9 +3,11 @@ package middleware
import (
"context"
"crypto/subtle"
"github.com/pterodactyl/wings/metrics"
"io"
"net/http"
"os"
"strconv"
"strings"
"emperror.dev/errors"
@@ -352,3 +354,19 @@ func ExtractManager(c *gin.Context) *server.Manager {
}
panic("middleware/middleware: cannot extract server manager: not present in context")
}
func Metrics() gin.HandlerFunc {
return func(c *gin.Context) {
path := c.Request.URL.Path
rawQuery := c.Request.URL.RawQuery
c.Next()
// Skip over the server websocket endpoint.
if strings.HasSuffix(c.FullPath(), "/ws") {
return
}
metrics.HTTPRequestsTotal.WithLabelValues(c.Request.Method, c.FullPath(), path, rawQuery, strconv.Itoa(c.Writer.Status())).Inc()
}
}

View File

@@ -14,6 +14,7 @@ func Configure(m *server.Manager, client remote.Client) *gin.Engine {
router := gin.New()
router.Use(gin.Recovery())
router.Use(middleware.Metrics())
router.Use(middleware.AttachRequestID(), middleware.CaptureErrors(), middleware.SetAccessControlHeaders())
router.Use(middleware.AttachServerManager(m), middleware.AttachApiClient(client))
// @todo log this into a different file so you can setup IP blocking for abusive requests and such.

View File

@@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"github.com/pterodactyl/wings/metrics"
"io"
"io/ioutil"
"os"
@@ -72,6 +73,9 @@ func (m *Manager) Add(s *Server) {
m.mu.Lock()
m.servers = append(m.servers, s)
m.mu.Unlock()
// Add the server to the metrics with an offline status.
metrics.ServerStatus.WithLabelValues(s.Id()).Set(0)
}
// Get returns a single server instance and a boolean value indicating if it was
@@ -117,6 +121,9 @@ func (m *Manager) Remove(filter func(match *Server) bool) {
for _, v := range m.servers {
if !filter(v) {
r = append(r, v)
} else {
// Delete the server from the metrics.
metrics.DeleteServer(v.Id())
}
}
m.servers = r