From 4da46f3c4a31f2d86deb6307dc92a2eec5bd7c5b Mon Sep 17 00:00:00 2001 From: Hector Date: Mon, 30 Aug 2021 07:19:11 +0000 Subject: [PATCH] feat: export metrics with socket errors Add new metric to collect the number of errors found when connecting to the fail2ban server socket. Errors are split into two categories: connection errors (e.g. socket file not found), and request errors (e.g. invalid response received from server). Update the `up` metric to return `0` if the socket connection fails. Improve error logging. --- src/exporter.go | 61 ++++++++++++++++++++++++++++-------- src/socket/fail2banSocket.go | 17 +++++----- 2 files changed, 57 insertions(+), 21 deletions(-) diff --git a/src/exporter.go b/src/exporter.go index 007fb51..d1d8bab 100644 --- a/src/exporter.go +++ b/src/exporter.go @@ -50,7 +50,12 @@ var ( []string{"type"}, nil, ) - metricServerPing = prometheus.NewDesc( + metricErrorCountNew = prometheus.NewDesc( + prometheus.BuildFQName(sockNamespace, "", "errors"), + "Number of errors found since startup", + []string{"type"}, nil, + ) + metricServerUp = prometheus.NewDesc( prometheus.BuildFQName(sockNamespace, "", "up"), "Check if the fail2ban server is up", nil, nil, @@ -83,10 +88,12 @@ var ( ) type Exporter struct { - db *fail2banDb.Fail2BanDB - socketPath string - lastError error - dbErrorCount int + db *fail2banDb.Fail2BanDB + socketPath string + lastError error + dbErrorCount int + socketConnectionErrorCount int + socketRequestErrorCount int } func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { @@ -98,13 +105,14 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { ch <- metricErrorCount } if e.socketPath != "" { - ch <- metricServerPing + ch <- metricServerUp ch <- metricJailCount ch <- metricJailFailedCurrent ch <- metricJailFailedTotal ch <- metricJailBannedCurrent ch <- metricJailBannedTotal } + ch <- metricErrorCountNew } func (e *Exporter) Collect(ch chan<- prometheus.Metric) { @@ -119,12 +127,16 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { s, err := socket.ConnectToSocket(e.socketPath) if err != nil { log.Printf("error opening socket: %v", err) + e.socketConnectionErrorCount++ } else { defer s.Close() - e.collectServerPingMetric(ch, s) + } + e.collectServerUpMetric(ch, s) + if err == nil && s != nil { e.collectJailMetrics(ch, s) } } + e.collectErrorCountMetricNew(ch) } func (e *Exporter) collectUpMetric(ch chan<- prometheus.Metric) { @@ -191,20 +203,42 @@ func (e *Exporter) collectEnabledJailMetrics(ch chan<- prometheus.Metric) { } } -func (e *Exporter) collectServerPingMetric(ch chan<- prometheus.Metric, s *socket.Fail2BanSocket) { - pingSuccess := s.Ping() - var pingSuccessInt float64 = 1 - if !pingSuccess { - pingSuccessInt = 0 +func (e *Exporter) collectErrorCountMetricNew(ch chan<- prometheus.Metric) { + ch <- prometheus.MustNewConstMetric( + metricErrorCountNew, prometheus.CounterValue, float64(e.dbErrorCount), "db", + ) + ch <- prometheus.MustNewConstMetric( + metricErrorCountNew, prometheus.CounterValue, float64(e.socketConnectionErrorCount), "socket_conn", + ) + ch <- prometheus.MustNewConstMetric( + metricErrorCountNew, prometheus.CounterValue, float64(e.socketRequestErrorCount), "socket_req", + ) +} + +func (e *Exporter) collectServerUpMetric(ch chan<- prometheus.Metric, s *socket.Fail2BanSocket) { + var serverUp float64 = 0 + if s != nil { + pingSuccess, err := s.Ping() + if err != nil { + e.socketRequestErrorCount++ + log.Print(err) + } + if err == nil && pingSuccess { + serverUp = 1 + } } ch <- prometheus.MustNewConstMetric( - metricServerPing, prometheus.GaugeValue, pingSuccessInt, + metricServerUp, prometheus.GaugeValue, serverUp, ) } func (e *Exporter) collectJailMetrics(ch chan<- prometheus.Metric, s *socket.Fail2BanSocket) { jails, err := s.GetJails() var count float64 = 0 + if err != nil { + e.socketRequestErrorCount++ + log.Print(err) + } if err == nil { count = float64(len(jails)) } @@ -220,6 +254,7 @@ func (e *Exporter) collectJailMetrics(ch chan<- prometheus.Metric, s *socket.Fai func (e *Exporter) collectJailStatsMetric(ch chan<- prometheus.Metric, s *socket.Fail2BanSocket, jail string) { stats, err := s.GetJailStats(jail) if err != nil { + e.socketRequestErrorCount++ log.Printf("failed to get stats for jail %s: %v", jail, err) return } diff --git a/src/socket/fail2banSocket.go b/src/socket/fail2banSocket.go index 493d1e3..4f720ad 100644 --- a/src/socket/fail2banSocket.go +++ b/src/socket/fail2banSocket.go @@ -4,7 +4,6 @@ import ( "fmt" "github.com/kisielk/og-rek" "github.com/nlpodyssey/gopickle/types" - "log" "net" "strings" ) @@ -36,21 +35,19 @@ func (s *Fail2BanSocket) Close() error { return s.socket.Close() } -func (s *Fail2BanSocket) Ping() bool { +func (s *Fail2BanSocket) Ping() (bool, error) { response, err := s.sendCommand([]string{pingCommand, "100"}) if err != nil { - log.Printf("server ping failed: %v", err) - return false + return false, newConnectionError(pingCommand, err) } if t, ok := response.(*types.Tuple); ok { if (*t)[1] == "pong" { - return true + return true, nil } - log.Printf("unexpected response data: %s", t) + return false, fmt.Errorf("unexpected response data (expecting 'pong'): %s", (*t)[1]) } - log.Printf("(%s) unexpected response format - cannot parse: %v", pingCommand, response) - return false + return false, newBadFormatError(pingCommand, response) } func (s *Fail2BanSocket) GetJails() ([]string, error) { @@ -125,6 +122,10 @@ func newBadFormatError(command string, data interface{}) error { return fmt.Errorf("(%s) unexpected response format - cannot parse: %v", command, data) } +func newConnectionError(command string, err error) error { + return fmt.Errorf("(%s) failed to send command through socket: %v", command, err) +} + func trimSpaceForAll(slice []string) []string { for i := range slice { slice[i] = strings.TrimSpace(slice[i])