From 8726afcd6bda11c4d246f30f2f656d90b11ff16f Mon Sep 17 00:00:00 2001
From: Hector <dev@hsmith.org>
Date: Wed, 7 Apr 2021 21:46:41 +0100
Subject: [PATCH] feat: new metric to track error counts

Add a new `errors` metric that counts the database errors encountered
since startup. It complements the `up` metric by giving better
visibility into occasional database errors.
---
 README.md       |  4 ++++
 src/exporter.go | 21 +++++++++++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 2fd35df..4873284 100644
--- a/README.md
+++ b/README.md
@@ -99,6 +99,7 @@ Access exported metrics at `/metrics` (on the provided port).
 
 Exposed metrics:
 * `up` - Returns 1 if the service is up
+* `errors` - Returns the number of errors found since startup, per error type (currently only `db`)
 * `enabled_jails` - Returns 1 for each jail that is enabled, 0 if disabled.
 * `bad_ips` (per jail)
     * A *bad IP* is defined as an IP that has been banned at least once in the past
@@ -125,4 +126,7 @@ fail2ban_up 1
 # TYPE fail2ban_enabled_jails gauge
 fail2ban_enabled_jails{jail="jail1"} 1
 fail2ban_enabled_jails{jail="jail2"} 1
+# HELP fail2ban_errors Number of errors found since startup.
+# TYPE fail2ban_errors counter
+fail2ban_errors{type="db"} 0
 ```
diff --git a/src/exporter.go b/src/exporter.go
index 5519a40..6cc3ad1 100644
--- a/src/exporter.go
+++ b/src/exporter.go
@@ -39,11 +39,17 @@ var (
 		"Enabled jails.",
 		[]string{"jail"}, nil,
 	)
+	metricErrorCount = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "", "errors"),
+		"Number of errors found since startup.",
+		[]string{"type"}, nil,
+	)
 )
 
 type Exporter struct {
-	db        *fail2banDb.Fail2BanDB
-	lastError error
+	db           *fail2banDb.Fail2BanDB
+	lastError    error
+	dbErrorCount int
 }
 
 func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
@@ -51,6 +57,7 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
 	ch <- metricBadIpsPerJail
 	ch <- metricBannedIpsPerJail
 	ch <- metricEnabledJails
+	ch <- metricErrorCount
 }
 
 func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
@@ -58,6 +65,7 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
 	e.collectBannedIpsPerJailMetrics(ch)
 	e.collectEnabledJailMetrics(ch)
 	e.collectUpMetric(ch)
+	e.collectErrorCountMetric(ch)
 }
 
 func (e *Exporter) collectUpMetric(ch chan<- prometheus.Metric) {
@@ -70,11 +78,18 @@ func (e *Exporter) collectUpMetric(ch chan<- prometheus.Metric) {
 	)
 }
 
+func (e *Exporter) collectErrorCountMetric(ch chan<- prometheus.Metric) {
+	ch <- prometheus.MustNewConstMetric(
+		metricErrorCount, prometheus.CounterValue, float64(e.dbErrorCount), "db",
+	)
+}
+
 func (e *Exporter) collectBadIpsPerJailMetrics(ch chan<- prometheus.Metric) {
 	jailNameToCountMap, err := e.db.CountBadIpsPerJail()
 	e.lastError = err
 
 	if err != nil {
+		e.dbErrorCount++
 		log.Print(err)
 	}
 
@@ -90,6 +105,7 @@ func (e *Exporter) collectBannedIpsPerJailMetrics(ch chan<- prometheus.Metric) {
 	e.lastError = err
 
 	if err != nil {
+		e.dbErrorCount++
 		log.Print(err)
 	}
 
@@ -105,6 +121,7 @@ func (e *Exporter) collectEnabledJailMetrics(ch chan<- prometheus.Metric) {
 	e.lastError = err
 
 	if err != nil {
+		e.dbErrorCount++
 		log.Print(err)
 	}