feat: new metric to track error counts

Add a new metric that counts the number of database errors encountered
since startup. This complements the `up` metric by giving better
visibility into occasional database errors.
This commit is contained in:
Hector 2021-04-07 21:46:41 +01:00
parent a406e019e2
commit 8726afcd6b
2 changed files with 23 additions and 2 deletions

View File

@ -99,6 +99,7 @@ Access exported metrics at `/metrics` (on the provided port).
Exposed metrics: Exposed metrics:
* `up` - Returns 1 if the service is up * `up` - Returns 1 if the service is up
* `errors` - Returns the number of errors found since startup, labelled by `type` (e.g. `db`)
* `enabled_jails` - Returns 1 for each jail that is enabled, 0 if disabled. * `enabled_jails` - Returns 1 for each jail that is enabled, 0 if disabled.
* `bad_ips` (per jail) * `bad_ips` (per jail)
* A *bad IP* is defined as an IP that has been banned at least once in the past * A *bad IP* is defined as an IP that has been banned at least once in the past
@ -125,4 +126,7 @@ fail2ban_up 1
# TYPE fail2ban_enabled_jails gauge # TYPE fail2ban_enabled_jails gauge
fail2ban_enabled_jails{jail="jail1"} 1 fail2ban_enabled_jails{jail="jail1"} 1
fail2ban_enabled_jails{jail="jail2"} 1 fail2ban_enabled_jails{jail="jail2"} 1
# HELP fail2ban_errors Number of errors found since startup.
# TYPE fail2ban_errors counter
fail2ban_errors{type="db"} 0
``` ```

View File

@ -39,11 +39,17 @@ var (
"Enabled jails.", "Enabled jails.",
[]string{"jail"}, nil, []string{"jail"}, nil,
) )
metricErrorCount = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "errors"),
"Number of errors found since startup.",
[]string{"type"}, nil,
)
) )
// Exporter gathers fail2ban metrics by querying the database and publishes
// them to Prometheus through its Describe and Collect methods.
type Exporter struct { type Exporter struct {
// db is the database handle queried by the collect* methods.
db *fail2banDb.Fail2BanDB db *fail2banDb.Fail2BanDB
// lastError holds the error (nil on success) returned by the most recent
// database query.
lastError error lastError error
// dbErrorCount is incremented each time a database query returns an error;
// it is reported as the errors metric with the label type="db".
dbErrorCount int
} }
func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
@ -51,6 +57,7 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
ch <- metricBadIpsPerJail ch <- metricBadIpsPerJail
ch <- metricBannedIpsPerJail ch <- metricBannedIpsPerJail
ch <- metricEnabledJails ch <- metricEnabledJails
ch <- metricErrorCount
} }
func (e *Exporter) Collect(ch chan<- prometheus.Metric) { func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
@ -58,6 +65,7 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
e.collectBannedIpsPerJailMetrics(ch) e.collectBannedIpsPerJailMetrics(ch)
e.collectEnabledJailMetrics(ch) e.collectEnabledJailMetrics(ch)
e.collectUpMetric(ch) e.collectUpMetric(ch)
e.collectErrorCountMetric(ch)
} }
func (e *Exporter) collectUpMetric(ch chan<- prometheus.Metric) { func (e *Exporter) collectUpMetric(ch chan<- prometheus.Metric) {
@ -70,11 +78,18 @@ func (e *Exporter) collectUpMetric(ch chan<- prometheus.Metric) {
) )
} }
// collectErrorCountMetric sends the error-count counter on ch, reporting the
// exporter's current dbErrorCount under the label value "db".
func (e *Exporter) collectErrorCountMetric(ch chan<- prometheus.Metric) {
	dbErrors := float64(e.dbErrorCount)
	errorMetric := prometheus.MustNewConstMetric(
		metricErrorCount,
		prometheus.CounterValue,
		dbErrors,
		"db",
	)
	ch <- errorMetric
}
func (e *Exporter) collectBadIpsPerJailMetrics(ch chan<- prometheus.Metric) { func (e *Exporter) collectBadIpsPerJailMetrics(ch chan<- prometheus.Metric) {
jailNameToCountMap, err := e.db.CountBadIpsPerJail() jailNameToCountMap, err := e.db.CountBadIpsPerJail()
e.lastError = err e.lastError = err
if err != nil { if err != nil {
e.dbErrorCount++
log.Print(err) log.Print(err)
} }
@ -90,6 +105,7 @@ func (e *Exporter) collectBannedIpsPerJailMetrics(ch chan<- prometheus.Metric) {
e.lastError = err e.lastError = err
if err != nil { if err != nil {
e.dbErrorCount++
log.Print(err) log.Print(err)
} }
@ -105,6 +121,7 @@ func (e *Exporter) collectEnabledJailMetrics(ch chan<- prometheus.Metric) {
e.lastError = err e.lastError = err
if err != nil { if err != nil {
e.dbErrorCount++
log.Print(err) log.Print(err)
} }