Add support for disabling repeat notifications
* Add a new database function for getting the tail * Update ShouldNotify() to handle ignoring repeat notifications if set
This commit is contained in:
@@ -2,6 +2,7 @@ package database
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
|
||||||
"github.com/analogj/scrutiny/webapp/backend/pkg"
|
"github.com/analogj/scrutiny/webapp/backend/pkg"
|
||||||
"github.com/analogj/scrutiny/webapp/backend/pkg/models"
|
"github.com/analogj/scrutiny/webapp/backend/pkg/models"
|
||||||
"github.com/analogj/scrutiny/webapp/backend/pkg/models/collector"
|
"github.com/analogj/scrutiny/webapp/backend/pkg/models/collector"
|
||||||
@@ -21,6 +22,7 @@ type DeviceRepo interface {
|
|||||||
|
|
||||||
SaveSmartAttributes(ctx context.Context, wwn string, collectorSmartData collector.SmartInfo) (measurements.Smart, error)
|
SaveSmartAttributes(ctx context.Context, wwn string, collectorSmartData collector.SmartInfo) (measurements.Smart, error)
|
||||||
GetSmartAttributeHistory(ctx context.Context, wwn string, durationKey string, attributes []string) ([]measurements.Smart, error)
|
GetSmartAttributeHistory(ctx context.Context, wwn string, durationKey string, attributes []string) ([]measurements.Smart, error)
|
||||||
|
GetSmartAttributeHistoryTail(ctx context.Context, wwn string, n int, offset int, attributes []string) ([]measurements.Smart, error)
|
||||||
|
|
||||||
SaveSmartTemperature(ctx context.Context, wwn string, deviceProtocol string, collectorSmartData collector.SmartInfo) error
|
SaveSmartTemperature(ctx context.Context, wwn string, deviceProtocol string, collectorSmartData collector.SmartInfo) error
|
||||||
|
|
||||||
|
|||||||
@@ -3,13 +3,14 @@ package database
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/analogj/scrutiny/webapp/backend/pkg/models/collector"
|
"github.com/analogj/scrutiny/webapp/backend/pkg/models/collector"
|
||||||
"github.com/analogj/scrutiny/webapp/backend/pkg/models/measurements"
|
"github.com/analogj/scrutiny/webapp/backend/pkg/models/measurements"
|
||||||
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
||||||
"github.com/influxdata/influxdb-client-go/v2/api"
|
"github.com/influxdata/influxdb-client-go/v2/api"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@@ -84,6 +85,73 @@ func (sr *scrutinyRepository) GetSmartAttributeHistory(ctx context.Context, wwn
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetSmartAttributeHistory MUST return in sorted order, where newest entries are at the beginning of the list, and oldest are at the end.
|
||||||
|
func (sr *scrutinyRepository) GetSmartAttributeHistoryTail(ctx context.Context, wwn string, n int, offset int, attributes []string) ([]measurements.Smart, error) {
|
||||||
|
partialQueryStr := []string{
|
||||||
|
`import "influxdata/influxdb/schema"`,
|
||||||
|
}
|
||||||
|
|
||||||
|
nestedDurationKeys := sr.lookupNestedDurationKeys(DURATION_KEY_FOREVER)
|
||||||
|
subQueryNames := []string{}
|
||||||
|
for _, nestedDurationKey := range nestedDurationKeys {
|
||||||
|
bucketName := sr.lookupBucketName(nestedDurationKey)
|
||||||
|
durationRange := sr.lookupDuration(nestedDurationKey)
|
||||||
|
|
||||||
|
subQueryNames = append(subQueryNames, fmt.Sprintf(`%sData`, nestedDurationKey))
|
||||||
|
partialQueryStr = append(partialQueryStr, []string{
|
||||||
|
fmt.Sprintf(`%sData = from(bucket: "%s")`, nestedDurationKey, bucketName),
|
||||||
|
fmt.Sprintf(`|> range(start: %s, stop: %s)`, durationRange[0], durationRange[1]),
|
||||||
|
`|> filter(fn: (r) => r["_measurement"] == "smart" )`,
|
||||||
|
fmt.Sprintf(`|> filter(fn: (r) => r["device_wwn"] == "%s" )`, wwn),
|
||||||
|
// We only need the last `offset` # of entries from each table to guarantee we can
|
||||||
|
// get the last `n` # of entries starting from `offset` of the union
|
||||||
|
fmt.Sprintf(`|> tail(n: %d)`, offset),
|
||||||
|
"|> schema.fieldsAsCols()",
|
||||||
|
}...)
|
||||||
|
}
|
||||||
|
|
||||||
|
partialQueryStr = append(partialQueryStr, []string{
|
||||||
|
fmt.Sprintf("union(tables: [%s])", strings.Join(subQueryNames, ", ")),
|
||||||
|
"|> group()",
|
||||||
|
`|> sort(columns: ["_time"], desc: false)`,
|
||||||
|
fmt.Sprintf(`|> tail(n: %d, offset: %d)`, n, offset),
|
||||||
|
`|> yield(name: "last")`,
|
||||||
|
}...)
|
||||||
|
|
||||||
|
queryStr := strings.Join(partialQueryStr, "\n")
|
||||||
|
log.Infoln(queryStr)
|
||||||
|
|
||||||
|
smartResults := []measurements.Smart{}
|
||||||
|
|
||||||
|
result, err := sr.influxQueryApi.Query(ctx, queryStr)
|
||||||
|
if err == nil {
|
||||||
|
// Use Next() to iterate over query result lines
|
||||||
|
for result.Next() {
|
||||||
|
// Observe when there is new grouping key producing new table
|
||||||
|
if result.TableChanged() {
|
||||||
|
//fmt.Printf("table: %s\n", result.TableMetadata().String())
|
||||||
|
}
|
||||||
|
|
||||||
|
smartData, err := measurements.NewSmartFromInfluxDB(result.Record().Values())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
smartResults = append(smartResults, *smartData)
|
||||||
|
|
||||||
|
}
|
||||||
|
if result.Err() != nil {
|
||||||
|
fmt.Printf("Query error: %s\n", result.Err().Error())
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
//we have to sort the smartResults again, because the `union` command will return multiple 'tables' and only sort the records in each table.
|
||||||
|
sortSmartMeasurementsDesc(smartResults)
|
||||||
|
|
||||||
|
return smartResults, nil
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Helper Methods
|
// Helper Methods
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|||||||
@@ -15,11 +15,13 @@ import (
|
|||||||
"github.com/analogj/go-util/utils"
|
"github.com/analogj/go-util/utils"
|
||||||
"github.com/analogj/scrutiny/webapp/backend/pkg"
|
"github.com/analogj/scrutiny/webapp/backend/pkg"
|
||||||
"github.com/analogj/scrutiny/webapp/backend/pkg/config"
|
"github.com/analogj/scrutiny/webapp/backend/pkg/config"
|
||||||
|
"github.com/analogj/scrutiny/webapp/backend/pkg/database"
|
||||||
"github.com/analogj/scrutiny/webapp/backend/pkg/models"
|
"github.com/analogj/scrutiny/webapp/backend/pkg/models"
|
||||||
"github.com/analogj/scrutiny/webapp/backend/pkg/models/measurements"
|
"github.com/analogj/scrutiny/webapp/backend/pkg/models/measurements"
|
||||||
"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
|
"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
|
||||||
"github.com/containrrr/shoutrrr"
|
"github.com/containrrr/shoutrrr"
|
||||||
shoutrrrTypes "github.com/containrrr/shoutrrr/pkg/types"
|
shoutrrrTypes "github.com/containrrr/shoutrrr/pkg/types"
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"golang.org/x/sync/errgroup"
|
"golang.org/x/sync/errgroup"
|
||||||
)
|
)
|
||||||
@@ -30,7 +32,7 @@ const NotifyFailureTypeSmartFailure = "SmartFailure"
|
|||||||
const NotifyFailureTypeScrutinyFailure = "ScrutinyFailure"
|
const NotifyFailureTypeScrutinyFailure = "ScrutinyFailure"
|
||||||
|
|
||||||
// ShouldNotify check if the error Message should be filtered (level mismatch or filtered_attributes)
|
// ShouldNotify check if the error Message should be filtered (level mismatch or filtered_attributes)
|
||||||
func ShouldNotify(device models.Device, smartAttrs measurements.Smart, statusThreshold pkg.MetricsStatusThreshold, statusFilterAttributes pkg.MetricsStatusFilterAttributes) bool {
|
func ShouldNotify(device models.Device, smartAttrs measurements.Smart, statusThreshold pkg.MetricsStatusThreshold, statusFilterAttributes pkg.MetricsStatusFilterAttributes, repeatNotifications bool, c *gin.Context, deviceRepo database.DeviceRepo) bool {
|
||||||
// 1. check if the device is healthy
|
// 1. check if the device is healthy
|
||||||
if device.DeviceStatus == pkg.DeviceStatusPassed {
|
if device.DeviceStatus == pkg.DeviceStatusPassed {
|
||||||
return false
|
return false
|
||||||
@@ -54,52 +56,79 @@ func ShouldNotify(device models.Device, smartAttrs measurements.Smart, statusThr
|
|||||||
requiredAttrStatus = pkg.AttributeStatusFailedScrutiny
|
requiredAttrStatus = pkg.AttributeStatusFailedScrutiny
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. check if the attributes that are failing should be filtered (non-critical)
|
// This is the only case where individual attributes need not be considered
|
||||||
// 3. for any unfiltered attribute, store the failure reason (Smart or Scrutiny)
|
if statusFilterAttributes == pkg.MetricsStatusFilterAttributesAll && repeatNotifications {
|
||||||
if statusFilterAttributes == pkg.MetricsStatusFilterAttributesCritical {
|
return pkg.DeviceStatusHas(device.DeviceStatus, requiredDeviceStatus)
|
||||||
hasFailingCriticalAttr := false
|
|
||||||
var statusFailingCriticalAttr pkg.AttributeStatus
|
|
||||||
|
|
||||||
for attrId, attrData := range smartAttrs.Attributes {
|
|
||||||
//find failing attribute
|
|
||||||
if attrData.GetStatus() == pkg.AttributeStatusPassed {
|
|
||||||
continue //skip all passing attributes
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// merge the status's of all critical attributes
|
var failingAttributes []string
|
||||||
statusFailingCriticalAttr = pkg.AttributeStatusSet(statusFailingCriticalAttr, attrData.GetStatus())
|
for attrId, attrData := range smartAttrs.Attributes {
|
||||||
|
var status pkg.AttributeStatus = attrData.GetStatus()
|
||||||
|
if status == pkg.AttributeStatusPassed {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
//found a failing attribute, see if its critical
|
if statusFilterAttributes == pkg.MetricsStatusFilterAttributesCritical {
|
||||||
if device.IsScsi() && thresholds.ScsiMetadata[attrId].Critical {
|
critical := false
|
||||||
hasFailingCriticalAttr = true
|
if device.IsScsi() {
|
||||||
} else if device.IsNvme() && thresholds.NmveMetadata[attrId].Critical {
|
critical = thresholds.ScsiMetadata[attrId].Critical
|
||||||
hasFailingCriticalAttr = true
|
} else if device.IsNvme() {
|
||||||
|
critical = thresholds.NmveMetadata[attrId].Critical
|
||||||
} else {
|
} else {
|
||||||
//this is ATA
|
//this is ATA
|
||||||
attrIdInt, err := strconv.Atoi(attrId)
|
attrIdInt, err := strconv.Atoi(attrId)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if thresholds.AtaMetadata[attrIdInt].Critical {
|
critical = thresholds.AtaMetadata[attrIdInt].Critical
|
||||||
hasFailingCriticalAttr = true
|
}
|
||||||
|
if !critical {
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
failingAttributes = append(failingAttributes, attrId)
|
||||||
}
|
}
|
||||||
|
|
||||||
if !hasFailingCriticalAttr {
|
if repeatNotifications {
|
||||||
//no critical attributes are failing, and notifyFilterAttributes == "critical"
|
lastPoints, err := deviceRepo.GetSmartAttributeHistoryTail(c, c.Param("wwn"), 1, 1, failingAttributes)
|
||||||
|
if err == nil && len(lastPoints) > 1 {
|
||||||
|
for _, attrId := range failingAttributes {
|
||||||
|
if old, ok := lastPoints[0].Attributes[attrId].(*measurements.SmartAtaAttribute); ok {
|
||||||
|
if current, ok := smartAttrs.Attributes[attrId].(*measurements.SmartAtaAttribute); ok {
|
||||||
|
if old.TransformedValue != current.TransformedValue {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if old, ok := lastPoints[0].Attributes[attrId].(*measurements.SmartNvmeAttribute); ok {
|
||||||
|
if current, ok := smartAttrs.Attributes[attrId].(*measurements.SmartNvmeAttribute); ok {
|
||||||
|
if old.TransformedValue != current.TransformedValue {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if old, ok := lastPoints[0].Attributes[attrId].(*measurements.SmartScsiAttribute); ok {
|
||||||
|
if current, ok := smartAttrs.Attributes[attrId].(*measurements.SmartScsiAttribute); ok {
|
||||||
|
if old.TransformedValue != current.TransformedValue {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return false
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
} else {
|
} else {
|
||||||
// check if any of the critical attributes have a status that we're looking for
|
for _, attr := range failingAttributes {
|
||||||
return pkg.AttributeStatusHas(statusFailingCriticalAttr, requiredAttrStatus)
|
attrStatus := smartAttrs.Attributes[attr].GetStatus()
|
||||||
|
if pkg.AttributeStatusHas(attrStatus, requiredAttrStatus) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
return false
|
||||||
// 2. SKIP - we are processing every attribute.
|
|
||||||
// 3. check if the device failure level matches the wanted failure level.
|
|
||||||
return pkg.DeviceStatusHas(device.DeviceStatus, requiredDeviceStatus)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: include user label for device.
|
// TODO: include user label for device.
|
||||||
|
|||||||
@@ -2,6 +2,8 @@ package handler
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
"github.com/analogj/scrutiny/webapp/backend/pkg"
|
"github.com/analogj/scrutiny/webapp/backend/pkg"
|
||||||
"github.com/analogj/scrutiny/webapp/backend/pkg/config"
|
"github.com/analogj/scrutiny/webapp/backend/pkg/config"
|
||||||
"github.com/analogj/scrutiny/webapp/backend/pkg/database"
|
"github.com/analogj/scrutiny/webapp/backend/pkg/database"
|
||||||
@@ -9,7 +11,6 @@ import (
|
|||||||
"github.com/analogj/scrutiny/webapp/backend/pkg/notify"
|
"github.com/analogj/scrutiny/webapp/backend/pkg/notify"
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"net/http"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func UploadDeviceMetrics(c *gin.Context) {
|
func UploadDeviceMetrics(c *gin.Context) {
|
||||||
@@ -73,6 +74,9 @@ func UploadDeviceMetrics(c *gin.Context) {
|
|||||||
smartData,
|
smartData,
|
||||||
pkg.MetricsStatusThreshold(appConfig.GetInt(fmt.Sprintf("%s.metrics.status_threshold", config.DB_USER_SETTINGS_SUBKEY))),
|
pkg.MetricsStatusThreshold(appConfig.GetInt(fmt.Sprintf("%s.metrics.status_threshold", config.DB_USER_SETTINGS_SUBKEY))),
|
||||||
pkg.MetricsStatusFilterAttributes(appConfig.GetInt(fmt.Sprintf("%s.metrics.status_filter_attributes", config.DB_USER_SETTINGS_SUBKEY))),
|
pkg.MetricsStatusFilterAttributes(appConfig.GetInt(fmt.Sprintf("%s.metrics.status_filter_attributes", config.DB_USER_SETTINGS_SUBKEY))),
|
||||||
|
appConfig.GetBool(fmt.Sprintf("%s.metrics.repeat_notifications", config.DB_USER_SETTINGS_SUBKEY)),
|
||||||
|
c,
|
||||||
|
deviceRepo,
|
||||||
) {
|
) {
|
||||||
//send notifications
|
//send notifications
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user