validate thresholds whenever SMART data is received.

This commit is contained in:
Jason Kulatunga
2021-07-25 22:11:07 -07:00
parent 1fc910f41b
commit 80f4660130
12 changed files with 237 additions and 163 deletions
+2
View File
@@ -2,6 +2,7 @@ package database
import ( import (
"context" "context"
"github.com/analogj/scrutiny/webapp/backend/pkg"
"github.com/analogj/scrutiny/webapp/backend/pkg/models" "github.com/analogj/scrutiny/webapp/backend/pkg/models"
"github.com/analogj/scrutiny/webapp/backend/pkg/models/collector" "github.com/analogj/scrutiny/webapp/backend/pkg/models/collector"
"github.com/analogj/scrutiny/webapp/backend/pkg/models/measurements" "github.com/analogj/scrutiny/webapp/backend/pkg/models/measurements"
@@ -16,6 +17,7 @@ type DeviceRepo interface {
RegisterDevice(ctx context.Context, dev models.Device) error RegisterDevice(ctx context.Context, dev models.Device) error
GetDevices(ctx context.Context) ([]models.Device, error) GetDevices(ctx context.Context) ([]models.Device, error)
UpdateDevice(ctx context.Context, wwn string, collectorSmartData collector.SmartInfo) (models.Device, error) UpdateDevice(ctx context.Context, wwn string, collectorSmartData collector.SmartInfo) (models.Device, error)
UpdateDeviceStatus(ctx context.Context, wwn string, status pkg.DeviceStatus) (models.Device, error)
GetDeviceDetails(ctx context.Context, wwn string) (models.Device, error) GetDeviceDetails(ctx context.Context, wwn string) (models.Device, error)
SaveSmartAttributes(ctx context.Context, wwn string, collectorSmartData collector.SmartInfo) (measurements.Smart, error) SaveSmartAttributes(ctx context.Context, wwn string, collectorSmartData collector.SmartInfo) (measurements.Smart, error)
@@ -3,6 +3,7 @@ package database
import ( import (
"context" "context"
"fmt" "fmt"
"github.com/analogj/scrutiny/webapp/backend/pkg"
"github.com/analogj/scrutiny/webapp/backend/pkg/config" "github.com/analogj/scrutiny/webapp/backend/pkg/config"
"github.com/analogj/scrutiny/webapp/backend/pkg/models" "github.com/analogj/scrutiny/webapp/backend/pkg/models"
"github.com/analogj/scrutiny/webapp/backend/pkg/models/collector" "github.com/analogj/scrutiny/webapp/backend/pkg/models/collector"
@@ -163,6 +164,17 @@ func (sr *scrutinyRepository) UpdateDevice(ctx context.Context, wwn string, coll
return device, sr.gormClient.Model(&device).Updates(device).Error return device, sr.gormClient.Model(&device).Updates(device).Error
} }
// UpdateDeviceStatus loads the device identified by wwn, merges status into
// its current DeviceStatus using pkg.Set, and writes the device back.
//
// It returns the device as loaded (and modified) together with the first
// error encountered: a wrapped lookup error when the device cannot be read,
// otherwise the error reported by the update.
//
// NOTE(review): gorm's Updates with a struct argument only writes non-zero
// fields; confirm that is acceptable for every DeviceStatus value passed here.
func (sr *scrutinyRepository) UpdateDeviceStatus(ctx context.Context, wwn string, status pkg.DeviceStatus) (models.Device, error) {
	var device models.Device
	if err := sr.gormClient.WithContext(ctx).Where("wwn = ?", wwn).First(&device).Error; err != nil {
		// The original call had no verb for err, which rendered as "%!(EXTRA ...)".
		// %w keeps the cause in the message and inspectable via errors.Is/As.
		return device, fmt.Errorf("Could not get device from DB: %w", err)
	}

	// presumably pkg.Set adds status to the existing flags rather than replacing them; verify in pkg.
	device.DeviceStatus = pkg.Set(device.DeviceStatus, status)

	// Use the same request context for the write as for the read above.
	return device, sr.gormClient.WithContext(ctx).Model(&device).Updates(device).Error
}
func (sr *scrutinyRepository) GetDeviceDetails(ctx context.Context, wwn string) (models.Device, error) { func (sr *scrutinyRepository) GetDeviceDetails(ctx context.Context, wwn string) (models.Device, error) {
var device models.Device var device models.Device
@@ -434,3 +446,11 @@ func (sr *scrutinyRepository) GetSummary(ctx context.Context) (map[string]*model
return summaries, nil return summaries, nil
} }
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Process Thresholds
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// ProcessSmartAttributeThresholds is a placeholder: it takes no arguments,
// returns nothing, and its body is currently empty, so calling it has no effect.
func (sr *scrutinyRepository) ProcessSmartAttributeThresholds() {
}
+54 -31
View File
@@ -3,8 +3,8 @@ package measurements
import ( import (
"fmt" "fmt"
"github.com/analogj/scrutiny/webapp/backend/pkg" "github.com/analogj/scrutiny/webapp/backend/pkg"
"github.com/analogj/scrutiny/webapp/backend/pkg/metadata"
"github.com/analogj/scrutiny/webapp/backend/pkg/models/collector" "github.com/analogj/scrutiny/webapp/backend/pkg/models/collector"
"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
"log" "log"
"strings" "strings"
"time" "time"
@@ -22,6 +22,9 @@ type Smart struct {
//Attributes (fields) //Attributes (fields)
Attributes map[string]SmartAttribute `json:"attrs"` Attributes map[string]SmartAttribute `json:"attrs"`
//status
Status pkg.DeviceStatus
} }
func (sm *Smart) Flatten() (tags map[string]string, fields map[string]interface{}) { func (sm *Smart) Flatten() (tags map[string]string, fields map[string]interface{}) {
@@ -133,6 +136,7 @@ func (sm *Smart) FromCollectorSmartInfo(wwn string, info collector.SmartInfo) er
//generate SmartAtaAttribute entries from Scrutiny Collector Smart data. //generate SmartAtaAttribute entries from Scrutiny Collector Smart data.
func (sm *Smart) ProcessAtaSmartInfo(info collector.SmartInfo) { func (sm *Smart) ProcessAtaSmartInfo(info collector.SmartInfo) {
sm.Status = pkg.DeviceStatusPassed
for _, collectorAttr := range info.AtaSmartAttributes.Table { for _, collectorAttr := range info.AtaSmartAttributes.Table {
attrModel := SmartAtaAttribute{ attrModel := SmartAtaAttribute{
AttributeId: collectorAttr.ID, AttributeId: collectorAttr.ID,
@@ -146,53 +150,72 @@ func (sm *Smart) ProcessAtaSmartInfo(info collector.SmartInfo) {
} }
//now that we've parsed the data from the smartctl response, lets match it against our metadata rules and add additional Scrutiny specific data. //now that we've parsed the data from the smartctl response, lets match it against our metadata rules and add additional Scrutiny specific data.
if smartMetadata, ok := metadata.AtaMetadata[collectorAttr.ID]; ok { if smartMetadata, ok := thresholds.AtaMetadata[collectorAttr.ID]; ok {
attrModel.Name = smartMetadata.DisplayName attrModel.Name = smartMetadata.DisplayName
if smartMetadata.Transform != nil { if smartMetadata.Transform != nil {
attrModel.TransformedValue = smartMetadata.Transform(attrModel.Value, attrModel.RawValue, attrModel.RawString) attrModel.TransformedValue = smartMetadata.Transform(attrModel.Value, attrModel.RawValue, attrModel.RawString)
} }
} }
attrModel.PopulateAttributeStatus()
sm.Attributes[string(collectorAttr.ID)] = &attrModel sm.Attributes[string(collectorAttr.ID)] = &attrModel
if attrModel.Status == pkg.SmartAttributeStatusFailed {
sm.Status = pkg.DeviceStatusFailedScrutiny
}
} }
} }
//generate SmartNvmeAttribute entries from Scrutiny Collector Smart data. //generate SmartNvmeAttribute entries from Scrutiny Collector Smart data.
func (sm *Smart) ProcessNvmeSmartInfo(info collector.SmartInfo) { func (sm *Smart) ProcessNvmeSmartInfo(info collector.SmartInfo) {
sm.Attributes = map[string]SmartAttribute{ sm.Attributes = map[string]SmartAttribute{
"critical_warning": &SmartNvmeAttribute{AttributeId: "critical_warning", Name: "Critical Warning", Value: info.NvmeSmartHealthInformationLog.CriticalWarning, Threshold: 0}, "critical_warning": (&SmartNvmeAttribute{AttributeId: "critical_warning", Name: "Critical Warning", Value: info.NvmeSmartHealthInformationLog.CriticalWarning, Threshold: 0}).PopulateAttributeStatus(),
"temperature": &SmartNvmeAttribute{AttributeId: "temperature", Name: "Temperature", Value: info.NvmeSmartHealthInformationLog.Temperature, Threshold: -1}, "temperature": (&SmartNvmeAttribute{AttributeId: "temperature", Name: "Temperature", Value: info.NvmeSmartHealthInformationLog.Temperature, Threshold: -1}).PopulateAttributeStatus(),
"available_spare": &SmartNvmeAttribute{AttributeId: "available_spare", Name: "Available Spare", Value: info.NvmeSmartHealthInformationLog.AvailableSpare, Threshold: info.NvmeSmartHealthInformationLog.AvailableSpareThreshold}, "available_spare": (&SmartNvmeAttribute{AttributeId: "available_spare", Name: "Available Spare", Value: info.NvmeSmartHealthInformationLog.AvailableSpare, Threshold: info.NvmeSmartHealthInformationLog.AvailableSpareThreshold}).PopulateAttributeStatus(),
"percentage_used": &SmartNvmeAttribute{AttributeId: "percentage_used", Name: "Percentage Used", Value: info.NvmeSmartHealthInformationLog.PercentageUsed, Threshold: 100}, "percentage_used": (&SmartNvmeAttribute{AttributeId: "percentage_used", Name: "Percentage Used", Value: info.NvmeSmartHealthInformationLog.PercentageUsed, Threshold: 100}).PopulateAttributeStatus(),
"data_units_read": &SmartNvmeAttribute{AttributeId: "data_units_read", Name: "Data Units Read", Value: info.NvmeSmartHealthInformationLog.DataUnitsRead, Threshold: -1}, "data_units_read": (&SmartNvmeAttribute{AttributeId: "data_units_read", Name: "Data Units Read", Value: info.NvmeSmartHealthInformationLog.DataUnitsRead, Threshold: -1}).PopulateAttributeStatus(),
"data_units_written": &SmartNvmeAttribute{AttributeId: "data_units_written", Name: "Data Units Written", Value: info.NvmeSmartHealthInformationLog.DataUnitsWritten, Threshold: -1}, "data_units_written": (&SmartNvmeAttribute{AttributeId: "data_units_written", Name: "Data Units Written", Value: info.NvmeSmartHealthInformationLog.DataUnitsWritten, Threshold: -1}).PopulateAttributeStatus(),
"host_reads": &SmartNvmeAttribute{AttributeId: "host_reads", Name: "Host Reads", Value: info.NvmeSmartHealthInformationLog.HostReads, Threshold: -1}, "host_reads": (&SmartNvmeAttribute{AttributeId: "host_reads", Name: "Host Reads", Value: info.NvmeSmartHealthInformationLog.HostReads, Threshold: -1}).PopulateAttributeStatus(),
"host_writes": &SmartNvmeAttribute{AttributeId: "host_writes", Name: "Host Writes", Value: info.NvmeSmartHealthInformationLog.HostWrites, Threshold: -1}, "host_writes": (&SmartNvmeAttribute{AttributeId: "host_writes", Name: "Host Writes", Value: info.NvmeSmartHealthInformationLog.HostWrites, Threshold: -1}).PopulateAttributeStatus(),
"controller_busy_time": &SmartNvmeAttribute{AttributeId: "controller_busy_time", Name: "Controller Busy Time", Value: info.NvmeSmartHealthInformationLog.ControllerBusyTime, Threshold: -1}, "controller_busy_time": (&SmartNvmeAttribute{AttributeId: "controller_busy_time", Name: "Controller Busy Time", Value: info.NvmeSmartHealthInformationLog.ControllerBusyTime, Threshold: -1}).PopulateAttributeStatus(),
"power_cycles": &SmartNvmeAttribute{AttributeId: "power_cycles", Name: "Power Cycles", Value: info.NvmeSmartHealthInformationLog.PowerCycles, Threshold: -1}, "power_cycles": (&SmartNvmeAttribute{AttributeId: "power_cycles", Name: "Power Cycles", Value: info.NvmeSmartHealthInformationLog.PowerCycles, Threshold: -1}).PopulateAttributeStatus(),
"power_on_hours": &SmartNvmeAttribute{AttributeId: "power_on_hours", Name: "Power on Hours", Value: info.NvmeSmartHealthInformationLog.PowerOnHours, Threshold: -1}, "power_on_hours": (&SmartNvmeAttribute{AttributeId: "power_on_hours", Name: "Power on Hours", Value: info.NvmeSmartHealthInformationLog.PowerOnHours, Threshold: -1}).PopulateAttributeStatus(),
"unsafe_shutdowns": &SmartNvmeAttribute{AttributeId: "unsafe_shutdowns", Name: "Unsafe Shutdowns", Value: info.NvmeSmartHealthInformationLog.UnsafeShutdowns, Threshold: -1}, "unsafe_shutdowns": (&SmartNvmeAttribute{AttributeId: "unsafe_shutdowns", Name: "Unsafe Shutdowns", Value: info.NvmeSmartHealthInformationLog.UnsafeShutdowns, Threshold: -1}).PopulateAttributeStatus(),
"media_errors": &SmartNvmeAttribute{AttributeId: "media_errors", Name: "Media Errors", Value: info.NvmeSmartHealthInformationLog.MediaErrors, Threshold: 0}, "media_errors": (&SmartNvmeAttribute{AttributeId: "media_errors", Name: "Media Errors", Value: info.NvmeSmartHealthInformationLog.MediaErrors, Threshold: 0}).PopulateAttributeStatus(),
"num_err_log_entries": &SmartNvmeAttribute{AttributeId: "num_err_log_entries", Name: "Numb Err Log Entries", Value: info.NvmeSmartHealthInformationLog.NumErrLogEntries, Threshold: 0}, "num_err_log_entries": (&SmartNvmeAttribute{AttributeId: "num_err_log_entries", Name: "Numb Err Log Entries", Value: info.NvmeSmartHealthInformationLog.NumErrLogEntries, Threshold: 0}).PopulateAttributeStatus(),
"warning_temp_time": &SmartNvmeAttribute{AttributeId: "warning_temp_time", Name: "Warning Temp Time", Value: info.NvmeSmartHealthInformationLog.WarningTempTime, Threshold: -1}, "warning_temp_time": (&SmartNvmeAttribute{AttributeId: "warning_temp_time", Name: "Warning Temp Time", Value: info.NvmeSmartHealthInformationLog.WarningTempTime, Threshold: -1}).PopulateAttributeStatus(),
"critical_comp_time": &SmartNvmeAttribute{AttributeId: "critical_comp_time", Name: "Critical CompTime", Value: info.NvmeSmartHealthInformationLog.CriticalCompTime, Threshold: -1}, "critical_comp_time": (&SmartNvmeAttribute{AttributeId: "critical_comp_time", Name: "Critical CompTime", Value: info.NvmeSmartHealthInformationLog.CriticalCompTime, Threshold: -1}).PopulateAttributeStatus(),
}
//find analyzed attribute status
for _, val := range sm.Attributes {
if val.GetStatus() == pkg.SmartAttributeStatusFailed {
sm.Status = pkg.DeviceStatusFailedScrutiny
}
} }
} }
//generate SmartScsiAttribute entries from Scrutiny Collector Smart data. //generate SmartScsiAttribute entries from Scrutiny Collector Smart data.
func (sm *Smart) ProcessScsiSmartInfo(info collector.SmartInfo) { func (sm *Smart) ProcessScsiSmartInfo(info collector.SmartInfo) {
sm.Attributes = map[string]SmartAttribute{ sm.Attributes = map[string]SmartAttribute{
"scsi_grown_defect_list": &SmartScsiAttribute{AttributeId: "scsi_grown_defect_list", Name: "Grown Defect List", Value: info.ScsiGrownDefectList, Threshold: 0}, "scsi_grown_defect_list": (&SmartScsiAttribute{AttributeId: "scsi_grown_defect_list", Name: "Grown Defect List", Value: info.ScsiGrownDefectList, Threshold: 0}).PopulateAttributeStatus(),
"read_errors_corrected_by_eccfast": &SmartScsiAttribute{AttributeId: "read_errors_corrected_by_eccfast", Name: "Read Errors Corrected by ECC Fast", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByEccfast, Threshold: -1}, "read_errors_corrected_by_eccfast": (&SmartScsiAttribute{AttributeId: "read_errors_corrected_by_eccfast", Name: "Read Errors Corrected by ECC Fast", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByEccfast, Threshold: -1}).PopulateAttributeStatus(),
"read_errors_corrected_by_eccdelayed": &SmartScsiAttribute{AttributeId: "read_errors_corrected_by_eccdelayed", Name: "Read Errors Corrected by ECC Delayed", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByEccdelayed, Threshold: -1}, "read_errors_corrected_by_eccdelayed": (&SmartScsiAttribute{AttributeId: "read_errors_corrected_by_eccdelayed", Name: "Read Errors Corrected by ECC Delayed", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByEccdelayed, Threshold: -1}).PopulateAttributeStatus(),
"read_errors_corrected_by_rereads_rewrites": &SmartScsiAttribute{AttributeId: "read_errors_corrected_by_rereads_rewrites", Name: "Read Errors Corrected by ReReads/ReWrites", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByRereadsRewrites, Threshold: 0}, "read_errors_corrected_by_rereads_rewrites": (&SmartScsiAttribute{AttributeId: "read_errors_corrected_by_rereads_rewrites", Name: "Read Errors Corrected by ReReads/ReWrites", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByRereadsRewrites, Threshold: 0}).PopulateAttributeStatus(),
"read_total_errors_corrected": &SmartScsiAttribute{AttributeId: "read_total_errors_corrected", Name: "Read Total Errors Corrected", Value: info.ScsiErrorCounterLog.Read.TotalErrorsCorrected, Threshold: -1}, "read_total_errors_corrected": (&SmartScsiAttribute{AttributeId: "read_total_errors_corrected", Name: "Read Total Errors Corrected", Value: info.ScsiErrorCounterLog.Read.TotalErrorsCorrected, Threshold: -1}).PopulateAttributeStatus(),
"read_correction_algorithm_invocations": &SmartScsiAttribute{AttributeId: "read_correction_algorithm_invocations", Name: "Read Correction Algorithm Invocations", Value: info.ScsiErrorCounterLog.Read.CorrectionAlgorithmInvocations, Threshold: -1}, "read_correction_algorithm_invocations": (&SmartScsiAttribute{AttributeId: "read_correction_algorithm_invocations", Name: "Read Correction Algorithm Invocations", Value: info.ScsiErrorCounterLog.Read.CorrectionAlgorithmInvocations, Threshold: -1}).PopulateAttributeStatus(),
"read_total_uncorrected_errors": &SmartScsiAttribute{AttributeId: "read_total_uncorrected_errors", Name: "Read Total Uncorrected Errors", Value: info.ScsiErrorCounterLog.Read.TotalUncorrectedErrors, Threshold: 0}, "read_total_uncorrected_errors": (&SmartScsiAttribute{AttributeId: "read_total_uncorrected_errors", Name: "Read Total Uncorrected Errors", Value: info.ScsiErrorCounterLog.Read.TotalUncorrectedErrors, Threshold: 0}).PopulateAttributeStatus(),
"write_errors_corrected_by_eccfast": &SmartScsiAttribute{AttributeId: "write_errors_corrected_by_eccfast", Name: "Write Errors Corrected by ECC Fast", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByEccfast, Threshold: -1}, "write_errors_corrected_by_eccfast": (&SmartScsiAttribute{AttributeId: "write_errors_corrected_by_eccfast", Name: "Write Errors Corrected by ECC Fast", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByEccfast, Threshold: -1}).PopulateAttributeStatus(),
"write_errors_corrected_by_eccdelayed": &SmartScsiAttribute{AttributeId: "write_errors_corrected_by_eccdelayed", Name: "Write Errors Corrected by ECC Delayed", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByEccdelayed, Threshold: -1}, "write_errors_corrected_by_eccdelayed": (&SmartScsiAttribute{AttributeId: "write_errors_corrected_by_eccdelayed", Name: "Write Errors Corrected by ECC Delayed", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByEccdelayed, Threshold: -1}).PopulateAttributeStatus(),
"write_errors_corrected_by_rereads_rewrites": &SmartScsiAttribute{AttributeId: "write_errors_corrected_by_rereads_rewrites", Name: "Write Errors Corrected by ReReads/ReWrites", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByRereadsRewrites, Threshold: 0}, "write_errors_corrected_by_rereads_rewrites": (&SmartScsiAttribute{AttributeId: "write_errors_corrected_by_rereads_rewrites", Name: "Write Errors Corrected by ReReads/ReWrites", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByRereadsRewrites, Threshold: 0}).PopulateAttributeStatus(),
"write_total_errors_corrected": &SmartScsiAttribute{AttributeId: "write_total_errors_corrected", Name: "Write Total Errors Corrected", Value: info.ScsiErrorCounterLog.Write.TotalErrorsCorrected, Threshold: -1}, "write_total_errors_corrected": (&SmartScsiAttribute{AttributeId: "write_total_errors_corrected", Name: "Write Total Errors Corrected", Value: info.ScsiErrorCounterLog.Write.TotalErrorsCorrected, Threshold: -1}).PopulateAttributeStatus(),
"write_correction_algorithm_invocations": &SmartScsiAttribute{AttributeId: "write_correction_algorithm_invocations", Name: "Write Correction Algorithm Invocations", Value: info.ScsiErrorCounterLog.Write.CorrectionAlgorithmInvocations, Threshold: -1}, "write_correction_algorithm_invocations": (&SmartScsiAttribute{AttributeId: "write_correction_algorithm_invocations", Name: "Write Correction Algorithm Invocations", Value: info.ScsiErrorCounterLog.Write.CorrectionAlgorithmInvocations, Threshold: -1}).PopulateAttributeStatus(),
"write_total_uncorrected_errors": &SmartScsiAttribute{AttributeId: "write_total_uncorrected_errors", Name: "Write Total Uncorrected Errors", Value: info.ScsiErrorCounterLog.Write.TotalUncorrectedErrors, Threshold: 0}, "write_total_uncorrected_errors": (&SmartScsiAttribute{AttributeId: "write_total_uncorrected_errors", Name: "Write Total Uncorrected Errors", Value: info.ScsiErrorCounterLog.Write.TotalUncorrectedErrors, Threshold: 0}).PopulateAttributeStatus(),
}
//find analyzed attribute status
for _, val := range sm.Attributes {
if val.GetStatus() == pkg.SmartAttributeStatusFailed {
sm.Status = pkg.DeviceStatusFailedScrutiny
}
} }
} }
@@ -2,14 +2,12 @@ package measurements
import ( import (
"fmt" "fmt"
"github.com/analogj/scrutiny/webapp/backend/pkg"
"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
"strconv" "strconv"
"strings" "strings"
) )
const SmartAttributeStatusPassed = "passed"
const SmartAttributeStatusFailed = "failed"
const SmartAttributeStatusWarning = "warn"
type SmartAtaAttribute struct { type SmartAtaAttribute struct {
AttributeId int `json:"attribute_id"` AttributeId int `json:"attribute_id"`
Name string `json:"name"` Name string `json:"name"`
@@ -27,6 +25,10 @@ type SmartAtaAttribute struct {
FailureRate float64 `json:"failure_rate,omitempty"` FailureRate float64 `json:"failure_rate,omitempty"`
} }
// GetStatus returns the attribute's current Status string unchanged.
func (sa *SmartAtaAttribute) GetStatus() string {
return sa.Status
}
func (sa *SmartAtaAttribute) Flatten() map[string]interface{} { func (sa *SmartAtaAttribute) Flatten() map[string]interface{} {
idString := strconv.Itoa(sa.AttributeId) idString := strconv.Itoa(sa.AttributeId)
@@ -71,81 +73,82 @@ func (sa *SmartAtaAttribute) Inflate(key string, val interface{}) {
} }
} }
// //populate attribute status, using SMART Thresholds & Observed Metadata
////populate attribute status, using SMART Thresholds & Observed Metadata // Chainable
//func (sa *SmartAtaAttribute) PopulateAttributeStatus() { func (sa *SmartAtaAttribute) PopulateAttributeStatus() *SmartAtaAttribute {
// if strings.ToUpper(sa.WhenFailed) == SmartWhenFailedFailingNow { if strings.ToUpper(sa.WhenFailed) == pkg.SmartWhenFailedFailingNow {
// //this attribute has previously failed //this attribute has previously failed
// sa.Status = SmartAttributeStatusFailed sa.Status = pkg.SmartAttributeStatusFailed
// sa.StatusReason = "Attribute is failing manufacturer SMART threshold" sa.StatusReason = "Attribute is failing manufacturer SMART threshold"
//
// } else if strings.ToUpper(sa.WhenFailed) == SmartWhenFailedInThePast { } else if strings.ToUpper(sa.WhenFailed) == pkg.SmartWhenFailedInThePast {
// sa.Status = SmartAttributeStatusWarning sa.Status = pkg.SmartAttributeStatusWarning
// sa.StatusReason = "Attribute has previously failed manufacturer SMART threshold" sa.StatusReason = "Attribute has previously failed manufacturer SMART threshold"
// } }
//
// if smartMetadata, ok := metadata.AtaMetadata[sa.AttributeId]; ok { if smartMetadata, ok := thresholds.AtaMetadata[sa.AttributeId]; ok {
// sa.MetadataObservedThresholdStatus(smartMetadata) sa.ValidateThreshold(smartMetadata)
// } }
//
// //check if status is blank, set to "passed" //check if status is blank, set to "passed"
// if len(sa.Status) == 0 { if len(sa.Status) == 0 {
// sa.Status = SmartAttributeStatusPassed sa.Status = pkg.SmartAttributeStatusPassed
// } }
//} return sa
// }
//// compare the attribute (raw, normalized, transformed) value to observed thresholds, and update status if necessary
//func (sa *SmartAtaAttribute) MetadataObservedThresholdStatus(smartMetadata metadata.AtaAttributeMetadata) { // compare the attribute (raw, normalized, transformed) value to observed thresholds, and update status if necessary
// //TODO: multiple rules func (sa *SmartAtaAttribute) ValidateThreshold(smartMetadata thresholds.AtaAttributeMetadata) {
// // try to predict the failure rates for observed thresholds that have 0 failure rate and error bars. //TODO: multiple rules
// // - if the attribute is critical // try to predict the failure rates for observed thresholds that have 0 failure rate and error bars.
// // - the failure rate is over 10 - set to failed // - if the attribute is critical
// // - the attribute does not match any threshold, set to warn // - the failure rate is over 10 - set to failed
// // - if the attribute is not critical // - the attribute does not match any threshold, set to warn
// // - if failure rate is above 20 - set to failed // - if the attribute is not critical
// // - if failure rate is above 10 but below 20 - set to warn // - if failure rate is above 20 - set to failed
// // - if failure rate is above 10 but below 20 - set to warn
// //update the smart attribute status based on Observed thresholds.
// var value int64 //update the smart attribute status based on Observed thresholds.
// if smartMetadata.DisplayType == metadata.AtaSmartAttributeDisplayTypeNormalized { var value int64
// value = int64(sa.Value) if smartMetadata.DisplayType == thresholds.AtaSmartAttributeDisplayTypeNormalized {
// } else if smartMetadata.DisplayType == metadata.AtaSmartAttributeDisplayTypeTransformed { value = int64(sa.Value)
// value = sa.TransformedValue } else if smartMetadata.DisplayType == thresholds.AtaSmartAttributeDisplayTypeTransformed {
// } else { value = sa.TransformedValue
// value = sa.RawValue } else {
// } value = sa.RawValue
// }
// for _, obsThresh := range smartMetadata.ObservedThresholds {
// for _, obsThresh := range smartMetadata.ObservedThresholds {
// //check if "value" is in this bucket
// if ((obsThresh.Low == obsThresh.High) && value == obsThresh.Low) || //check if "value" is in this bucket
// (obsThresh.Low < value && value <= obsThresh.High) { if ((obsThresh.Low == obsThresh.High) && value == obsThresh.Low) ||
// sa.FailureRate = obsThresh.AnnualFailureRate (obsThresh.Low < value && value <= obsThresh.High) {
// sa.FailureRate = obsThresh.AnnualFailureRate
// if smartMetadata.Critical {
// if obsThresh.AnnualFailureRate >= 0.10 { if smartMetadata.Critical {
// sa.Status = SmartAttributeStatusFailed if obsThresh.AnnualFailureRate >= 0.10 {
// sa.StatusReason = "Observed Failure Rate for Critical Attribute is greater than 10%" sa.Status = pkg.SmartAttributeStatusFailed
// } sa.StatusReason = "Observed Failure Rate for Critical Attribute is greater than 10%"
// } else { }
// if obsThresh.AnnualFailureRate >= 0.20 { } else {
// sa.Status = SmartAttributeStatusFailed if obsThresh.AnnualFailureRate >= 0.20 {
// sa.StatusReason = "Observed Failure Rate for Attribute is greater than 20%" sa.Status = pkg.SmartAttributeStatusFailed
// } else if obsThresh.AnnualFailureRate >= 0.10 { sa.StatusReason = "Observed Failure Rate for Attribute is greater than 20%"
// sa.Status = SmartAttributeStatusWarning } else if obsThresh.AnnualFailureRate >= 0.10 {
// sa.StatusReason = "Observed Failure Rate for Attribute is greater than 10%" sa.Status = pkg.SmartAttributeStatusWarning
// } sa.StatusReason = "Observed Failure Rate for Attribute is greater than 10%"
// } }
// }
// //we've found the correct bucket, we can drop out of this loop
// return //we've found the correct bucket, we can drop out of this loop
// } return
// } }
// // no bucket found }
// if smartMetadata.Critical { // no bucket found
// sa.Status = SmartAttributeStatusWarning if smartMetadata.Critical {
// sa.StatusReason = "Could not determine Observed Failure Rate for Critical Attribute" sa.Status = pkg.SmartAttributeStatusWarning
// } sa.StatusReason = "Could not determine Observed Failure Rate for Critical Attribute"
// }
// return
//} return
}
@@ -3,4 +3,5 @@ package measurements
type SmartAttribute interface { type SmartAttribute interface {
Flatten() (fields map[string]interface{}) Flatten() (fields map[string]interface{})
Inflate(key string, val interface{}) Inflate(key string, val interface{})
GetStatus() string
} }
@@ -2,6 +2,8 @@ package measurements
import ( import (
"fmt" "fmt"
"github.com/analogj/scrutiny/webapp/backend/pkg"
"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
"strings" "strings"
) )
@@ -17,6 +19,10 @@ type SmartNvmeAttribute struct {
FailureRate float64 `json:"failure_rate,omitempty"` FailureRate float64 `json:"failure_rate,omitempty"`
} }
// GetStatus returns the attribute's current Status string unchanged.
func (sa *SmartNvmeAttribute) GetStatus() string {
return sa.Status
}
func (sa *SmartNvmeAttribute) Flatten() map[string]interface{} { func (sa *SmartNvmeAttribute) Flatten() map[string]interface{} {
return map[string]interface{}{ return map[string]interface{}{
fmt.Sprintf("attr.%s.attribute_id", sa.AttributeId): sa.AttributeId, fmt.Sprintf("attr.%s.attribute_id", sa.AttributeId): sa.AttributeId,
@@ -44,25 +50,26 @@ func (sa *SmartNvmeAttribute) Inflate(key string, val interface{}) {
} }
} }
// //populate attribute status, using SMART Thresholds & Observed Metadata
////populate attribute status, using SMART Thresholds & Observed Metadata // Chainable
//func (sa *SmartNvmeAttribute) PopulateAttributeStatus() { func (sa *SmartNvmeAttribute) PopulateAttributeStatus() *SmartNvmeAttribute {
//
// //-1 is a special number meaning no threshold. //-1 is a special number meaning no threshold.
// if sa.Threshold != -1 { if sa.Threshold != -1 {
// if smartMetadata, ok := metadata.NmveMetadata[sa.AttributeId]; ok { if smartMetadata, ok := thresholds.NmveMetadata[sa.AttributeId]; ok {
// //check what the ideal is. Ideal tells us if we our recorded value needs to be above, or below the threshold //check what the ideal is. Ideal tells us if we our recorded value needs to be above, or below the threshold
// if (smartMetadata.Ideal == "low" && sa.Value > sa.Threshold) || if (smartMetadata.Ideal == "low" && sa.Value > sa.Threshold) ||
// (smartMetadata.Ideal == "high" && sa.Value < sa.Threshold) { (smartMetadata.Ideal == "high" && sa.Value < sa.Threshold) {
// sa.Status = SmartAttributeStatusFailed sa.Status = pkg.SmartAttributeStatusFailed
// sa.StatusReason = "Attribute is failing recommended SMART threshold" sa.StatusReason = "Attribute is failing recommended SMART threshold"
// } }
// } }
// } }
// //TODO: eventually figure out the critical_warning bits and determine correct error messages here. //TODO: eventually figure out the critical_warning bits and determine correct error messages here.
//
// //check if status is blank, set to "passed" //check if status is blank, set to "passed"
// if len(sa.Status) == 0 { if len(sa.Status) == 0 {
// sa.Status = SmartAttributeStatusPassed sa.Status = pkg.SmartAttributeStatusPassed
// } }
//} return sa
}
@@ -2,6 +2,8 @@ package measurements
import ( import (
"fmt" "fmt"
"github.com/analogj/scrutiny/webapp/backend/pkg"
"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
"strings" "strings"
) )
@@ -17,6 +19,10 @@ type SmartScsiAttribute struct {
FailureRate float64 `json:"failure_rate,omitempty"` FailureRate float64 `json:"failure_rate,omitempty"`
} }
// GetStatus returns the attribute's current Status string unchanged.
func (sa *SmartScsiAttribute) GetStatus() string {
return sa.Status
}
func (sa *SmartScsiAttribute) Flatten() map[string]interface{} { func (sa *SmartScsiAttribute) Flatten() map[string]interface{} {
return map[string]interface{}{ return map[string]interface{}{
fmt.Sprintf("attr.%s.attribute_id", sa.AttributeId): sa.AttributeId, fmt.Sprintf("attr.%s.attribute_id", sa.AttributeId): sa.AttributeId,
@@ -45,23 +51,25 @@ func (sa *SmartScsiAttribute) Inflate(key string, val interface{}) {
} }
// //
////populate attribute status, using SMART Thresholds & Observed Metadata //populate attribute status, using SMART Thresholds & Observed Metadata
//func (sa *SmartScsiAttribute) PopulateAttributeStatus() { //Chainable
// func (sa *SmartScsiAttribute) PopulateAttributeStatus() *SmartScsiAttribute {
// //-1 is a special number meaning no threshold.
// if sa.Threshold != -1 { //-1 is a special number meaning no threshold.
// if smartMetadata, ok := metadata.NmveMetadata[sa.AttributeId]; ok { if sa.Threshold != -1 {
// //check what the ideal is. Ideal tells us if we our recorded value needs to be above, or below the threshold if smartMetadata, ok := thresholds.NmveMetadata[sa.AttributeId]; ok {
// if (smartMetadata.Ideal == "low" && sa.Value > sa.Threshold) || //check what the ideal is. Ideal tells us if we our recorded value needs to be above, or below the threshold
// (smartMetadata.Ideal == "high" && sa.Value < sa.Threshold) { if (smartMetadata.Ideal == "low" && sa.Value > sa.Threshold) ||
// sa.Status = SmartAttributeStatusFailed (smartMetadata.Ideal == "high" && sa.Value < sa.Threshold) {
// sa.StatusReason = "Attribute is failing recommended SMART threshold" sa.Status = pkg.SmartAttributeStatusFailed
// } sa.StatusReason = "Attribute is failing recommended SMART threshold"
// } }
// } }
// }
// //check if status is blank, set to "passed"
// if len(sa.Status) == 0 { //check if status is blank, set to "passed"
// sa.Status = SmartAttributeStatusPassed if len(sa.Status) == 0 {
// } sa.Status = pkg.SmartAttributeStatusPassed
//} }
return sa
}
@@ -1,4 +1,4 @@
package metadata package thresholds
const AtaSmartAttributeDisplayTypeRaw = "raw" const AtaSmartAttributeDisplayTypeRaw = "raw"
const AtaSmartAttributeDisplayTypeNormalized = "normalized" const AtaSmartAttributeDisplayTypeNormalized = "normalized"
@@ -1,4 +1,4 @@
package metadata package thresholds
// https://media.kingston.com/support/downloads/MKP_521.6_SMART-DCP1000_attribute.pdf // https://media.kingston.com/support/downloads/MKP_521.6_SMART-DCP1000_attribute.pdf
// https://www.percona.com/blog/2017/02/09/using-nvme-command-line-tools-to-check-nvme-flash-health/ // https://www.percona.com/blog/2017/02/09/using-nvme-command-line-tools-to-check-nvme-flash-health/
@@ -1,4 +1,4 @@
package metadata package thresholds
type ScsiAttributeMetadata struct { type ScsiAttributeMetadata struct {
ID string `json:"-"` ID string `json:"-"`
@@ -2,7 +2,7 @@ package handler
import ( import (
"github.com/analogj/scrutiny/webapp/backend/pkg/database" "github.com/analogj/scrutiny/webapp/backend/pkg/database"
"github.com/analogj/scrutiny/webapp/backend/pkg/metadata" "github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"net/http" "net/http"
@@ -23,11 +23,11 @@ func GetDeviceDetails(c *gin.Context) {
var deviceMetadata interface{} var deviceMetadata interface{}
if device.IsAta() { if device.IsAta() {
deviceMetadata = metadata.AtaMetadata deviceMetadata = thresholds.AtaMetadata
} else if device.IsNvme() { } else if device.IsNvme() {
deviceMetadata = metadata.NmveMetadata deviceMetadata = thresholds.NmveMetadata
} else if device.IsScsi() { } else if device.IsScsi() {
deviceMetadata = metadata.ScsiMetadata deviceMetadata = thresholds.ScsiMetadata
} }
c.JSON(http.StatusOK, gin.H{"success": true, "data": map[string]interface{}{"device": device, "smart_results": smartResults}, "metadata": deviceMetadata}) c.JSON(http.StatusOK, gin.H{"success": true, "data": map[string]interface{}{"device": device, "smart_results": smartResults}, "metadata": deviceMetadata})
@@ -37,13 +37,23 @@ func UploadDeviceMetrics(c *gin.Context) {
} }
// insert smart info // insert smart info
_, err = deviceRepo.SaveSmartAttributes(c, c.Param("wwn"), collectorSmartData) smartData, err := deviceRepo.SaveSmartAttributes(c, c.Param("wwn"), collectorSmartData)
if err != nil { if err != nil {
logger.Errorln("An error occurred while saving smartctl metrics", err) logger.Errorln("An error occurred while saving smartctl metrics", err)
c.JSON(http.StatusInternalServerError, gin.H{"success": false}) c.JSON(http.StatusInternalServerError, gin.H{"success": false})
return return
} }
if smartData.Status != pkg.DeviceStatusPassed {
//there is a failure detected by Scrutiny, update the device status on the homepage.
updatedDevice, err = deviceRepo.UpdateDeviceStatus(c, c.Param("wwn"), smartData.Status)
if err != nil {
logger.Errorln("An error occurred while updating device status", err)
c.JSON(http.StatusInternalServerError, gin.H{"success": false})
return
}
}
// save smart temperature data (ignore failures) // save smart temperature data (ignore failures)
err = deviceRepo.SaveSmartTemperature(c, c.Param("wwn"), updatedDevice.DeviceProtocol, collectorSmartData) err = deviceRepo.SaveSmartTemperature(c, c.Param("wwn"), updatedDevice.DeviceProtocol, collectorSmartData)
if err != nil { if err != nil {