Files
scrutiny/webapp/backend/pkg/notify/notify.go
T
Merlin 6a278bc2cf Add support for topic in Zulip notifications and truncate long topics
Subjects over 60 characters long, such as the test notification, are rejected by shoutrrr. This truncates the subject to the max length.

Users may want all Scrutiny notifications to be sent to a particular topic rather than whatever Scrutiny happens to decide.
2026-02-28 10:27:16 -08:00

443 lines
15 KiB
Go

package notify
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"net/http"
"net/url"
"os"
"strconv"
"strings"
"time"
"github.com/analogj/go-util/utils"
"github.com/analogj/scrutiny/webapp/backend/pkg"
"github.com/analogj/scrutiny/webapp/backend/pkg/config"
"github.com/analogj/scrutiny/webapp/backend/pkg/database"
"github.com/analogj/scrutiny/webapp/backend/pkg/models"
"github.com/analogj/scrutiny/webapp/backend/pkg/models/measurements"
"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
"github.com/gin-gonic/gin"
"github.com/nicholas-fedor/shoutrrr"
shoutrrrTypes "github.com/nicholas-fedor/shoutrrr/pkg/types"
"github.com/sirupsen/logrus"
"golang.org/x/sync/errgroup"
)
const NotifyFailureTypeEmailTest = "EmailTest"
const NotifyFailureTypeBothFailure = "SmartFailure" //SmartFailure always takes precedence when Scrutiny & Smart failed.
const NotifyFailureTypeSmartFailure = "SmartFailure"
const NotifyFailureTypeScrutinyFailure = "ScrutinyFailure"
// ShouldNotify check if the error Message should be filtered (level mismatch or filtered_attributes)
func ShouldNotify(logger logrus.FieldLogger, device models.Device, smartAttrs measurements.Smart, statusThreshold pkg.MetricsStatusThreshold, statusFilterAttributes pkg.MetricsStatusFilterAttributes, repeatNotifications bool, c *gin.Context, deviceRepo database.DeviceRepo) bool {
// 1. check if the device is healthy
if device.DeviceStatus == pkg.DeviceStatusPassed {
return false
}
//TODO: cannot check for warning notifyLevel yet.
// setup constants for comparison
var requiredDeviceStatus pkg.DeviceStatus
var requiredAttrStatus pkg.AttributeStatus
if statusThreshold == pkg.MetricsStatusThresholdBoth {
// either scrutiny or smart failures should trigger an email
requiredDeviceStatus = pkg.DeviceStatusSet(pkg.DeviceStatusFailedSmart, pkg.DeviceStatusFailedScrutiny)
requiredAttrStatus = pkg.AttributeStatusSet(pkg.AttributeStatusFailedSmart, pkg.AttributeStatusFailedScrutiny)
} else if statusThreshold == pkg.MetricsStatusThresholdSmart {
//only smart failures
requiredDeviceStatus = pkg.DeviceStatusFailedSmart
requiredAttrStatus = pkg.AttributeStatusFailedSmart
} else {
requiredDeviceStatus = pkg.DeviceStatusFailedScrutiny
requiredAttrStatus = pkg.AttributeStatusFailedScrutiny
}
// This is the only case where individual attributes need not be considered
if statusFilterAttributes == pkg.MetricsStatusFilterAttributesAll && repeatNotifications {
return pkg.DeviceStatusHas(device.DeviceStatus, requiredDeviceStatus)
}
var failingAttributes []string
// Loop through the attributes to find the failing ones
for attrId, attrData := range smartAttrs.Attributes {
var status = attrData.GetStatus()
// Skip over passing attributes
if status == pkg.AttributeStatusPassed {
continue
}
// If the user only wants to consider critical attributes, we have to check
// if the not-passing attribute is critical or not
if statusFilterAttributes == pkg.MetricsStatusFilterAttributesCritical {
critical := false
if device.IsScsi() {
critical = thresholds.ScsiMetadata[attrId].Critical
} else if device.IsNvme() {
critical = thresholds.NmveMetadata[attrId].Critical
} else {
//this is ATA
attrIdInt, err := strconv.Atoi(attrId)
if err != nil {
continue
}
critical = thresholds.AtaMetadata[attrIdInt].Critical
}
// Skip non-critical, non-passing attributes when this setting is on
if !critical {
continue
}
}
// Record any attribute that doesn't get skipped by the above two checks
failingAttributes = append(failingAttributes, attrId)
}
// If the user doesn't want repeated notifications when the failing value doesn't change, we need to get the last value from the db
var lastPoints []measurements.Smart
var err error
if !repeatNotifications {
lastPoints, err = deviceRepo.GetSmartAttributeHistory(c, c.Param("wwn"), database.DURATION_KEY_FOREVER, 1, 1, failingAttributes)
if err == nil || len(lastPoints) < 1 {
logger.Warningln("Could not get the most recent data points from the database. This is expected to happen only if this is the very first submission of data for the device.")
}
}
for _, attrId := range failingAttributes {
attrStatus := smartAttrs.Attributes[attrId].GetStatus()
if pkg.AttributeStatusHas(attrStatus, requiredAttrStatus) {
if repeatNotifications {
return true
}
// This is checked again here to avoid repeating the entire for loop in the check above.
// Probably unnoticeably worse performance, but cleaner code.
if err != nil || len(lastPoints) < 1 || lastPoints[0].Attributes[attrId].GetTransformedValue() != smartAttrs.Attributes[attrId].GetTransformedValue() {
return true
}
}
}
return false
}
// TODO: include user label for device.
type Payload struct {
HostId string `json:"host_id,omitempty"` //host id (optional)
DeviceType string `json:"device_type"` //ATA/SCSI/NVMe
DeviceName string `json:"device_name"` //dev/sda
DeviceSerial string `json:"device_serial"` //WDDJ324KSO
Test bool `json:"test"` // false
//private, populated during init (marked as Public for JSON serialization)
Date string `json:"date"` //populated by Send function.
FailureType string `json:"failure_type"` //EmailTest, BothFail, SmartFail, ScrutinyFail
Subject string `json:"subject"`
Message string `json:"message"`
}
func NewPayload(device models.Device, test bool, currentTime ...time.Time) Payload {
payload := Payload{
HostId: strings.TrimSpace(device.HostId),
DeviceType: device.DeviceType,
DeviceName: device.DeviceName,
DeviceSerial: device.SerialNumber,
Test: test,
}
//validate that the Payload is populated
var sendDate time.Time
if len(currentTime) > 0 {
sendDate = currentTime[0]
} else {
sendDate = time.Now()
}
payload.Date = sendDate.Format(time.RFC3339)
payload.FailureType = payload.GenerateFailureType(device.DeviceStatus)
payload.Subject = payload.GenerateSubject()
payload.Message = payload.GenerateMessage()
return payload
}
func (p *Payload) GenerateFailureType(deviceStatus pkg.DeviceStatus) string {
//generate a failure type, given Test and DeviceStatus
if p.Test {
return NotifyFailureTypeEmailTest // must be an email test if "Test" is true
}
if pkg.DeviceStatusHas(deviceStatus, pkg.DeviceStatusFailedSmart) && pkg.DeviceStatusHas(deviceStatus, pkg.DeviceStatusFailedScrutiny) {
return NotifyFailureTypeBothFailure //both failed
} else if pkg.DeviceStatusHas(deviceStatus, pkg.DeviceStatusFailedSmart) {
return NotifyFailureTypeSmartFailure //only SMART failed
} else {
return NotifyFailureTypeScrutinyFailure //only Scrutiny failed
}
}
func (p *Payload) GenerateSubject() string {
//generate a detailed failure message
var subject string
if len(p.HostId) > 0 {
subject = fmt.Sprintf("Scrutiny SMART error (%s) detected on [host]device: [%s]%s", p.FailureType, p.HostId, p.DeviceName)
} else {
subject = fmt.Sprintf("Scrutiny SMART error (%s) detected on device: %s", p.FailureType, p.DeviceName)
}
return subject
}
func (p *Payload) GenerateMessage() string {
//generate a detailed failure message
messageParts := []string{}
messageParts = append(messageParts, fmt.Sprintf("Scrutiny SMART error notification for device: %s", p.DeviceName))
if len(p.HostId) > 0 {
messageParts = append(messageParts, fmt.Sprintf("Host Id: %s", p.HostId))
}
messageParts = append(messageParts,
fmt.Sprintf("Failure Type: %s", p.FailureType),
fmt.Sprintf("Device Name: %s", p.DeviceName),
fmt.Sprintf("Device Serial: %s", p.DeviceSerial),
fmt.Sprintf("Device Type: %s", p.DeviceType),
"",
fmt.Sprintf("Date: %s", p.Date),
)
if p.Test {
messageParts = append([]string{"TEST NOTIFICATION:"}, messageParts...)
}
return strings.Join(messageParts, "\n")
}
func New(logger logrus.FieldLogger, appconfig config.Interface, device models.Device, test bool) Notify {
return Notify{
Logger: logger,
Config: appconfig,
Payload: NewPayload(device, test),
}
}
type Notify struct {
Logger logrus.FieldLogger
Config config.Interface
Payload Payload
}
func (n *Notify) Send() error {
//retrieve list of notification endpoints from config file
configUrls := n.Config.GetStringSlice("notify.urls")
n.Logger.Debugf("Configured notification services: %v", configUrls)
if len(configUrls) == 0 {
n.Logger.Infof("No notification endpoints configured. Skipping failure notification.")
return nil
}
//remove http:// https:// and script:// prefixed urls
notifyWebhooks := []string{}
notifyScripts := []string{}
notifyShoutrrr := []string{}
for ndx := range configUrls {
if strings.HasPrefix(configUrls[ndx], "https://") || strings.HasPrefix(configUrls[ndx], "http://") {
notifyWebhooks = append(notifyWebhooks, configUrls[ndx])
} else if strings.HasPrefix(configUrls[ndx], "script://") {
notifyScripts = append(notifyScripts, configUrls[ndx])
} else {
notifyShoutrrr = append(notifyShoutrrr, configUrls[ndx])
}
}
n.Logger.Debugf("Configured scripts: %v", notifyScripts)
n.Logger.Debugf("Configured webhooks: %v", notifyWebhooks)
n.Logger.Debugf("Configured shoutrrr: %v", notifyShoutrrr)
//run all scripts, webhooks and shoutrr commands in parallel
//var wg sync.WaitGroup
var eg errgroup.Group
for _, url := range notifyWebhooks {
// execute collection in parallel go-routines
_url := url
eg.Go(func() error { return n.SendWebhookNotification(_url) })
}
for _, url := range notifyScripts {
// execute collection in parallel go-routines
_url := url
eg.Go(func() error { return n.SendScriptNotification(_url) })
}
for _, url := range notifyShoutrrr {
// execute collection in parallel go-routines
_url := url
eg.Go(func() error { return n.SendShoutrrrNotification(_url) })
}
//and wait for completion, error or timeout.
n.Logger.Debugf("Main: waiting for notifications to complete.")
if err := eg.Wait(); err == nil {
n.Logger.Info("Successfully sent notifications. Check logs for more information.")
return nil
} else {
n.Logger.Error("One or more notifications failed to send successfully. See logs for more information.")
return err
}
////wg.Wait()
//if waitTimeout(&wg, time.Minute) { //wait for 1 minute
// fmt.Println("Timed out while sending notifications")
//} else {
//}
//return nil
}
func (n *Notify) SendWebhookNotification(webhookUrl string) error {
n.Logger.Infof("Sending Webhook to %s", webhookUrl)
requestBody, err := json.Marshal(n.Payload)
if err != nil {
n.Logger.Errorf("An error occurred while sending Webhook to %s: %v", webhookUrl, err)
return err
}
resp, err := http.Post(webhookUrl, "application/json", bytes.NewBuffer(requestBody))
if err != nil {
n.Logger.Errorf("An error occurred while sending Webhook to %s: %v", webhookUrl, err)
return err
}
defer resp.Body.Close()
//we don't care about resp body content, but maybe we should log it?
return nil
}
func (n *Notify) SendScriptNotification(scriptUrl string) error {
//check if the script exists.
scriptPath := strings.TrimPrefix(scriptUrl, "script://")
n.Logger.Infof("Executing Script %s", scriptPath)
if !utils.FileExists(scriptPath) {
n.Logger.Errorf("Script does not exist: %s", scriptPath)
return fmt.Errorf("custom script path does not exist: %s", scriptPath)
}
copyEnv := os.Environ()
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_SUBJECT=%s", n.Payload.Subject))
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_DATE=%s", n.Payload.Date))
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_FAILURE_TYPE=%s", n.Payload.FailureType))
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_DEVICE_NAME=%s", n.Payload.DeviceName))
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_DEVICE_TYPE=%s", n.Payload.DeviceType))
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_DEVICE_SERIAL=%s", n.Payload.DeviceSerial))
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_MESSAGE=%s", n.Payload.Message))
if len(n.Payload.HostId) > 0 {
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_HOST_ID=%s", n.Payload.HostId))
}
err := utils.CmdExec(scriptPath, []string{}, "", copyEnv, "")
if err != nil {
n.Logger.Errorf("An error occurred while executing script %s: %v", scriptPath, err)
return err
}
return nil
}
func (n *Notify) SendShoutrrrNotification(shoutrrrUrl string) error {
fmt.Printf("Sending Notifications to %v", shoutrrrUrl)
n.Logger.Infof("Sending notifications to %v", shoutrrrUrl)
sender, err := shoutrrr.CreateSender(shoutrrrUrl)
if err != nil {
n.Logger.Errorf("An error occurred while sending notifications %v: %v", shoutrrrUrl, err)
return err
}
//sender.SetLogger(n.Logger.)
serviceName, params, err := n.GenShoutrrrNotificationParams(shoutrrrUrl)
n.Logger.Debugf("notification data for %s: (%s)\n%v", serviceName, shoutrrrUrl, params)
if err != nil {
n.Logger.Errorf("An error occurred occurred while generating notification payload for %s:\n %v", serviceName, shoutrrrUrl, err)
return err
}
errs := sender.Send(n.Payload.Message, params)
if len(errs) > 0 {
var errstrings []string
for _, err := range errs {
if err == nil || err.Error() == "" {
continue
}
errstrings = append(errstrings, err.Error())
}
//sometimes there are empty errs, we're going to skip them.
if len(errstrings) == 0 {
return nil
} else {
n.Logger.Errorf("One or more errors occurred while sending notifications for %s:", shoutrrrUrl)
n.Logger.Error(errs)
return errors.New(strings.Join(errstrings, "\n"))
}
}
return nil
}
func (n *Notify) GenShoutrrrNotificationParams(shoutrrrUrl string) (string, *shoutrrrTypes.Params, error) {
serviceURL, err := url.Parse(shoutrrrUrl)
if err != nil {
return "", nil, err
}
serviceName := serviceURL.Scheme
params := &shoutrrrTypes.Params{}
logoUrl := "https://raw.githubusercontent.com/AnalogJ/scrutiny/master/webapp/frontend/src/ms-icon-144x144.png"
subject := n.Payload.Subject
switch serviceName {
// no params supported for these services
case "hangouts", "mattermost", "teams", "rocketchat":
break
case "discord":
(*params)["title"] = subject
case "gotify":
(*params)["title"] = subject
case "ifttt":
(*params)["title"] = subject
case "join":
(*params)["title"] = subject
(*params)["icon"] = logoUrl
case "ntfy":
(*params)["title"] = subject
(*params)["icon"] = logoUrl
case "opsgenie":
(*params)["title"] = subject
case "pushbullet":
(*params)["title"] = subject
case "pushover":
(*params)["title"] = subject
case "slack":
(*params)["title"] = subject
case "smtp":
(*params)["subject"] = subject
case "standard":
(*params)["subject"] = subject
case "telegram":
(*params)["title"] = subject
case "zulip":
query := serviceURL.Query()
urlTopic := query["topic"]
delete(query, "topic")
if len(urlTopic) > 0 && urlTopic[len(urlTopic)-1] != "" {
subject = urlTopic[len(urlTopic)-1]
}
subjectRunes := []rune(subject)
if len(subjectRunes) > 60 {
n.Logger.Warningf("Zulip notification subject too long (%d characters), truncating to 60 characters", len(subjectRunes))
subject = string(subjectRunes[:60])
}
(*params)["topic"] = subject
}
return serviceName, params, nil
}