diff --git a/README.md b/README.md index e6278ab..af37e1a 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Cross platform command line application to run health checks against sensor stat This tool lets you automatically check if senseBoxes are still runnning correctly, and when that's not the case, notifies you. Currently, email notifications are implemented, but other transports can be added easily. -Implemented health checks are [described below](#possible-values-for-defaulthealthchecksevents), and new ones can be added just as easily (given some knowledge of programming). +Implemented health checks are [described below](#available-healthchecks), and new ones can be added just as easily (given some knowledge of programming). The tool has multiple modes of operation: @@ -61,7 +61,7 @@ Contributions are welcome! Check out the following locations for plugging in new functionality: - new notification transports: [core/notifiers.go](core/notifiers.go) -- new health checks: [core/healthcheck*.go](core/healtchecks.go) +- new health checks: [core/healthcheck*.go](core/healthchecks.go) - new commands: [cmd/](cmd/) Before committing and submitting a pull request, please run `go fmt ./ cmd/ core/`. diff --git a/cmd/check.go b/cmd/cmd_check.go similarity index 100% rename from cmd/check.go rename to cmd/cmd_check.go diff --git a/cmd/debug.go b/cmd/cmd_debug.go similarity index 68% rename from cmd/debug.go rename to cmd/cmd_debug.go index ad1232a..3946588 100644 --- a/cmd/debug.go +++ b/cmd/cmd_debug.go @@ -12,8 +12,14 @@ import ( "github.com/noerw/osem_notify/utils" ) +var ( + clearCache bool +) + func init() { debugCmd.AddCommand(debugNotificationsCmd) + debugCacheCmd.PersistentFlags().BoolVarP(&clearCache, "clear", "", false, "reset the notifications cache") + debugCmd.AddCommand(debugCacheCmd) rootCmd.AddCommand(debugCmd) } @@ -32,13 +38,28 @@ var debugCmd = &cobra.Command{ }, } +var debugCacheCmd = &cobra.Command{ + Use: "cache", + Short: "Print or clear the notifications cache", + Long: "osem_notify debug cache prints the contents of the notifications cache", + RunE: func(cmd *cobra.Command, args []string) error { + if clearCache { + return core.ClearCache() + } + core.PrintCache() + return nil + }, +} + + var debugNotificationsCmd = &cobra.Command{ Use: "notifications", Short: "Verify that notifications are working", - Long: "osem_notify debug tests the functionality of the given feature", + Long: `osem_notify debug notifications sends a test notification according +to healthchecks.default.notifications.options as defined in the config file`, RunE: func(cmd *cobra.Command, args []string) error { defaultNotifyConf := &core.NotifyConfig{} - err := viper.UnmarshalKey("defaultHealthchecks", defaultNotifyConf) + err := viper.UnmarshalKey("healthchecks.default", defaultNotifyConf) if err != nil { return err } @@ -55,11 +76,11 @@ var debugNotificationsCmd = &cobra.Command{ host, _ := os.Hostname() err = n.Submit(core.Notification{ - Subject: "Test notification from opeSenseMap notifier", + Subject: "Test notification from openSenseMap notifier", Body: fmt.Sprintf("Your notification set up on %s is working fine!", host), }) if err != nil { - notLog.Warnf("could not submit test notification for %s notifier!", transport) + notLog.Warnf("could not submit test notification for %s notifier: %s", transport, err) continue } notLog.Info("Test notification (successfully?) submitted, check the specified inbox") diff --git a/cmd/root.go b/cmd/cmd_root.go similarity index 73% rename from cmd/root.go rename to cmd/cmd_root.go index eb88a7a..c3549b5 100644 --- a/cmd/root.go +++ b/cmd/cmd_root.go @@ -23,21 +23,32 @@ var configHelpCmd = &cobra.Command{ > Example configuration: - # override default health checks: - defaultHealthchecks: - notifications: - transport: email - options: - recipients: - - fridolina@example.com - - ruth.less@example.com - events: - - type: "measurement_age" - target: "all" # all sensors - threshold: "15m" # any duration - - type: "measurement_faulty" - target: "all" - threshold: "" + healthchecks: + # override default health checks for all boxes + default: + notifications: + transport: email + options: + recipients: + - fridolina@example.com + events: + - type: "measurement_age" + target: "all" # all sensors + threshold: "15m" # any duration + - type: "measurement_faulty" + target: "all" + threshold: "" + + # set health checks per box + 593bcd656ccf3b0011791f5a: + notifications: + options: + recipients: + - ruth.less@example.com + events: + - type: "measurement_max" + target: "593bcd656ccf3b0011791f5b" + threshold: "40" # only needed when sending notifications via email email: @@ -48,14 +59,14 @@ var configHelpCmd = &cobra.Command{ from: hildegunst@example.com -> possible values for defaultHealthchecks.notifications: +> possible values for healthchecks.*.notifications: transport | options ----------|------------------------------------- email | recipients: list of email addresses -> possible values for defaultHealthchecks.events[]: +> possible values for healthchecks.*.events[]: type | description -------------------|--------------------------------------------------- @@ -105,8 +116,9 @@ var cfgFile string func init() { var ( - shouldNotify bool debug bool + noCache bool + shouldNotify string logFormat string api string ) @@ -117,14 +129,16 @@ func init() { rootCmd.PersistentFlags().StringVarP(&api, "api", "a", "https://api.opensensemap.org", "openSenseMap API to query against") rootCmd.PersistentFlags().StringVarP(&logFormat, "logformat", "l", "plain", "log format, can be plain or json") rootCmd.PersistentFlags().BoolVarP(&debug, "debug", "d", false, "enable verbose logging") - rootCmd.PersistentFlags().BoolVarP(&shouldNotify, "notify", "n", false, `if set, will send out notifications, -Otherwise results are printed to stdout only. + rootCmd.PersistentFlags().StringVarP(&shouldNotify, "notify", "n", "", `If set, will send out notifications for the specified type of check result, +otherwise results are printed to stdout only. +Allowed values are "all", "error", "ok". You might want to run 'osem_notify debug notifications' first to verify everything works. -Notifications for failing checks are sent only once, -and then cached until the issue got resolved. -To clear the cache, delete the file ~/.osem_notify_cache.yaml. +Notifications for failing checks are sent only once, and then cached until the issue got +resolved, unless --no-cache is set. +To clear the cache, run 'osem_notify debug cache --clear'. `) + rootCmd.PersistentFlags().BoolVarP(&noCache, "no-cache", "", false, "send all notifications, ignoring results from previous runs. also don't update the cache.") viper.BindPFlags(rootCmd.PersistentFlags()) // let flags override config diff --git a/cmd/version.go b/cmd/cmd_version.go similarity index 100% rename from cmd/version.go rename to cmd/cmd_version.go diff --git a/cmd/watch.go b/cmd/cmd_watch.go similarity index 100% rename from cmd/watch.go rename to cmd/cmd_watch.go diff --git a/cmd/config.go b/cmd/config.go index 450e273..9d69857 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -4,9 +4,11 @@ import ( "os" "strings" - "github.com/noerw/osem_notify/utils" log "github.com/sirupsen/logrus" "github.com/spf13/viper" + + "github.com/noerw/osem_notify/core" + "github.com/noerw/osem_notify/utils" ) // initConfig reads in config file and ENV variables if set. @@ -58,3 +60,55 @@ func validateConfig() { } } } + +func getNotifyConf(boxID string) (*core.NotifyConfig, error) { + // config used when no configuration is present at all + conf := &core.NotifyConfig{ + Events: []core.NotifyEvent{ + core.NotifyEvent{ + Type: "measurement_age", + Target: "all", + Threshold: "15m", + }, + core.NotifyEvent{ + Type: "measurement_faulty", + Target: "all", + Threshold: "", + }, + }, + } + + // override with default configuration from file + // considering the case that .events may be defined but empty + // to allow to define no events, and don't leak shorter lists into + // previous longer ones + if keyDefined("healthchecks.default.events") { + conf.Events = []core.NotifyEvent{} + } + err := viper.UnmarshalKey("healthchecks.default", conf) + if err != nil { + return nil, err + } + + // override with per box configuration from file + if keyDefined("healthchecks." + boxID + ".events") { + conf.Events = []core.NotifyEvent{} + } + err = viper.UnmarshalKey("healthchecks."+boxID, conf) + if err != nil { + return nil, err + } + + return conf, nil +} + +// implement our own keyCheck, as viper.InConfig() does not work +func keyDefined(key string) bool { + allConfKeys := viper.AllKeys() + for _, k := range allConfKeys { + if k == key { + return true + } + } + return false +} diff --git a/cmd/shared.go b/cmd/shared.go index bf2c40e..ea56cc5 100644 --- a/cmd/shared.go +++ b/cmd/shared.go @@ -3,6 +3,7 @@ package cmd import ( "fmt" "regexp" + "strings" "github.com/spf13/cobra" "github.com/spf13/viper" @@ -33,21 +34,38 @@ func BoxIdValidator(cmd *cobra.Command, args []string) error { } func checkAndNotify(boxIds []string) error { - defaultNotifyConf := &core.NotifyConfig{} - err := viper.UnmarshalKey("defaultHealthchecks", defaultNotifyConf) - if err != nil { - return err + boxLocalConfig := map[string]*core.NotifyConfig{} + for _, boxID := range boxIds { + c, err := getNotifyConf(boxID) + if err != nil { + return err + } + boxLocalConfig[boxID] = c } - results, err := core.CheckBoxes(boxIds, defaultNotifyConf) + results, err := core.CheckBoxes(boxLocalConfig) if err != nil { return err } results.Log() - if viper.GetBool("notify") { - return results.SendNotifications() + notify := strings.ToLower(viper.GetString("notify")) + if notify != "" { + types := []string{} + switch notify { + case "all": + types = []string{core.CheckErr, core.CheckOk} + case "error", "err": + types = []string{core.CheckErr} + case "ok": + types = []string{core.CheckOk} + default: + return fmt.Errorf("invalid value %s for \"notify\"", notify) + } + + useCache := !viper.GetBool("no-cache") + return results.SendNotifications(types, useCache) } return nil } diff --git a/core/cache.go b/core/cache.go index 92eafc7..877d6f3 100644 --- a/core/cache.go +++ b/core/cache.go @@ -60,3 +60,18 @@ func updateCache(box *Box, results []CheckResult) error { } return cache.WriteConfig() } + +func ClearCache() error { + fileName := utils.GetConfigFile("osem_notify_cache") + _, err := os.Stat(fileName) + if err != nil { + return nil + } + return os.Remove(fileName) +} + +func PrintCache() { + for key, val := range cache.AllSettings() { + log.Infof("%20s: %v", key, val) + } +} diff --git a/core/checkrunner.go b/core/checkrunner.go index ac4db93..9f78d90 100644 --- a/core/checkrunner.go +++ b/core/checkrunner.go @@ -10,11 +10,11 @@ import ( type BoxCheckResults map[*Box][]CheckResult -func (results BoxCheckResults) Size(status string) int { +func (results BoxCheckResults) Size(statusToCheck []string) int { size := 0 for _, boxResults := range results { for _, result := range boxResults { - if status == result.Status || status == "" { + if result.HasStatus(statusToCheck) { size++ } } @@ -42,24 +42,26 @@ func (results BoxCheckResults) Log() { countErr++ } } - if countErr == 0 { + if len(boxResults) == 0 { + boxLog.Infof("%s: no checks defined", box.Name) + } else if countErr == 0 { boxLog.Infof("%s: all is fine!", box.Name) } } } -func CheckBoxes(boxIds []string, defaultConf *NotifyConfig) (BoxCheckResults, error) { - log.Debug("Checking notifications for ", len(boxIds), " box(es)") +func CheckBoxes(boxLocalConfs map[string]*NotifyConfig) (BoxCheckResults, error) { + log.Debug("Checking notifications for ", len(boxLocalConfs), " box(es)") results := BoxCheckResults{} errs := []string{} // TODO: check boxes in parallel, capped at 5 at once - for _, boxId := range boxIds { + for boxId, localConf := range boxLocalConfs { boxLogger := log.WithField("boxId", boxId) boxLogger.Info("checking box for events") - box, res, err := checkBox(boxId, defaultConf) + box, res, err := checkBox(boxId, localConf) if err != nil { boxLogger.Errorf("could not run checks on box %s: %s", boxId, err) errs = append(errs, err.Error()) diff --git a/core/healthcheck_measurement_minmax.go b/core/healthcheck_measurement_minmax.go index 441d3fd..fa71b59 100644 --- a/core/healthcheck_measurement_minmax.go +++ b/core/healthcheck_measurement_minmax.go @@ -5,8 +5,13 @@ import ( "strconv" ) +const ( + nameMin = "measurement_min" + nameMax = "measurement_max" +) + var checkMeasurementMin = checkType{ - name: "measurement_min", + name: nameMin, toString: func(r CheckResult) string { return fmt.Sprintf("Sensor %s (%s) reads low value of %s", r.TargetName, r.Target, r.Value) }, @@ -14,7 +19,7 @@ var checkMeasurementMin = checkType{ } var checkMeasurementMax = checkType{ - name: "measurement_min", + name: nameMax, toString: func(r CheckResult) string { return fmt.Sprintf("Sensor %s (%s) reads high value of %s", r.TargetName, r.Target, r.Value) }, @@ -41,8 +46,8 @@ func validateMeasurementMinMax(e NotifyEvent, s Sensor, b Box) (CheckResult, err return result, err } - if e.Type == eventMeasurementValMax && val > thresh || - e.Type == eventMeasurementValMin && val < thresh { + if e.Type == nameMax && val > thresh || + e.Type == nameMin && val < thresh { result.Status = CheckErr } diff --git a/core/healthchecks.go b/core/healthchecks.go index 76ce74f..36792cb 100644 --- a/core/healthchecks.go +++ b/core/healthchecks.go @@ -9,16 +9,12 @@ import ( ) const ( - CheckOk = "OK" - CheckErr = "FAILED" - eventMeasurementAge = "measurement_age" - eventMeasurementValMin = "measurement_min" - eventMeasurementValMax = "measurement_max" - eventMeasurementValFaulty = "measurement_faulty" - eventTargetAll = "all" // if event.Target is this value, all sensors will be checked + CheckOk = "OK" + CheckErr = "FAILED" + eventTargetAll = "all" // if event.Target is this value, all sensors will be checked ) -type checkType = struct { +type checkType struct { name string // name that is used in config toString func(result CheckResult) string // error message when check failed checkFunc func(event NotifyEvent, sensor Sensor, context Box) (CheckResult, error) @@ -41,6 +37,15 @@ type CheckResult struct { Threshold string } +func (r CheckResult) HasStatus(statusToCheck []string) bool { + for _, status := range statusToCheck { + if status == r.Status { + return true + } + } + return false +} + func (r CheckResult) EventID() string { s := fmt.Sprintf("%s%s%s", r.Event, r.Target, r.Threshold) hasher := sha256.New() diff --git a/core/notifiers.go b/core/notifiers.go index 4f157d2..2b41503 100644 --- a/core/notifiers.go +++ b/core/notifiers.go @@ -39,25 +39,24 @@ func (box Box) GetNotifier() (AbstractNotifier, error) { return notifier.New(box.NotifyConf.Notifications.Options) } -func (results BoxCheckResults) SendNotifications() error { - // TODO: expose flags to not use cache, and to notify for checks turned CheckOk as well - - results = results.filterChangedFromCache() +func (results BoxCheckResults) SendNotifications(notifyTypes []string, useCache bool) error { + if useCache { + results = results.filterChangedFromCache() + } - nErr := results.Size(CheckErr) - if nErr == 0 { + toCheck := results.Size(notifyTypes) + if toCheck == 0 { log.Info("No notifications due.") } else { - log.Infof("Notifying for %v checks turned bad in total...", nErr) + log.Infof("Notifying for %v checks changing state to %v...", toCheck, notifyTypes) } - log.Debugf("%v checks turned OK!", results.Size(CheckOk)) errs := []string{} for box, resultsBox := range results { // only submit results which are errors resultsDue := []CheckResult{} for _, result := range resultsBox { - if result.Status != CheckOk { + if result.HasStatus(notifyTypes) { resultsDue = append(resultsDue, result) } } @@ -91,16 +90,18 @@ func (results BoxCheckResults) SendNotifications() error { } } - // update cache (also with CheckOk results to reset status) - notifyLog.Debug("updating cache") - cacheError := updateCache(box, resultsBox) - if cacheError != nil { - notifyLog.Error("could not cache notification results: ", cacheError) - errs = append(errs, cacheError.Error()) + // update cache (with /all/ changed results to reset status) + if useCache { + notifyLog.Debug("updating cache") + cacheError := updateCache(box, resultsBox) + if cacheError != nil { + notifyLog.Error("could not cache notification results: ", cacheError) + errs = append(errs, cacheError.Error()) + } } if len(resultsDue) != 0 { - notifyLog.Infof("Sent notification for %s via %s with %v new issues", box.Name, transport, len(resultsDue)) + notifyLog.Infof("Sent notification for %s via %s with %v updated issues", box.Name, transport, len(resultsDue)) } }