Merge branch 'develop'
This commit is contained in:
commit
794ea5369d
13 changed files with 207 additions and 72 deletions
|
@ -5,7 +5,7 @@ Cross platform command line application to run health checks against sensor stat
|
|||
This tool lets you automatically check if senseBoxes are still running correctly,
|
||||
and when that's not the case, notifies you.
|
||||
Currently, email notifications are implemented, but other transports can be added easily.
|
||||
Implemented health checks are [described below](#possible-values-for-defaulthealthchecksevents), and new ones can be added just as easily (given some knowledge of programming).
|
||||
Implemented health checks are [described below](#available-healthchecks), and new ones can be added just as easily (given some knowledge of programming).
|
||||
|
||||
The tool has multiple modes of operation:
|
||||
|
||||
|
@ -61,7 +61,7 @@ Contributions are welcome!
|
|||
Check out the following locations for plugging in new functionality:
|
||||
|
||||
- new notification transports: [core/notifiers.go](core/notifiers.go)
|
||||
- new health checks: [core/healthcheck*.go](core/healtchecks.go)
|
||||
- new health checks: [core/healthcheck*.go](core/healthchecks.go)
|
||||
- new commands: [cmd/](cmd/)
|
||||
|
||||
Before committing and submitting a pull request, please run `go fmt ./ cmd/ core/`.
|
||||
|
|
|
@ -12,8 +12,14 @@ import (
|
|||
"github.com/noerw/osem_notify/utils"
|
||||
)
|
||||
|
||||
var (
|
||||
clearCache bool
|
||||
)
|
||||
|
||||
func init() {
|
||||
debugCmd.AddCommand(debugNotificationsCmd)
|
||||
debugCacheCmd.PersistentFlags().BoolVarP(&clearCache, "clear", "", false, "reset the notifications cache")
|
||||
debugCmd.AddCommand(debugCacheCmd)
|
||||
rootCmd.AddCommand(debugCmd)
|
||||
}
|
||||
|
||||
|
@ -32,13 +38,28 @@ var debugCmd = &cobra.Command{
|
|||
},
|
||||
}
|
||||
|
||||
var debugCacheCmd = &cobra.Command{
|
||||
Use: "cache",
|
||||
Short: "Print or clear the notifications cache",
|
||||
Long: "osem_notify debug cache prints the contents of the notifications cache",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
if clearCache {
|
||||
return core.ClearCache()
|
||||
}
|
||||
core.PrintCache()
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
var debugNotificationsCmd = &cobra.Command{
|
||||
Use: "notifications",
|
||||
Short: "Verify that notifications are working",
|
||||
Long: "osem_notify debug <feature> tests the functionality of the given feature",
|
||||
Long: `osem_notify debug notifications sends a test notification according
|
||||
to healthchecks.default.notifications.options as defined in the config file`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
defaultNotifyConf := &core.NotifyConfig{}
|
||||
err := viper.UnmarshalKey("defaultHealthchecks", defaultNotifyConf)
|
||||
err := viper.UnmarshalKey("healthchecks.default", defaultNotifyConf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -55,11 +76,11 @@ var debugNotificationsCmd = &cobra.Command{
|
|||
|
||||
host, _ := os.Hostname()
|
||||
err = n.Submit(core.Notification{
|
||||
Subject: "Test notification from opeSenseMap notifier",
|
||||
Subject: "Test notification from openSenseMap notifier",
|
||||
Body: fmt.Sprintf("Your notification set up on %s is working fine!", host),
|
||||
})
|
||||
if err != nil {
|
||||
notLog.Warnf("could not submit test notification for %s notifier!", transport)
|
||||
notLog.Warnf("could not submit test notification for %s notifier: %s", transport, err)
|
||||
continue
|
||||
}
|
||||
notLog.Info("Test notification (successfully?) submitted, check the specified inbox")
|
|
@ -23,14 +23,14 @@ var configHelpCmd = &cobra.Command{
|
|||
|
||||
> Example configuration:
|
||||
|
||||
# override default health checks:
|
||||
defaultHealthchecks:
|
||||
healthchecks:
|
||||
# override default health checks for all boxes
|
||||
default:
|
||||
notifications:
|
||||
transport: email
|
||||
options:
|
||||
recipients:
|
||||
- fridolina@example.com
|
||||
- ruth.less@example.com
|
||||
events:
|
||||
- type: "measurement_age"
|
||||
target: "all" # all sensors
|
||||
|
@ -39,6 +39,17 @@ var configHelpCmd = &cobra.Command{
|
|||
target: "all"
|
||||
threshold: ""
|
||||
|
||||
# set health checks per box
|
||||
593bcd656ccf3b0011791f5a:
|
||||
notifications:
|
||||
options:
|
||||
recipients:
|
||||
- ruth.less@example.com
|
||||
events:
|
||||
- type: "measurement_max"
|
||||
target: "593bcd656ccf3b0011791f5b"
|
||||
threshold: "40"
|
||||
|
||||
# only needed when sending notifications via email
|
||||
email:
|
||||
host: smtp.example.com
|
||||
|
@ -48,14 +59,14 @@ var configHelpCmd = &cobra.Command{
|
|||
from: hildegunst@example.com
|
||||
|
||||
|
||||
> possible values for defaultHealthchecks.notifications:
|
||||
> possible values for healthchecks.*.notifications:
|
||||
|
||||
transport | options
|
||||
----------|-------------------------------------
|
||||
email | recipients: list of email addresses
|
||||
|
||||
|
||||
> possible values for defaultHealthchecks.events[]:
|
||||
> possible values for healthchecks.*.events[]:
|
||||
|
||||
type | description
|
||||
-------------------|---------------------------------------------------
|
||||
|
@ -105,8 +116,9 @@ var cfgFile string
|
|||
|
||||
func init() {
|
||||
var (
|
||||
shouldNotify bool
|
||||
debug bool
|
||||
noCache bool
|
||||
shouldNotify string
|
||||
logFormat string
|
||||
api string
|
||||
)
|
||||
|
@ -117,14 +129,16 @@ func init() {
|
|||
rootCmd.PersistentFlags().StringVarP(&api, "api", "a", "https://api.opensensemap.org", "openSenseMap API to query against")
|
||||
rootCmd.PersistentFlags().StringVarP(&logFormat, "logformat", "l", "plain", "log format, can be plain or json")
|
||||
rootCmd.PersistentFlags().BoolVarP(&debug, "debug", "d", false, "enable verbose logging")
|
||||
rootCmd.PersistentFlags().BoolVarP(&shouldNotify, "notify", "n", false, `if set, will send out notifications,
|
||||
Otherwise results are printed to stdout only.
|
||||
rootCmd.PersistentFlags().StringVarP(&shouldNotify, "notify", "n", "", `If set, will send out notifications for the specified type of check result,
|
||||
otherwise results are printed to stdout only.
|
||||
Allowed values are "all", "error", "ok".
|
||||
You might want to run 'osem_notify debug notifications' first to verify everything works.
|
||||
|
||||
Notifications for failing checks are sent only once,
|
||||
and then cached until the issue got resolved.
|
||||
To clear the cache, delete the file ~/.osem_notify_cache.yaml.
|
||||
Notifications for failing checks are sent only once, and then cached until the issue got
|
||||
resolved, unless --no-cache is set.
|
||||
To clear the cache, run 'osem_notify debug cache --clear'.
|
||||
`)
|
||||
rootCmd.PersistentFlags().BoolVarP(&noCache, "no-cache", "", false, "send all notifications, ignoring results from previous runs. also don't update the cache.")
|
||||
|
||||
viper.BindPFlags(rootCmd.PersistentFlags()) // let flags override config
|
||||
|
|
@ -4,9 +4,11 @@ import (
|
|||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/noerw/osem_notify/utils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/spf13/viper"
|
||||
|
||||
"github.com/noerw/osem_notify/core"
|
||||
"github.com/noerw/osem_notify/utils"
|
||||
)
|
||||
|
||||
// initConfig reads in config file and ENV variables if set.
|
||||
|
@ -58,3 +60,55 @@ func validateConfig() {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func getNotifyConf(boxID string) (*core.NotifyConfig, error) {
|
||||
// config used when no configuration is present at all
|
||||
conf := &core.NotifyConfig{
|
||||
Events: []core.NotifyEvent{
|
||||
core.NotifyEvent{
|
||||
Type: "measurement_age",
|
||||
Target: "all",
|
||||
Threshold: "15m",
|
||||
},
|
||||
core.NotifyEvent{
|
||||
Type: "measurement_faulty",
|
||||
Target: "all",
|
||||
Threshold: "",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// override with default configuration from file
|
||||
// considering the case that .events may be defined but empty
|
||||
// to allow to define no events, and don't leak shorter lists into
|
||||
// previous longer ones
|
||||
if keyDefined("healthchecks.default.events") {
|
||||
conf.Events = []core.NotifyEvent{}
|
||||
}
|
||||
err := viper.UnmarshalKey("healthchecks.default", conf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// override with per box configuration from file
|
||||
if keyDefined("healthchecks." + boxID + ".events") {
|
||||
conf.Events = []core.NotifyEvent{}
|
||||
}
|
||||
err = viper.UnmarshalKey("healthchecks."+boxID, conf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return conf, nil
|
||||
}
|
||||
|
||||
// implement our own keyCheck, as viper.InConfig() does not work
|
||||
func keyDefined(key string) bool {
|
||||
allConfKeys := viper.AllKeys()
|
||||
for _, k := range allConfKeys {
|
||||
if k == key {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ package cmd
|
|||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/spf13/viper"
|
||||
|
@ -33,21 +34,38 @@ func BoxIdValidator(cmd *cobra.Command, args []string) error {
|
|||
}
|
||||
|
||||
func checkAndNotify(boxIds []string) error {
|
||||
defaultNotifyConf := &core.NotifyConfig{}
|
||||
err := viper.UnmarshalKey("defaultHealthchecks", defaultNotifyConf)
|
||||
boxLocalConfig := map[string]*core.NotifyConfig{}
|
||||
for _, boxID := range boxIds {
|
||||
c, err := getNotifyConf(boxID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
boxLocalConfig[boxID] = c
|
||||
}
|
||||
|
||||
results, err := core.CheckBoxes(boxIds, defaultNotifyConf)
|
||||
results, err := core.CheckBoxes(boxLocalConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
results.Log()
|
||||
|
||||
if viper.GetBool("notify") {
|
||||
return results.SendNotifications()
|
||||
notify := strings.ToLower(viper.GetString("notify"))
|
||||
if notify != "" {
|
||||
types := []string{}
|
||||
switch notify {
|
||||
case "all":
|
||||
types = []string{core.CheckErr, core.CheckOk}
|
||||
case "error", "err":
|
||||
types = []string{core.CheckErr}
|
||||
case "ok":
|
||||
types = []string{core.CheckOk}
|
||||
default:
|
||||
return fmt.Errorf("invalid value %s for \"notify\"", notify)
|
||||
}
|
||||
|
||||
useCache := !viper.GetBool("no-cache")
|
||||
return results.SendNotifications(types, useCache)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -60,3 +60,18 @@ func updateCache(box *Box, results []CheckResult) error {
|
|||
}
|
||||
return cache.WriteConfig()
|
||||
}
|
||||
|
||||
func ClearCache() error {
|
||||
fileName := utils.GetConfigFile("osem_notify_cache")
|
||||
_, err := os.Stat(fileName)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return os.Remove(fileName)
|
||||
}
|
||||
|
||||
func PrintCache() {
|
||||
for key, val := range cache.AllSettings() {
|
||||
log.Infof("%20s: %v", key, val)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,11 +10,11 @@ import (
|
|||
|
||||
type BoxCheckResults map[*Box][]CheckResult
|
||||
|
||||
func (results BoxCheckResults) Size(status string) int {
|
||||
func (results BoxCheckResults) Size(statusToCheck []string) int {
|
||||
size := 0
|
||||
for _, boxResults := range results {
|
||||
for _, result := range boxResults {
|
||||
if status == result.Status || status == "" {
|
||||
if result.HasStatus(statusToCheck) {
|
||||
size++
|
||||
}
|
||||
}
|
||||
|
@ -42,24 +42,26 @@ func (results BoxCheckResults) Log() {
|
|||
countErr++
|
||||
}
|
||||
}
|
||||
if countErr == 0 {
|
||||
if len(boxResults) == 0 {
|
||||
boxLog.Infof("%s: no checks defined", box.Name)
|
||||
} else if countErr == 0 {
|
||||
boxLog.Infof("%s: all is fine!", box.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func CheckBoxes(boxIds []string, defaultConf *NotifyConfig) (BoxCheckResults, error) {
|
||||
log.Debug("Checking notifications for ", len(boxIds), " box(es)")
|
||||
func CheckBoxes(boxLocalConfs map[string]*NotifyConfig) (BoxCheckResults, error) {
|
||||
log.Debug("Checking notifications for ", len(boxLocalConfs), " box(es)")
|
||||
|
||||
results := BoxCheckResults{}
|
||||
errs := []string{}
|
||||
|
||||
// TODO: check boxes in parallel, capped at 5 at once
|
||||
for _, boxId := range boxIds {
|
||||
for boxId, localConf := range boxLocalConfs {
|
||||
boxLogger := log.WithField("boxId", boxId)
|
||||
boxLogger.Info("checking box for events")
|
||||
|
||||
box, res, err := checkBox(boxId, defaultConf)
|
||||
box, res, err := checkBox(boxId, localConf)
|
||||
if err != nil {
|
||||
boxLogger.Errorf("could not run checks on box %s: %s", boxId, err)
|
||||
errs = append(errs, err.Error())
|
||||
|
|
|
@ -5,8 +5,13 @@ import (
|
|||
"strconv"
|
||||
)
|
||||
|
||||
const (
|
||||
nameMin = "measurement_min"
|
||||
nameMax = "measurement_max"
|
||||
)
|
||||
|
||||
var checkMeasurementMin = checkType{
|
||||
name: "measurement_min",
|
||||
name: nameMin,
|
||||
toString: func(r CheckResult) string {
|
||||
return fmt.Sprintf("Sensor %s (%s) reads low value of %s", r.TargetName, r.Target, r.Value)
|
||||
},
|
||||
|
@ -14,7 +19,7 @@ var checkMeasurementMin = checkType{
|
|||
}
|
||||
|
||||
var checkMeasurementMax = checkType{
|
||||
name: "measurement_min",
|
||||
name: nameMax,
|
||||
toString: func(r CheckResult) string {
|
||||
return fmt.Sprintf("Sensor %s (%s) reads high value of %s", r.TargetName, r.Target, r.Value)
|
||||
},
|
||||
|
@ -41,8 +46,8 @@ func validateMeasurementMinMax(e NotifyEvent, s Sensor, b Box) (CheckResult, err
|
|||
return result, err
|
||||
}
|
||||
|
||||
if e.Type == eventMeasurementValMax && val > thresh ||
|
||||
e.Type == eventMeasurementValMin && val < thresh {
|
||||
if e.Type == nameMax && val > thresh ||
|
||||
e.Type == nameMin && val < thresh {
|
||||
result.Status = CheckErr
|
||||
}
|
||||
|
||||
|
|
|
@ -11,14 +11,10 @@ import (
|
|||
const (
|
||||
CheckOk = "OK"
|
||||
CheckErr = "FAILED"
|
||||
eventMeasurementAge = "measurement_age"
|
||||
eventMeasurementValMin = "measurement_min"
|
||||
eventMeasurementValMax = "measurement_max"
|
||||
eventMeasurementValFaulty = "measurement_faulty"
|
||||
eventTargetAll = "all" // if event.Target is this value, all sensors will be checked
|
||||
)
|
||||
|
||||
type checkType = struct {
|
||||
type checkType struct {
|
||||
name string // name that is used in config
|
||||
toString func(result CheckResult) string // error message when check failed
|
||||
checkFunc func(event NotifyEvent, sensor Sensor, context Box) (CheckResult, error)
|
||||
|
@ -41,6 +37,15 @@ type CheckResult struct {
|
|||
Threshold string
|
||||
}
|
||||
|
||||
func (r CheckResult) HasStatus(statusToCheck []string) bool {
|
||||
for _, status := range statusToCheck {
|
||||
if status == r.Status {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (r CheckResult) EventID() string {
|
||||
s := fmt.Sprintf("%s%s%s", r.Event, r.Target, r.Threshold)
|
||||
hasher := sha256.New()
|
||||
|
|
|
@ -39,25 +39,24 @@ func (box Box) GetNotifier() (AbstractNotifier, error) {
|
|||
return notifier.New(box.NotifyConf.Notifications.Options)
|
||||
}
|
||||
|
||||
func (results BoxCheckResults) SendNotifications() error {
|
||||
// TODO: expose flags to not use cache, and to notify for checks turned CheckOk as well
|
||||
|
||||
func (results BoxCheckResults) SendNotifications(notifyTypes []string, useCache bool) error {
|
||||
if useCache {
|
||||
results = results.filterChangedFromCache()
|
||||
}
|
||||
|
||||
nErr := results.Size(CheckErr)
|
||||
if nErr == 0 {
|
||||
toCheck := results.Size(notifyTypes)
|
||||
if toCheck == 0 {
|
||||
log.Info("No notifications due.")
|
||||
} else {
|
||||
log.Infof("Notifying for %v checks turned bad in total...", nErr)
|
||||
log.Infof("Notifying for %v checks changing state to %v...", toCheck, notifyTypes)
|
||||
}
|
||||
log.Debugf("%v checks turned OK!", results.Size(CheckOk))
|
||||
|
||||
errs := []string{}
|
||||
for box, resultsBox := range results {
|
||||
// only submit results which are errors
|
||||
resultsDue := []CheckResult{}
|
||||
for _, result := range resultsBox {
|
||||
if result.Status != CheckOk {
|
||||
if result.HasStatus(notifyTypes) {
|
||||
resultsDue = append(resultsDue, result)
|
||||
}
|
||||
}
|
||||
|
@ -91,16 +90,18 @@ func (results BoxCheckResults) SendNotifications() error {
|
|||
}
|
||||
}
|
||||
|
||||
// update cache (also with CheckOk results to reset status)
|
||||
// update cache (with /all/ changed results to reset status)
|
||||
if useCache {
|
||||
notifyLog.Debug("updating cache")
|
||||
cacheError := updateCache(box, resultsBox)
|
||||
if cacheError != nil {
|
||||
notifyLog.Error("could not cache notification results: ", cacheError)
|
||||
errs = append(errs, cacheError.Error())
|
||||
}
|
||||
}
|
||||
|
||||
if len(resultsDue) != 0 {
|
||||
notifyLog.Infof("Sent notification for %s via %s with %v new issues", box.Name, transport, len(resultsDue))
|
||||
notifyLog.Infof("Sent notification for %s via %s with %v updated issues", box.Name, transport, len(resultsDue))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue