Merge branch 'develop'

master v1.1.0
Norwin 6 years ago
commit 794ea5369d

@ -5,7 +5,7 @@ Cross platform command line application to run health checks against sensor stat
This tool lets you automatically check if senseBoxes are still running correctly,
and when that's not the case, notifies you.
Currently, email notifications are implemented, but other transports can be added easily.
Implemented health checks are [described below](#possible-values-for-defaulthealthchecksevents), and new ones can be added just as easily (given some knowledge of programming).
Implemented health checks are [described below](#available-healthchecks), and new ones can be added just as easily (given some knowledge of programming).
The tool has multiple modes of operation:
@ -61,7 +61,7 @@ Contributions are welcome!
Check out the following locations for plugging in new functionality:
- new notification transports: [core/notifiers.go](core/notifiers.go)
- new health checks: [core/healthcheck*.go](core/healtchecks.go)
- new health checks: [core/healthcheck*.go](core/healthchecks.go)
- new commands: [cmd/](cmd/)
Before committing and submitting a pull request, please run `go fmt ./ cmd/ core/`.

@ -12,8 +12,14 @@ import (
"github.com/noerw/osem_notify/utils"
)
// clearCache is bound to the --clear flag of the `debug cache` command.
var clearCache bool
func init() {
debugCmd.AddCommand(debugNotificationsCmd)
debugCacheCmd.PersistentFlags().BoolVarP(&clearCache, "clear", "", false, "reset the notifications cache")
debugCmd.AddCommand(debugCacheCmd)
rootCmd.AddCommand(debugCmd)
}
@ -32,13 +38,28 @@ var debugCmd = &cobra.Command{
},
}
// debugCacheCmd implements `osem_notify debug cache`: by default it prints
// the notifications cache, with --clear it calls core.ClearCache() instead.
var debugCacheCmd = &cobra.Command{
	Use:   "cache",
	Short: "Print or clear the notifications cache",
	Long:  "osem_notify debug cache prints the contents of the notifications cache",
	RunE: func(cmd *cobra.Command, args []string) error {
		if !clearCache {
			core.PrintCache()
			return nil
		}
		return core.ClearCache()
	},
}
var debugNotificationsCmd = &cobra.Command{
Use: "notifications",
Short: "Verify that notifications are working",
Long: "osem_notify debug <feature> tests the functionality of the given feature",
Long: `osem_notify debug notifications sends a test notification according
to healthchecks.default.notifications.options as defined in the config file`,
RunE: func(cmd *cobra.Command, args []string) error {
defaultNotifyConf := &core.NotifyConfig{}
err := viper.UnmarshalKey("defaultHealthchecks", defaultNotifyConf)
err := viper.UnmarshalKey("healthchecks.default", defaultNotifyConf)
if err != nil {
return err
}
@ -55,11 +76,11 @@ var debugNotificationsCmd = &cobra.Command{
host, _ := os.Hostname()
err = n.Submit(core.Notification{
Subject: "Test notification from opeSenseMap notifier",
Subject: "Test notification from openSenseMap notifier",
Body: fmt.Sprintf("Your notification set up on %s is working fine!", host),
})
if err != nil {
notLog.Warnf("could not submit test notification for %s notifier!", transport)
notLog.Warnf("could not submit test notification for %s notifier: %s", transport, err)
continue
}
notLog.Info("Test notification (successfully?) submitted, check the specified inbox")

@ -23,21 +23,32 @@ var configHelpCmd = &cobra.Command{
> Example configuration:
# override default health checks:
defaultHealthchecks:
notifications:
transport: email
options:
recipients:
- fridolina@example.com
- ruth.less@example.com
events:
- type: "measurement_age"
target: "all" # all sensors
threshold: "15m" # any duration
- type: "measurement_faulty"
target: "all"
threshold: ""
healthchecks:
# override default health checks for all boxes
default:
notifications:
transport: email
options:
recipients:
- fridolina@example.com
events:
- type: "measurement_age"
target: "all" # all sensors
threshold: "15m" # any duration
- type: "measurement_faulty"
target: "all"
threshold: ""
# set health checks per box
593bcd656ccf3b0011791f5a:
notifications:
options:
recipients:
- ruth.less@example.com
events:
- type: "measurement_max"
target: "593bcd656ccf3b0011791f5b"
threshold: "40"
# only needed when sending notifications via email
email:
@ -48,14 +59,14 @@ var configHelpCmd = &cobra.Command{
from: hildegunst@example.com
> possible values for defaultHealthchecks.notifications:
> possible values for healthchecks.*.notifications:
transport | options
----------|-------------------------------------
email | recipients: list of email addresses
> possible values for defaultHealthchecks.events[]:
> possible values for healthchecks.*.events[]:
type | description
-------------------|---------------------------------------------------
@ -105,8 +116,9 @@ var cfgFile string
func init() {
var (
shouldNotify bool
debug bool
noCache bool
shouldNotify string
logFormat string
api string
)
@ -117,14 +129,16 @@ func init() {
rootCmd.PersistentFlags().StringVarP(&api, "api", "a", "https://api.opensensemap.org", "openSenseMap API to query against")
rootCmd.PersistentFlags().StringVarP(&logFormat, "logformat", "l", "plain", "log format, can be plain or json")
rootCmd.PersistentFlags().BoolVarP(&debug, "debug", "d", false, "enable verbose logging")
rootCmd.PersistentFlags().BoolVarP(&shouldNotify, "notify", "n", false, `if set, will send out notifications,
Otherwise results are printed to stdout only.
rootCmd.PersistentFlags().StringVarP(&shouldNotify, "notify", "n", "", `If set, will send out notifications for the specified type of check result,
otherwise results are printed to stdout only.
Allowed values are "all", "error", "ok".
You might want to run 'osem_notify debug notifications' first to verify everything works.
Notifications for failing checks are sent only once,
and then cached until the issue got resolved.
To clear the cache, delete the file ~/.osem_notify_cache.yaml.
Notifications for failing checks are sent only once, and then cached until the issue got
resolved, unless --no-cache is set.
To clear the cache, run 'osem_notify debug cache --clear'.
`)
rootCmd.PersistentFlags().BoolVarP(&noCache, "no-cache", "", false, "send all notifications, ignoring results from previous runs. also don't update the cache.")
viper.BindPFlags(rootCmd.PersistentFlags()) // let flags override config

@ -4,9 +4,11 @@ import (
"os"
"strings"
"github.com/noerw/osem_notify/utils"
log "github.com/sirupsen/logrus"
"github.com/spf13/viper"
"github.com/noerw/osem_notify/core"
"github.com/noerw/osem_notify/utils"
)
// initConfig reads in config file and ENV variables if set.
@ -58,3 +60,55 @@ func validateConfig() {
}
}
}
// getNotifyConf assembles the notification config for the box with the given ID.
//
// It starts from built-in defaults (used when no configuration is present at
// all), then layers "healthchecks.default" and finally "healthchecks.<boxID>"
// from the viper configuration on top. An unmarshalling error of either
// section is returned as-is, together with a nil config.
func getNotifyConf(boxID string) (*core.NotifyConfig, error) {
	conf := &core.NotifyConfig{
		Events: []core.NotifyEvent{
			{Type: "measurement_age", Target: "all", Threshold: "15m"},
			{Type: "measurement_faulty", Target: "all", Threshold: ""},
		},
	}

	// sections are applied in order, so per-box values win over the defaults
	sections := []string{
		"healthchecks.default",
		"healthchecks." + boxID,
	}

	for _, section := range sections {
		// When a section defines .events (possibly as an empty list), drop the
		// events collected so far: this allows configuring "no events" and
		// keeps a shorter list from leaking into a previous, longer one.
		if keyDefined(section + ".events") {
			conf.Events = []core.NotifyEvent{}
		}

		if err := viper.UnmarshalKey(section, conf); err != nil {
			return nil, err
		}
	}

	return conf, nil
}
// keyDefined reports whether key is among the keys returned by viper.AllKeys().
// We implement our own key check, as viper.InConfig() does not work.
//
// Viper treats configuration keys case-insensitively and reports them
// lowercased, so the comparison ignores case as well. Otherwise a key built
// from user input with upper-case characters (e.g. a box ID) would never match,
// even though viper.UnmarshalKey() resolves that same key without problems.
func keyDefined(key string) bool {
	for _, k := range viper.AllKeys() {
		if strings.EqualFold(k, key) {
			return true
		}
	}
	return false
}

@ -3,6 +3,7 @@ package cmd
import (
"fmt"
"regexp"
"strings"
"github.com/spf13/cobra"
"github.com/spf13/viper"
@ -33,21 +34,38 @@ func BoxIdValidator(cmd *cobra.Command, args []string) error {
}
func checkAndNotify(boxIds []string) error {
defaultNotifyConf := &core.NotifyConfig{}
err := viper.UnmarshalKey("defaultHealthchecks", defaultNotifyConf)
if err != nil {
return err
boxLocalConfig := map[string]*core.NotifyConfig{}
for _, boxID := range boxIds {
c, err := getNotifyConf(boxID)
if err != nil {
return err
}
boxLocalConfig[boxID] = c
}
results, err := core.CheckBoxes(boxIds, defaultNotifyConf)
results, err := core.CheckBoxes(boxLocalConfig)
if err != nil {
return err
}
results.Log()
if viper.GetBool("notify") {
return results.SendNotifications()
notify := strings.ToLower(viper.GetString("notify"))
if notify != "" {
types := []string{}
switch notify {
case "all":
types = []string{core.CheckErr, core.CheckOk}
case "error", "err":
types = []string{core.CheckErr}
case "ok":
types = []string{core.CheckOk}
default:
return fmt.Errorf("invalid value %s for \"notify\"", notify)
}
useCache := !viper.GetBool("no-cache")
return results.SendNotifications(types, useCache)
}
return nil
}

@ -60,3 +60,18 @@ func updateCache(box *Box, results []CheckResult) error {
}
return cache.WriteConfig()
}
// ClearCache deletes the notifications cache file from disk.
//
// A missing cache file is not an error. Any other failure to remove the file
// is returned to the caller.
func ClearCache() error {
	fileName := utils.GetConfigFile("osem_notify_cache")

	// Remove directly instead of calling os.Stat first: that avoids the
	// check-then-act race and no longer hides failures other than
	// "file does not exist" behind a nil return.
	err := os.Remove(fileName)
	if err != nil && !os.IsNotExist(err) {
		return err
	}
	return nil
}
// PrintCache logs each key/value pair returned by cache.AllSettings() at
// info level. Map iteration order is unspecified, so the output order may
// vary between runs.
func PrintCache() {
	settings := cache.AllSettings()
	for name := range settings {
		log.Infof("%20s: %v", name, settings[name])
	}
}

@ -10,11 +10,11 @@ import (
type BoxCheckResults map[*Box][]CheckResult
func (results BoxCheckResults) Size(status string) int {
func (results BoxCheckResults) Size(statusToCheck []string) int {
size := 0
for _, boxResults := range results {
for _, result := range boxResults {
if status == result.Status || status == "" {
if result.HasStatus(statusToCheck) {
size++
}
}
@ -42,24 +42,26 @@ func (results BoxCheckResults) Log() {
countErr++
}
}
if countErr == 0 {
if len(boxResults) == 0 {
boxLog.Infof("%s: no checks defined", box.Name)
} else if countErr == 0 {
boxLog.Infof("%s: all is fine!", box.Name)
}
}
}
func CheckBoxes(boxIds []string, defaultConf *NotifyConfig) (BoxCheckResults, error) {
log.Debug("Checking notifications for ", len(boxIds), " box(es)")
func CheckBoxes(boxLocalConfs map[string]*NotifyConfig) (BoxCheckResults, error) {
log.Debug("Checking notifications for ", len(boxLocalConfs), " box(es)")
results := BoxCheckResults{}
errs := []string{}
// TODO: check boxes in parallel, capped at 5 at once
for _, boxId := range boxIds {
for boxId, localConf := range boxLocalConfs {
boxLogger := log.WithField("boxId", boxId)
boxLogger.Info("checking box for events")
box, res, err := checkBox(boxId, defaultConf)
box, res, err := checkBox(boxId, localConf)
if err != nil {
boxLogger.Errorf("could not run checks on box %s: %s", boxId, err)
errs = append(errs, err.Error())

@ -5,8 +5,13 @@ import (
"strconv"
)
// Event type names of the min/max value checks. They serve as the `name` of
// the corresponding checkType and are compared against NotifyEvent.Type.
const (
	nameMin = "measurement_min" // check fails when the value is below the threshold
	nameMax = "measurement_max" // check fails when the value is above the threshold
)
var checkMeasurementMin = checkType{
name: "measurement_min",
name: nameMin,
toString: func(r CheckResult) string {
return fmt.Sprintf("Sensor %s (%s) reads low value of %s", r.TargetName, r.Target, r.Value)
},
@ -14,7 +19,7 @@ var checkMeasurementMin = checkType{
}
var checkMeasurementMax = checkType{
name: "measurement_min",
name: nameMax,
toString: func(r CheckResult) string {
return fmt.Sprintf("Sensor %s (%s) reads high value of %s", r.TargetName, r.Target, r.Value)
},
@ -41,8 +46,8 @@ func validateMeasurementMinMax(e NotifyEvent, s Sensor, b Box) (CheckResult, err
return result, err
}
if e.Type == eventMeasurementValMax && val > thresh ||
e.Type == eventMeasurementValMin && val < thresh {
if e.Type == nameMax && val > thresh ||
e.Type == nameMin && val < thresh {
result.Status = CheckErr
}

@ -9,16 +9,12 @@ import (
)
const (
CheckOk = "OK"
CheckErr = "FAILED"
eventMeasurementAge = "measurement_age"
eventMeasurementValMin = "measurement_min"
eventMeasurementValMax = "measurement_max"
eventMeasurementValFaulty = "measurement_faulty"
eventTargetAll = "all" // if event.Target is this value, all sensors will be checked
CheckOk = "OK"
CheckErr = "FAILED"
eventTargetAll = "all" // if event.Target is this value, all sensors will be checked
)
type checkType = struct {
type checkType struct {
name string // name that is used in config
toString func(result CheckResult) string // error message when check failed
checkFunc func(event NotifyEvent, sensor Sensor, context Box) (CheckResult, error)
@ -41,6 +37,15 @@ type CheckResult struct {
Threshold string
}
// HasStatus reports whether the result's Status equals any of the entries
// in statusToCheck. A nil or empty list never matches.
func (r CheckResult) HasStatus(statusToCheck []string) bool {
	for i := range statusToCheck {
		if r.Status == statusToCheck[i] {
			return true
		}
	}
	return false
}
func (r CheckResult) EventID() string {
s := fmt.Sprintf("%s%s%s", r.Event, r.Target, r.Threshold)
hasher := sha256.New()

@ -39,25 +39,24 @@ func (box Box) GetNotifier() (AbstractNotifier, error) {
return notifier.New(box.NotifyConf.Notifications.Options)
}
func (results BoxCheckResults) SendNotifications() error {
// TODO: expose flags to not use cache, and to notify for checks turned CheckOk as well
results = results.filterChangedFromCache()
func (results BoxCheckResults) SendNotifications(notifyTypes []string, useCache bool) error {
if useCache {
results = results.filterChangedFromCache()
}
nErr := results.Size(CheckErr)
if nErr == 0 {
toCheck := results.Size(notifyTypes)
if toCheck == 0 {
log.Info("No notifications due.")
} else {
log.Infof("Notifying for %v checks turned bad in total...", nErr)
log.Infof("Notifying for %v checks changing state to %v...", toCheck, notifyTypes)
}
log.Debugf("%v checks turned OK!", results.Size(CheckOk))
errs := []string{}
for box, resultsBox := range results {
// only submit results which are errors
resultsDue := []CheckResult{}
for _, result := range resultsBox {
if result.Status != CheckOk {
if result.HasStatus(notifyTypes) {
resultsDue = append(resultsDue, result)
}
}
@ -91,16 +90,18 @@ func (results BoxCheckResults) SendNotifications() error {
}
}
// update cache (also with CheckOk results to reset status)
notifyLog.Debug("updating cache")
cacheError := updateCache(box, resultsBox)
if cacheError != nil {
notifyLog.Error("could not cache notification results: ", cacheError)
errs = append(errs, cacheError.Error())
// update cache (with /all/ changed results to reset status)
if useCache {
notifyLog.Debug("updating cache")
cacheError := updateCache(box, resultsBox)
if cacheError != nil {
notifyLog.Error("could not cache notification results: ", cacheError)
errs = append(errs, cacheError.Error())
}
}
if len(resultsDue) != 0 {
notifyLog.Infof("Sent notification for %s via %s with %v new issues", box.Name, transport, len(resultsDue))
notifyLog.Infof("Sent notification for %s via %s with %v updated issues", box.Name, transport, len(resultsDue))
}
}

Loading…
Cancel
Save