improve error handling

keep checking the remaining boxes when an error occurs
develop
noerw 6 years ago
parent 9c063a4659
commit 4a1ae551b2

@ -48,6 +48,7 @@ var watchBoxesCmd = &cobra.Command{
<-ticker
err = checkAndNotify(args)
if err != nil {
// we already did retries, so exiting seems appropriate
return err
}
}

@ -1,6 +1,9 @@
package core
import (
"fmt"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/viper"
)
@ -45,29 +48,35 @@ func CheckBoxes(boxIds []string, defaultConf *NotifyConfig) (BoxCheckResults, er
log.Debug("Checking notifications for ", len(boxIds), " box(es)")
results := BoxCheckResults{}
errs := []string{}
// TODO: check boxes in parallel, capped at 5 at once
for _, boxId := range boxIds {
// TODO: check boxes in parallel, capped at 5 at once
boxLogger := log.WithField("boxId", boxId)
boxLogger.Info("checking box for events")
box, res, err := checkBox(boxId, defaultConf)
if err != nil {
return nil, err
boxLogger.Errorf("could not run checks on box %s: %s", boxId, err)
errs = append(errs, err.Error())
continue
}
results[box] = res
}
if len(errs) != 0 {
	// Report all per-box failures at once, one per line. The joined text is
	// passed as an argument rather than as the format string, so a '%' inside
	// an error message is not misread as a formatting verb.
	return results, fmt.Errorf("%s", strings.Join(errs, "\n"))
}
return results, nil
}
func checkBox(boxId string, defaultConf *NotifyConfig) (*Box, []CheckResult, error) {
boxLogger := log.WithFields(log.Fields{"boxId": boxId})
boxLogger.Info("checking box for events")
osem := NewOsemClient(viper.GetString("api"))
// get box data
box, err := osem.GetBox(boxId)
if err != nil {
boxLogger.Error(err)
return nil, nil, err
}
@ -79,7 +88,6 @@ func checkBox(boxId string, defaultConf *NotifyConfig) (*Box, []CheckResult, err
// run checks
results, err2 := box.RunChecks()
if err2 != nil {
boxLogger.Error("could not run checks on box: ", err2)
return box, results, err2
}

@ -2,6 +2,8 @@ package core
import (
"fmt"
"strings"
"time"
log "github.com/sirupsen/logrus"
"github.com/spf13/viper"
@ -39,9 +41,8 @@ func (box Box) GetNotifier() (AbstractNotifier, error) {
}
func (results BoxCheckResults) SendNotifications() error {
// FIXME: don't return on errors, process all boxes first!
// FIXME: only update cache when notifications sent successfully
results = results.FilterChangedFromCache(false)
errs := []string{}
n := results.Size()
if n == 0 {
@ -51,6 +52,7 @@ func (results BoxCheckResults) SendNotifications() error {
log.Infof("Notifying for %v checks turned bad in total...", results.Size())
}
// FIXME: only update cache when notifications sent successfully
for box, resultsDue := range results {
if len(resultsDue) == 0 {
continue
@ -62,20 +64,33 @@ func (results BoxCheckResults) SendNotifications() error {
"transport": transport,
})
notifier, err2 := box.GetNotifier()
if err2 != nil {
notifyLog.Error(err2)
return err2
notifier, err := box.GetNotifier()
if err != nil {
notifyLog.Error(err)
errs = append(errs, err.Error())
continue
}
notification := notifier.ComposeNotification(box, resultsDue)
err3 := notifier.Submit(notification)
if err3 != nil {
notifyLog.Error(err3)
return err3
// Submit the notification; on failure, retry up to two more times with a
// 10 second pause before each retry.
submitErr := notifier.Submit(notification)
for retry := 1; submitErr != nil && retry < 3; retry++ {
	time.Sleep(10 * time.Second)
	notifyLog.Debugf("trying to submit (retry %v)", retry)
	// Re-submit here: without this call submitErr never changes and the
	// loop would only sleep without retrying anything.
	submitErr = notifier.Submit(notification)
}
if submitErr != nil {
	// All attempts failed: record the error and carry on with the next box.
	notifyLog.Error(submitErr)
	errs = append(errs, submitErr.Error())
	continue
}
notifyLog.Infof("Sent notification for %s via %s with %v new issues", box.Name, transport, len(resultsDue))
}
if len(errs) != 0 {
	// Report all collected failures at once, one per line. The joined text is
	// passed as an argument rather than as the format string, so a '%' inside
	// an error message is not misread as a formatting verb.
	return fmt.Errorf("%s", strings.Join(errs, "\n"))
}
return nil
}

Loading…
Cancel
Save