299 lines
6.9 KiB
Go
299 lines
6.9 KiB
Go
package watchdog
|
|
|
|
import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"net"
	"net/http"
	"net/url"
	"os/exec"
	"strings"
	"time"
)
|
|
|
|
type Dog struct {
|
|
Name string
|
|
CheckURL string
|
|
Keywords string
|
|
Recover string
|
|
Webhooks []string
|
|
AllWebhooks map[string]ConfigWebhook
|
|
Logger chan string
|
|
error error
|
|
failures int
|
|
passes int
|
|
lastFailed time.Time
|
|
lastPassed time.Time
|
|
lastNotified time.Time
|
|
}
|
|
|
|
func New(d *Dog) *Dog {
|
|
d.lastPassed = time.Now().Add(-5 * time.Minute)
|
|
return d
|
|
}
|
|
|
|
func (d *Dog) Watch() {
|
|
d.watch()
|
|
for {
|
|
// TODO set cancellable callback ?
|
|
time.Sleep(5 * time.Minute)
|
|
d.watch()
|
|
}
|
|
}
|
|
|
|
func (d *Dog) watch() {
|
|
d.Logger <- fmt.Sprintf("Check: '%s'", d.Name)
|
|
|
|
err := d.check()
|
|
if nil == err {
|
|
return
|
|
}
|
|
|
|
time.Sleep(time.Duration(2) * time.Second)
|
|
err2 := d.check()
|
|
if nil != err2 {
|
|
d.Logger <- fmt.Sprintf("Down: '%s': %s", d.Name, err2)
|
|
} else {
|
|
d.Logger <- fmt.Sprintf("Hiccup: '%s': %s", d.Name, err)
|
|
return
|
|
}
|
|
|
|
failure := false
|
|
t := 10
|
|
for {
|
|
d.recover()
|
|
time.Sleep(time.Duration(t) * time.Second)
|
|
// backoff
|
|
t *= 2
|
|
err := d.check()
|
|
if nil != err {
|
|
d.Logger <- fmt.Sprintf("Unrecoverable: '%s': %s", d.Name, err)
|
|
failure = true
|
|
} else {
|
|
failure = false
|
|
}
|
|
|
|
// We should notify if
|
|
// * We've had success since the last notification
|
|
// * It's been at least 5 minutes since the last notification
|
|
fiveMinutesAgo := time.Now().Add(-5 * time.Minute)
|
|
if d.lastPassed.After(d.lastNotified) && d.lastNotified.Before(fiveMinutesAgo) {
|
|
d.notify(failure)
|
|
}
|
|
if !failure || d.failures >= 5 {
|
|
// go back to the main 5-minute loop
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
func (d *Dog) check() error {
|
|
var err error
|
|
defer func() {
|
|
if nil != err {
|
|
d.failures += 1
|
|
d.lastFailed = time.Now()
|
|
} else {
|
|
d.lastPassed = time.Now()
|
|
d.passes += 1
|
|
}
|
|
}()
|
|
|
|
client := NewHTTPClient()
|
|
response, err := client.Get(d.CheckURL)
|
|
if nil != err {
|
|
d.error = fmt.Errorf("Connection Failure: " + err.Error())
|
|
return err
|
|
}
|
|
|
|
b, err := ioutil.ReadAll(response.Body)
|
|
if nil != err {
|
|
d.error = fmt.Errorf("Network Failure: " + err.Error())
|
|
return err
|
|
}
|
|
|
|
if !bytes.Contains(b, []byte(d.Keywords)) {
|
|
err = fmt.Errorf("Down: '%s' Not Found for '%s'", d.Keywords, d.Name)
|
|
d.Logger <- fmt.Sprintf("%s", err)
|
|
d.error = err
|
|
return err
|
|
} else {
|
|
d.Logger <- fmt.Sprintf("Up: '%s'", d.Name)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (d *Dog) recover() {
|
|
if "" == d.Recover {
|
|
return
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
cmd := exec.CommandContext(ctx, "bash")
|
|
pipe, err := cmd.StdinPipe()
|
|
pipe.Write([]byte(d.Recover))
|
|
if nil != err {
|
|
d.Logger <- fmt.Sprintf("[Recover] Could not write to bash '%s': %s", d.Recover, err)
|
|
}
|
|
err = cmd.Start()
|
|
if nil != err {
|
|
d.Logger <- fmt.Sprintf("[Recover] Could not start '%s': %s", d.Recover, err)
|
|
}
|
|
err = pipe.Close()
|
|
if nil != err {
|
|
d.Logger <- fmt.Sprintf("[Recover] Could not close '%s': %s", d.Recover, err)
|
|
}
|
|
err = cmd.Wait()
|
|
cancel()
|
|
if nil != err {
|
|
d.Logger <- fmt.Sprintf("[Recover] '%s' failed for '%s': %s", d.Recover, d.Name, err)
|
|
}
|
|
}
|
|
|
|
func (d *Dog) notify(hardFail bool) {
|
|
d.Logger <- fmt.Sprintf("Notifying the authorities of %s's failure", d.Name)
|
|
d.lastNotified = time.Now()
|
|
|
|
for i := range d.Webhooks {
|
|
name := d.Webhooks[i]
|
|
if "" == name {
|
|
continue
|
|
}
|
|
|
|
h, ok := d.AllWebhooks[name]
|
|
if !ok {
|
|
// TODO check in main when config is read
|
|
d.Webhooks[i] = ""
|
|
d.Logger <- fmt.Sprintf("[Warning] Could not find webhook '%s' for '%s'", name, h.Name)
|
|
continue
|
|
}
|
|
|
|
// TODO do this in main on config init
|
|
if "" == h.Method {
|
|
h.Method = "POST"
|
|
}
|
|
|
|
var body *strings.Reader
|
|
var err error
|
|
// TODO real templates
|
|
if 0 != len(h.Form) {
|
|
form := url.Values{}
|
|
for k := range h.Form {
|
|
v := h.Form[k]
|
|
// because `{{` gets urlencoded
|
|
//k = strings.Replace(k, "{{ .Name }}", d.Name, -1)
|
|
v = strings.Replace(v, "{{ .Name }}", d.Name, -1)
|
|
d.Logger <- fmt.Sprintf("[HEADER] %s: %s", k, v)
|
|
form.Set(k, v)
|
|
}
|
|
body = strings.NewReader(form.Encode())
|
|
} else if 0 != len(h.JSON) {
|
|
bodyBuf, err := json.Marshal(h.JSON)
|
|
if nil != err {
|
|
d.Logger <- fmt.Sprintf("[Notify] JSON Marshal Error for '%s': %s", h.Name, err)
|
|
continue
|
|
}
|
|
// `{{` should be left alone
|
|
body = strings.NewReader(strings.Replace(string(bodyBuf), "{{ .Name }}", d.Name, -1))
|
|
}
|
|
|
|
client := NewHTTPClient()
|
|
req, err := http.NewRequest(h.Method, h.URL, body)
|
|
if nil != err {
|
|
d.Logger <- fmt.Sprintf("[Notify] HTTP Client Network Error for '%s': %s", h.Name, err)
|
|
continue
|
|
}
|
|
|
|
if 0 != len(h.Form) {
|
|
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
|
} else if 0 != len(h.JSON) {
|
|
req.Header.Set("Content-Type", "application/json")
|
|
}
|
|
|
|
if 0 != len(h.Auth) {
|
|
user := h.Auth["user"]
|
|
if "" == user {
|
|
user = h.Auth["username"]
|
|
}
|
|
pass := h.Auth["pass"]
|
|
if "" == user {
|
|
pass = h.Auth["password"]
|
|
}
|
|
req.SetBasicAuth(user, pass)
|
|
}
|
|
|
|
req.Header.Set("User-Agent", "Watchdog/1.0")
|
|
for k := range h.Headers {
|
|
req.Header.Set(k, h.Headers[k])
|
|
}
|
|
|
|
resp, err := client.Do(req)
|
|
if nil != err {
|
|
d.Logger <- fmt.Sprintf("[Notify] HTTP Client Error for '%s': %s", h.Name, err)
|
|
continue
|
|
}
|
|
|
|
if !(resp.StatusCode >= 200 && resp.StatusCode < 300) {
|
|
d.Logger <- fmt.Sprintf("[Notify] Response Error for '%s': %s", h.Name, resp.Status)
|
|
continue
|
|
}
|
|
|
|
// TODO json vs xml vs txt
|
|
var data map[string]interface{}
|
|
req.Header.Add("Accept", "application/json")
|
|
decoder := json.NewDecoder(resp.Body)
|
|
err = decoder.Decode(&data)
|
|
if err != nil {
|
|
d.Logger <- fmt.Sprintf("[Notify] Response Body Error for '%s': %s", h.Name, resp.Status)
|
|
continue
|
|
}
|
|
|
|
// TODO some sort of way to determine if data is successful (keywords)
|
|
d.Logger <- fmt.Sprintf("[Notify] Success? %#v", data)
|
|
}
|
|
}
|
|
|
|
type Config struct {
|
|
Watches []ConfigWatch `json:"watches"`
|
|
Webhooks []ConfigWebhook `json:"webhooks"`
|
|
}
|
|
|
|
// ConfigWatch is the JSON description of one watch.
// NOTE(review): the fields look like the inputs for a Dog (Name, CheckURL,
// Keywords, Webhooks, Recover); the mapping is not done in this file — confirm
// against the caller.
type ConfigWatch struct {
	// Name is decoded from the "name" key.
	Name string `json:"name"`

	// URL is decoded from the "url" key.
	URL string `json:"url"`

	// Keywords is decoded from the "keywords" key.
	Keywords string `json:"keywords"`

	// Webhooks is decoded from the "webhooks" key.
	Webhooks []string `json:"webhooks"`

	// RecoverScript is decoded from the "recover_script" key.
	RecoverScript string `json:"recover_script"`
}
|
|
|
|
// ConfigWebhook is the JSON description of one webhook endpoint.
type ConfigWebhook struct {
	// Name is the key under which the webhook is referenced and logged.
	Name string `json:"name"`

	// Method is the HTTP method; notify falls back to "POST" when it is empty.
	Method string `json:"method"`

	// URL is the endpoint the request is sent to.
	URL string `json:"url"`

	// Auth carries basic-auth credentials under the keys "user" or "username"
	// and "pass" or "password".
	Auth map[string]string `json:"auth"`

	// Headers are extra request headers set on the outgoing request.
	Headers map[string]string `json:"headers"`

	// Form, when non-empty, is sent as a URL-encoded form body.
	Form map[string]string `json:"form"`

	// JSON, when non-empty and Form is empty, is sent as a JSON body.
	JSON map[string]string `json:"json"`

	// Config is decoded from the "config" key.
	// NOTE(review): not read anywhere in this file — confirm its purpose.
	Config map[string]string `json:"config"`

	// Configs is decoded from the "configs" key.
	// NOTE(review): not read anywhere in this file — confirm its purpose.
	Configs []map[string]string `json:"configs"`
}
|
|
|
|
// NewHTTPClient returns a new *http.Client with explicit timeouts.
//
// The default http client uses unsafe defaults, so every phase is bounded
// here: 10 seconds to establish a connection, 5 seconds for the TLS handshake
// and 5 seconds for a request as a whole. Each call builds its own transport,
// so idle keep-alive connections are closed after 90 seconds instead of being
// kept open indefinitely.
func NewHTTPClient() *http.Client {
	dialer := &net.Dialer{
		Timeout: 10 * time.Second,
	}
	transport := &http.Transport{
		// DialContext supersedes the deprecated Dial field and lets an
		// in-flight dial be abandoned when the request is cancelled.
		DialContext:         dialer.DialContext,
		TLSHandshakeTimeout: 5 * time.Second,
		// Zero would mean idle connections are never reaped.
		IdleConnTimeout: 90 * time.Second,
	}
	return &http.Client{
		Timeout:   5 * time.Second,
		Transport: transport,
	}
}
|