go-watchdog/watchdog.go

275 lines
5.5 KiB
Go
Raw Normal View History

2019-06-08 02:37:53 +00:00
package main
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net"
"net/http"
"os"
"os/exec"
"time"
)
// usage prints the one-line command-line synopsis to standard output.
func usage() {
	const synopsis = "Usage: go run watchdog.go -c dog.json"
	fmt.Println(synopsis)
}
// main expects exactly "-c <config.json>" on the command line, loads the JSON
// configuration, and starts one watcher goroutine per configured entry.
//
// It exits with status 1 (after printing the usage line) when the arguments
// are malformed, and via log.Fatal when the file cannot be read, cannot be
// parsed, or contains no watches. Otherwise it blocks forever while the
// watchers run.
func main() {
	if len(os.Args) != 3 || os.Args[1] != "-c" {
		usage()
		os.Exit(1)
	}

	// ReadFile opens, reads and closes the file in one step, so no file
	// handle is left open for the lifetime of the process.
	raw, err := ioutil.ReadFile(os.Args[2])
	if err != nil {
		log.Fatal(err)
	}

	config := &Config{}
	if err := json.Unmarshal(raw, config); err != nil {
		log.Fatal(err)
	}

	// Bail out before spawning anything when there is nothing to do.
	if len(config.Watches) == 0 {
		log.Fatal("Nothing to watch")
	}

	for _, c := range config.Watches {
		fmt.Printf("Watching '%s'\n", c.Name)
		// The entry is passed by value so each goroutine owns its own copy.
		go func(c ConfigWatch) {
			w := &Dog{
				Name:     c.Name,
				CheckURL: c.URL,
				Keywords: c.Keywords,
				Recover:  c.RecoverScript,
			}
			w.Watch()
		}(c)
	}

	// The watchers never return; park the main goroutine indefinitely.
	select {}
}
// Dog watches a single URL. The exported fields are its configuration; the
// unexported fields are bookkeeping updated by the check and notify methods.
type Dog struct {
	// Name is the human-readable label of the watched service.
	Name string
	// CheckURL is the address fetched on every check.
	CheckURL string
	// Keywords is the text that must appear in the response body for a
	// check to pass.
	Keywords string
	// Recover is a script piped to bash when a check fails; empty disables
	// recovery.
	Recover string

	// error holds the most recent failure recorded by check.
	error error

	// failures and passes count failed and successful checks respectively.
	failures, passes int

	// lastFailed and lastPassed are stamped by check; lastNotified is
	// stamped by notify.
	lastFailed, lastPassed, lastNotified time.Time
}
// execRecover runs the configured recovery script by feeding it to bash on
// standard input. It does nothing when no script is configured.
//
// The script is given at most 10 seconds; after that the context kills the
// process. Failures are reported on standard error and otherwise ignored, so
// recovery stays best-effort.
func (w *Dog) execRecover() {
	if w.Recover == "" {
		return
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	cmd := exec.CommandContext(ctx, "bash")
	// Let os/exec stream the script into the child after it has started.
	// Writing to a StdinPipe before Start can block once the pipe buffer
	// fills, and used the pipe before its error had been checked.
	cmd.Stdin = bytes.NewReader([]byte(w.Recover))

	if err := cmd.Start(); err != nil {
		fmt.Fprintf(os.Stderr, "Could not start '%s': %s\n", w.Recover, err)
		// Nothing was started, so there is nothing to wait for.
		return
	}
	if err := cmd.Wait(); err != nil {
		fmt.Fprintf(os.Stderr, "'%s' failed: %s\n", w.Recover, err)
	}
}
// Watch runs a check immediately and then again every five minutes.
// It never returns.
func (w *Dog) Watch() {
	const interval = 5 * time.Minute

	for {
		w.watch()
		// TODO set cancellable callback ?
		time.Sleep(interval)
	}
}
// watch performs one health check and, when it fails, runs a bounded
// recovery episode: execute the recovery script, wait with exponential
// backoff (10s, 20s, 40s, ...), and check again.
//
// The episode ends as soon as a re-check passes, or after maxAttempts failed
// re-checks, at which point control returns to the caller's polling loop.
// A notification is attempted after each re-check, subject to the
// rate-limiting rules below.
func (w *Dog) watch() {
	const (
		maxAttempts    = 5
		initialBackoff = 10 * time.Second
		notifyInterval = 5 * time.Minute
	)

	fmt.Println("Running a check")
	if err := w.check(); err == nil {
		return
	}

	backoff := initialBackoff
	// Count attempts within this episode only. The lifetime w.failures
	// counter never resets, so it cannot bound a single episode.
	for attempt := 0; attempt < maxAttempts; attempt++ {
		w.execRecover()
		time.Sleep(backoff)
		backoff *= 2

		// Reflects the latest re-check only, so a service that has just
		// recovered is reported as a hiccup rather than as down.
		stillDown := w.check() != nil

		// We should notify if
		// * We've had success since the last notification (or have never
		//   notified at all, e.g. the service was down from startup)
		// * It's been at least 5 minutes since the last notification
		eligible := w.lastNotified.IsZero() || w.lastPassed.After(w.lastNotified)
		quiet := w.lastNotified.Before(time.Now().Add(-notifyInterval))
		if eligible && quiet {
			if err := w.notify(stillDown); err != nil {
				fmt.Println("Notify:", err)
			}
		}

		if !stillDown {
			// Recovered: stop running the recovery script against a
			// healthy service.
			return
		}
	}
	// Still failing after maxAttempts; go back to the main 5-minute loop.
}
// NewHTTPClient returns a fresh HTTP client with conservative timeouts:
// 10 seconds to establish a connection, 5 seconds for the TLS handshake, and
// 5 seconds for the request as a whole. The overall limit is the shortest,
// so it is the effective bound on any single request.
//
// Each call builds its own transport. Idle connections are closed after 30
// seconds so that short-lived clients do not keep sockets open indefinitely.
func NewHTTPClient() *http.Client {
	dialer := &net.Dialer{
		Timeout: 10 * time.Second,
	}

	return &http.Client{
		Timeout: 5 * time.Second,
		Transport: &http.Transport{
			// DialContext replaces the deprecated Dial field and lets a
			// cancelled or timed-out request abort the dial.
			DialContext:         dialer.DialContext,
			TLSHandshakeTimeout: 5 * time.Second,
			IdleConnTimeout:     30 * time.Second,
		},
	}
}
// check fetches CheckURL and verifies that the response body contains
// Keywords.
//
// It returns nil when the keywords are found. Otherwise it returns the
// transport or read error, or a "Keywords Not Found" error, and stores a
// descriptive error in w.error. On every call the pass/failure counters and
// the lastPassed/lastFailed timestamps are updated to match the result.
func (w *Dog) check() (err error) {
	// The named result lets the deferred bookkeeping observe exactly what
	// is returned to the caller.
	defer func() {
		if err != nil {
			w.failures++
			w.lastFailed = time.Now()
			return
		}
		w.lastPassed = time.Now()
		w.passes++
	}()

	client := NewHTTPClient()
	response, err := client.Get(w.CheckURL)
	if err != nil {
		// Format with a verb: the error text may itself contain '%'.
		w.error = fmt.Errorf("Connection Failure: %v", err)
		return err
	}
	// Always release the body so the underlying connection is not leaked.
	defer response.Body.Close()

	b, err := ioutil.ReadAll(response.Body)
	if err != nil {
		w.error = fmt.Errorf("Network Failure: %v", err)
		return err
	}

	if !bytes.Contains(b, []byte(w.Keywords)) {
		err = fmt.Errorf("Keywords Not Found: %s", w.Keywords)
		fmt.Println(err)
		w.error = err
		return err
	}

	fmt.Println("Happy day!")
	return nil
}
// notify records the time of the notification and prints a message to
// standard output. It currently always returns nil, and hardFail is not
// consulted by the live code; it is only used by the disabled SMS sketch
// kept below for reference.
func (w *Dog) notify(hardFail bool) error {
	stamp := time.Now()
	w.lastNotified = stamp

	fmt.Println("Notifying the authorities of a failure")
	return nil

	// Disabled sketch of sending the notification as a text message via Twilio.
	/*
		urlStr := "https://api.twilio.com/2010-04-01/Accounts/" + accountSid + "/Messages.json"
		msgData := url.Values{}
		msgData.Set("To", "+1 555 555 5555")
		msgData.Set("From", "+1 555 555 1234")
		if hardFail {
		msgData.Set("Body", fmt.Sprintf("[%s] The system is down. The system is down.", w.Name))
		} else {
		msgData.Set("Body", fmt.Sprintf("[%s] had a hiccup.", w.Name))
		}
		msgDataReader := *strings.NewReader(msgData.Encode())
		client := NewHTTPClient()
		req, err := http.NewRequest("POST", urlStr, &msgDataReader)
		if nil != err {
		fmt.Fprintf(os.Stderr, "Failed to text: %s\n", err)
		}
		req.SetBasicAuth(accountSid, authToken)
		req.Header.Add("Accept", "application/json")
		req.Header.Add("Content-Type", "application/x-www-form-urlencoded")
		resp, _ := client.Do(req)
		if resp.StatusCode >= 200 && resp.StatusCode < 300 {
		var data map[string]interface{}
		decoder := json.NewDecoder(resp.Body)
		err := decoder.Decode(&data)
		if err == nil {
		fmt.Println(data["sid"])
		}
		} else {
		fmt.Println("Error:", resp.Status)
		}
		return nil
	*/
}
// Config is the top-level shape of the JSON configuration file.
type Config struct {
	// Watches lists the services to monitor, read from the "watches" key.
	Watches []ConfigWatch `json:"watches"`
}
// ConfigWatch is one entry of the "watches" list in the configuration file.
type ConfigWatch struct {
	// Name labels the watched service.
	Name string `json:"name"`
	// URL is the address to check.
	URL string `json:"url"`
	// Keywords is the text expected in the response body.
	Keywords string `json:"keywords"`
	// Webhook is parsed from the file; NOTE(review): it is not referenced
	// anywhere else in the visible code.
	Webhook string `json:"webhook"`
	// RecoverScript is the script to run when a check fails.
	RecoverScript string `json:"recover_script"`
}
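/*
Unfinished note: sketch of a Mailgun "send message" request (presumably an
alternative notification channel — TODO confirm). It would be built with
http.NewRequest("POST", ...); the equivalent curl invocation is: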
curl -s --user 'api:YOUR_API_KEY' \
https://api.mailgun.net/v3/YOUR_DOMAIN_NAME/messages \
-F from='Excited User <mailgun@YOUR_DOMAIN_NAME>' \
-F to=YOU@YOUR_DOMAIN_NAME \
-F to=bar@example.com \
-F subject='Hello' \
-F text='Testing some Mailgun awesomeness!'
*/