package watchdog import ( "bytes" "context" "encoding/json" "fmt" "io/ioutil" "net" "net/http" "net/url" "os/exec" "strings" "time" ) type Dog struct { Name string CheckURL string Keywords string Recover string Webhooks []string AllWebhooks map[string]Webhook Logger chan string error error failures int passes int lastFailed time.Time lastPassed time.Time lastNotified time.Time } func New(d *Dog) *Dog { d.lastPassed = time.Now().Add(-5 * time.Minute) return d } func (d *Dog) Watch() { d.watch() for { // TODO set cancellable callback ? time.Sleep(5 * time.Minute) d.watch() } } func (d *Dog) watch() { d.Logger <- fmt.Sprintf("Check: '%s'", d.Name) err := d.check() if nil == err { return } time.Sleep(time.Duration(2) * time.Second) err2 := d.check() if nil != err2 { d.Logger <- fmt.Sprintf("Down: '%s': %s", d.Name, err2) } else { d.Logger <- fmt.Sprintf("Hiccup: '%s': %s", d.Name, err) return } failure := false t := 10 for { d.recover() time.Sleep(time.Duration(t) * time.Second) // backoff t *= 2 err := d.check() if nil != err { d.Logger <- fmt.Sprintf("Unrecoverable: '%s': %s", d.Name, err) failure = true } else { failure = false } // We should notify if // * We've had success since the last notification // * It's been at least 5 minutes since the last notification fiveMinutesAgo := time.Now().Add(-5 * time.Minute) if d.lastPassed.After(d.lastNotified) && d.lastNotified.Before(fiveMinutesAgo) { d.notify(failure) } if !failure || d.failures >= 5 { // go back to the main 5-minute loop break } } } func (d *Dog) check() error { var err error defer func() { if nil != err { d.failures += 1 d.lastFailed = time.Now() } else { d.lastPassed = time.Now() d.passes += 1 } }() client := NewHTTPClient() response, err := client.Get(d.CheckURL) if nil != err { d.error = fmt.Errorf("Connection Failure: " + err.Error()) return err } b, err := ioutil.ReadAll(response.Body) if nil != err { d.error = fmt.Errorf("Network Failure: " + err.Error()) return err } if !bytes.Contains(b, []byte(d.Keywords)) { err = fmt.Errorf("Down: '%s' Not Found for '%s'", d.Keywords, d.Name) d.Logger <- fmt.Sprintf("%s", err) d.error = err return err } else { d.Logger <- fmt.Sprintf("Up: '%s'", d.Name) } return nil } func (d *Dog) recover() { if "" == d.Recover { return } ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) cmd := exec.CommandContext(ctx, "bash") pipe, err := cmd.StdinPipe() pipe.Write([]byte(d.Recover)) if nil != err { d.Logger <- fmt.Sprintf("[Recover] Could not write to bash '%s': %s", d.Recover, err) } err = cmd.Start() if nil != err { d.Logger <- fmt.Sprintf("[Recover] Could not start '%s': %s", d.Recover, err) } err = pipe.Close() if nil != err { d.Logger <- fmt.Sprintf("[Recover] Could not close '%s': %s", d.Recover, err) } err = cmd.Wait() cancel() if nil != err { d.Logger <- fmt.Sprintf("[Recover] '%s' failed for '%s': %s", d.Recover, d.Name, err) } } func (d *Dog) notify(hardFail bool) { d.Logger <- fmt.Sprintf("Notifying the authorities of %s's failure", d.Name) d.lastNotified = time.Now() for i := range d.Webhooks { name := d.Webhooks[i] if "" == name { continue } h, ok := d.AllWebhooks[name] if !ok { // TODO check in main when config is read d.Webhooks[i] = "" d.Logger <- fmt.Sprintf("[Warning] Could not find webhook '%s' for '%s'", name, h.Name) continue } d.notifyOne(h, hardFail) } } func (d *Dog) notifyOne(h Webhook, hardFail bool) { // TODO do this in main on config init if "" == h.Method { h.Method = "POST" } var body *strings.Reader var err error // TODO real templates if 0 != len(h.Form) { form := url.Values{} for k := range h.Form { v := h.Form[k] // because `{{` gets urlencoded //k = strings.Replace(k, "{{ .Name }}", d.Name, -1) v = strings.Replace(v, "{{ .Name }}", d.Name, -1) d.Logger <- fmt.Sprintf("[HEADER] %s: %s", k, v) form.Set(k, v) } body = strings.NewReader(form.Encode()) } else if 0 != len(h.JSON) { bodyBuf, err := json.Marshal(h.JSON) if nil != err { d.Logger <- fmt.Sprintf("[Notify] JSON Marshal Error for '%s': %s", h.Name, err) return } // `{{` should be left alone body = strings.NewReader(strings.Replace(string(bodyBuf), "{{ .Name }}", d.Name, -1)) } client := NewHTTPClient() req, err := http.NewRequest(h.Method, h.URL, body) if nil != err { d.Logger <- fmt.Sprintf("[Notify] HTTP Client Network Error for '%s': %s", h.Name, err) return } if 0 != len(h.Form) { req.Header.Set("Content-Type", "application/x-www-form-urlencoded") } else if 0 != len(h.JSON) { req.Header.Set("Content-Type", "application/json") } if 0 != len(h.Auth) { user := h.Auth["user"] if "" == user { user = h.Auth["username"] } pass := h.Auth["pass"] if "" == user { pass = h.Auth["password"] } req.SetBasicAuth(user, pass) } req.Header.Set("User-Agent", "Watchdog/1.0") for k := range h.Headers { req.Header.Set(k, h.Headers[k]) } resp, err := client.Do(req) if nil != err { d.Logger <- fmt.Sprintf("[Notify] HTTP Client Error for '%s': %s", h.Name, err) return } if !(resp.StatusCode >= 200 && resp.StatusCode < 300) { d.Logger <- fmt.Sprintf("[Notify] Response Error for '%s': %s", h.Name, resp.Status) return } // TODO json vs xml vs txt var data map[string]interface{} req.Header.Add("Accept", "application/json") decoder := json.NewDecoder(resp.Body) err = decoder.Decode(&data) if err != nil { d.Logger <- fmt.Sprintf("[Notify] Response Body Error for '%s': %s", h.Name, resp.Status) return } // TODO some sort of way to determine if data is successful (keywords) d.Logger <- fmt.Sprintf("[Notify] Success? %#v", data) } type Config struct { Watches []ConfigWatch `json:"watches"` Webhooks []Webhook `json:"webhooks"` } type ConfigWatch struct { Name string `json:"name"` URL string `json:"url"` Keywords string `json:"keywords"` Webhooks []string `json:"webhooks"` RecoverScript string `json:"recover_script"` } type Webhook struct { Name string `json:"name"` Method string `json:"method"` URL string `json:"url"` Auth map[string]string `json:"auth"` Headers map[string]string `json:"headers"` Form map[string]string `json:"form"` JSON map[string]string `json:"json"` Config map[string]string `json:"config"` Configs []map[string]string `json:"configs"` } // The default http client uses unsafe defaults func NewHTTPClient() *http.Client { transport := &http.Transport{ Dial: (&net.Dialer{ Timeout: 10 * time.Second, }).Dial, TLSHandshakeTimeout: 5 * time.Second, } client := &http.Client{ Timeout: time.Second * 5, Transport: transport, } return client }