// Package watchdog periodically fetches URLs, looks for expected (and
// unwanted) text in the response, runs a recovery script when a check keeps
// failing, and reports status changes to configured webhooks.
package watchdog

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"net"
	"net/http"
	"net/url"
	"os/exec"
	"strings"
	"time"
)

// Status is the last observed state of a watched service.
type Status int

const (
	// StatusDown means the most recent check failed.
	StatusDown Status = iota
	// StatusUp means the most recent check passed.
	StatusUp
)

// String returns "up" or "down"; any other value yields a marker string so
// that a corrupted status is visible in logs and notifications.
func (s Status) String() string {
	switch s {
	case StatusUp:
		return "up"
	case StatusDown:
		return "down"
	default:
		return "[[internal error]]"
	}
}

// Dog watches a single URL.
type Dog struct {
	// Watchdog is substituted for "{{ .Watchdog }}" in webhook templates.
	Watchdog string
	// Name is used in log lines and substituted for "{{ .Name }}".
	Name string
	// CheckURL is the URL fetched on every check.
	CheckURL string
	// Keywords must appear in the response body for a check to pass.
	// An empty value always matches.
	Keywords string
	// Badwords, when non-empty, must NOT appear in the response body.
	Badwords string
	// Localizations maps status strings and messages to replacement text.
	Localizations map[string]string
	// Recover is a script fed to bash on stdin when the service is down.
	// Empty disables recovery.
	Recover string
	// Webhooks lists the names (keys of AllWebhooks) to notify.
	Webhooks []string
	// AllWebhooks holds every known webhook by name.
	AllWebhooks map[string]Webhook
	// Logger receives one message per log line. Sends block until the
	// message is received, so something must drain the channel.
	Logger chan string

	status       Status
	changed      bool
	error        error
	failures     int
	passes       int
	lastFailed   time.Time
	lastPassed   time.Time
	lastNotified time.Time
}

// New initializes the private state of d (status up, no pending change,
// last pass five minutes ago) and returns the same pointer.
func New(d *Dog) *Dog {
	d.lastPassed = time.Now().Add(-5 * time.Minute)
	d.status = StatusUp
	d.changed = false
	return d
}

// Watch runs a check immediately and then again every five minutes.
// It never returns.
func (d *Dog) Watch() {
	d.watch()

	for {
		// TODO set cancellable callback ?
		time.Sleep(5 * time.Minute)
		d.watch()
	}
}

// watch performs one monitoring round.
//
// A single failed check is re-tested after a short pause and treated as a
// hiccup if the second check passes. Otherwise the round loops: run the
// recovery script, wait (doubling the delay up to a cap), and check again.
// "went down" is sent once, after the first recovery attempt did not help;
// "came back up" is sent when a later check passes, but only if "went down"
// was sent. The round ends as soon as a check passes.
func (d *Dog) watch() {
	const (
		hiccupDelay    = 5 * time.Second
		initialBackoff = 10 * time.Second
		maxBackoff     = 5 * time.Minute
	)

	d.Logger <- fmt.Sprintf("Check: '%s'", d.Name)

	firstErr := d.check()
	if firstErr == nil {
		if d.changed {
			d.notify("came back up")
		}
		return
	}

	time.Sleep(hiccupDelay)
	if err := d.check(); err != nil {
		d.Logger <- fmt.Sprintf("Down: '%s': %s", d.Name, err)
	} else {
		d.Logger <- fmt.Sprintf("Hiccup: '%s': %s", d.Name, firstErr)
		return
	}

	// d.changed cannot drive the "went down" notification here: the
	// confirming check above already saw down -> down and cleared it. Track
	// the notification explicitly instead.
	notifiedDown := false
	backoff := initialBackoff
	for {
		d.recover()
		time.Sleep(backoff)
		backoff *= 2
		if backoff > maxBackoff {
			backoff = maxBackoff
		}

		err := d.check()
		if err == nil {
			if notifiedDown {
				d.notify("came back up")
			}
			// go back to the main 5-minute loop
			return
		}

		d.Logger <- fmt.Sprintf("Unrecoverable: '%s': %s", d.Name, err)
		if !notifiedDown {
			// TODO what if the server is flip-flopping rapidly?
			// how to rate limit?
			// "{{ .Server }} is on cooldown for 30 minutes"
			d.notify("went down")
			notifiedDown = true
		}
	}
}

// check fetches CheckURL once and verifies the body against Keywords and
// Badwords. It returns nil when the check passes. Before returning it
// updates status, the pass/fail counters and timestamps, and sets changed
// to whether status differs from what it was when check was called.
func (d *Dog) check() (err error) {
	previousStatus := d.status

	// Runs after the return value is set, so err is the final outcome.
	defer func() {
		if err != nil {
			d.status = StatusDown
			d.failures++
			d.lastFailed = time.Now()
		} else {
			d.status = StatusUp
			d.lastPassed = time.Now()
			d.passes++
			d.Logger <- fmt.Sprintf("Up: '%s'", d.Name)
		}
		d.changed = previousStatus != d.status
	}()

	response, err := NewHTTPClient().Get(d.CheckURL)
	if err != nil {
		d.error = fmt.Errorf("Connection Failure: %s", err)
		return err
	}
	defer response.Body.Close()

	b, err := ioutil.ReadAll(response.Body)
	if err != nil {
		d.error = fmt.Errorf("Network Failure: %s", err)
		return err
	}

	// Note: empty matches empty as true, so this works for checking redirects
	if !bytes.Contains(b, []byte(d.Keywords)) {
		err = fmt.Errorf("Down: '%s' Not Found for '%s'", d.Keywords, d.Name)
		d.Logger <- err.Error()
		d.error = err
		return err
	}

	// The check fails when the bad word IS present.
	if d.Badwords != "" && bytes.Contains(b, []byte(d.Badwords)) {
		err = fmt.Errorf("Down: '%s' Found for '%s'", d.Badwords, d.Name)
		d.Logger <- err.Error()
		d.error = err
		return err
	}

	return nil
}

// recover feeds the Recover script to bash on stdin and waits for it to
// finish, giving it at most ten seconds. Failures are logged, not returned.
// It does nothing when Recover is empty.
func (d *Dog) recover() {
	if d.Recover == "" {
		return
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	cmd := exec.CommandContext(ctx, "bash")
	cmd.Stdin = strings.NewReader(d.Recover)

	if err := cmd.Start(); err != nil {
		d.Logger <- fmt.Sprintf("[Recover] Could not start '%s': %s", d.Recover, err)
		return
	}
	if err := cmd.Wait(); err != nil {
		d.Logger <- fmt.Sprintf("[Recover] '%s' failed for '%s': %s", d.Recover, d.Name, err)
	}
}

// notify sends msg to every webhook named in Webhooks. A name that is not
// present in AllWebhooks is logged once and blanked so it is skipped on
// later calls.
func (d *Dog) notify(msg string) {
	d.Logger <- fmt.Sprintf("Notifying the authorities of %s's status change", d.Name)
	d.lastNotified = time.Now()

	for i, name := range d.Webhooks {
		if name == "" {
			continue
		}

		h, ok := d.AllWebhooks[name]
		if !ok {
			// TODO check in main when config is read
			d.Webhooks[i] = ""
			d.Logger <- fmt.Sprintf("[Warning] Could not find webhook '%s' for '%s'", name, d.Name)
			continue
		}

		d.notifyOne(h, msg)
	}
}

// fillTemplate replaces the "{{ .Watchdog }}", "{{ .Name }}", "{{ .Status }}"
// and "{{ .Message }}" placeholders in s, in that order. Status and msg are
// passed through localize first.
func (d *Dog) fillTemplate(s, msg string) string {
	// TODO real templates
	s = strings.Replace(s, "{{ .Watchdog }}", d.Watchdog, -1)
	s = strings.Replace(s, "{{ .Name }}", d.Name, -1)
	s = strings.Replace(s, "{{ .Status }}", d.localize(d.status.String()), -1)
	s = strings.Replace(s, "{{ .Message }}", d.localize(msg), -1)
	return s
}

// notifyOne sends a single webhook request for msg and logs the outcome.
//
// The request body is the url-encoded Form if h.Form is non-empty, else the
// JSON object built from h.JSON if that is non-empty, else no body at all.
// Placeholders are filled in before encoding. The method defaults to POST.
func (d *Dog) notifyOne(h Webhook, msg string) {
	// TODO do this in main on config init
	if h.Method == "" {
		h.Method = "POST"
	}

	// Must be an interface-typed nil when there is no body: a nil
	// *strings.Reader wrapped in io.Reader is non-nil and would be
	// dereferenced by http.NewRequest.
	var body io.Reader
	switch {
	case len(h.Form) != 0:
		form := url.Values{}
		for k, v := range h.Form {
			// keys are left alone because `{{` gets urlencoded
			v = d.fillTemplate(v, msg)
			d.Logger <- fmt.Sprintf("[HEADER] %s: %s", k, v)
			form.Set(k, v)
		}
		body = strings.NewReader(form.Encode())
	case len(h.JSON) != 0:
		// Substitute before marshalling so that quotes, backslashes and
		// control characters in the inserted text are escaped properly.
		filled := make(map[string]string, len(h.JSON))
		for k, v := range h.JSON {
			filled[d.fillTemplate(k, msg)] = d.fillTemplate(v, msg)
		}
		payload, err := json.Marshal(filled)
		if err != nil {
			d.Logger <- fmt.Sprintf("[Notify] JSON Marshal Error for '%s': %s", h.Name, err)
			return
		}
		body = bytes.NewReader(payload)
	}

	req, err := http.NewRequest(h.Method, h.URL, body)
	if err != nil {
		d.Logger <- fmt.Sprintf("[Notify] HTTP Client Network Error for '%s': %s", h.Name, err)
		return
	}

	if len(h.Form) != 0 {
		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	} else if len(h.JSON) != 0 {
		req.Header.Set("Content-Type", "application/json")
	}

	if len(h.Auth) != 0 {
		user := h.Auth["user"]
		if user == "" {
			user = h.Auth["username"]
		}
		pass := h.Auth["pass"]
		if pass == "" {
			pass = h.Auth["password"]
		}
		req.SetBasicAuth(user, pass)
	}

	// Defaults first, so that configured headers can override them.
	req.Header.Set("User-Agent", "Watchdog/1.0")
	// TODO json vs xml vs txt
	req.Header.Set("Accept", "application/json")
	for k, v := range h.Headers {
		req.Header.Set(k, v)
	}

	resp, err := NewHTTPClient().Do(req)
	if err != nil {
		d.Logger <- fmt.Sprintf("[Notify] HTTP Client Error for '%s': %s", h.Name, err)
		return
	}
	defer resp.Body.Close()

	if !(resp.StatusCode >= 200 && resp.StatusCode < 300) {
		d.Logger <- fmt.Sprintf("[Notify] Response Error for '%s': %s", h.Name, resp.Status)
		return
	}

	var data map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
		d.Logger <- fmt.Sprintf("[Notify] Response Body Error for '%s': %s", h.Name, err)
		return
	}

	// TODO some sort of way to determine if data is successful (keywords)
	d.Logger <- fmt.Sprintf("[Notify] Success? %#v", data)
}

// localize returns the entry of Localizations keyed by msg, or msg itself
// when there is no such entry.
func (d *Dog) localize(msg string) string {
	if translated, ok := d.Localizations[msg]; ok {
		return translated
	}
	return msg
}

// Config is the JSON configuration document.
type Config struct {
	Watchdog      string            `json:"watchdog"`
	Watches       []ConfigWatch     `json:"watches"`
	Webhooks      []Webhook         `json:"webhooks"`
	Localizations map[string]string `json:"localizations"`
}

// ConfigWatch is the JSON configuration of one watched URL.
type ConfigWatch struct {
	Name          string   `json:"name"`
	URL           string   `json:"url"`
	Keywords      string   `json:"keywords"`
	Badwords      string   `json:"badwords"`
	Webhooks      []string `json:"webhooks"`
	RecoverScript string   `json:"recover_script"`
}

// Webhook is the JSON configuration of one notification endpoint.
type Webhook struct {
	Name string `json:"name"`
	// Method is the HTTP method; notifyOne uses POST when it is empty.
	Method string `json:"method"`
	URL    string `json:"url"`
	// Auth supplies HTTP basic auth credentials under the keys "user" or
	// "username", and "pass" or "password".
	Auth map[string]string `json:"auth"`
	// Headers are set on the request after the built-in defaults.
	Headers map[string]string `json:"headers"`
	// Form, when non-empty, is sent as a url-encoded body.
	Form map[string]string `json:"form"`
	// JSON, when non-empty and Form is empty, is sent as a JSON object.
	JSON    map[string]string   `json:"json"`
	Config  map[string]string   `json:"config"`
	Configs []map[string]string `json:"configs"`
}

// NewHTTPClient returns a client with explicit timeouts.
//
// The default http client uses unsafe defaults (no timeouts). Every call
// builds its own Transport, so idle connections are given a timeout instead
// of being kept open indefinitely.
func NewHTTPClient() *http.Client {
	transport := &http.Transport{
		DialContext: (&net.Dialer{
			Timeout: 10 * time.Second,
		}).DialContext,
		TLSHandshakeTimeout: 5 * time.Second,
		IdleConnTimeout:     30 * time.Second,
	}
	return &http.Client{
		Timeout:   5 * time.Second,
		Transport: transport,
	}
}