From 297fba10f55b1efcee6a2fc42e0786faa289d895 Mon Sep 17 00:00:00 2001
From: AJ ONeal
Date: Mon, 20 Apr 2026 10:04:56 -0600
Subject: [PATCH] feat: persist ETag/Last-Modified to sidecar file; add integration tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

httpcache: write .meta JSON sidecar after each successful download;
load it on first Fetch so conditional GETs work after process restarts.
Tests verify: download, sidecar written, same-cacher 304, fresh-cacher
304 (the last being the key case — no in-memory state, sidecar drives
ETag).

MaxMind integration test reads GeoIP.conf, downloads City+ASN, verifies
fresh-cacher conditional GET skips re-download via sidecar ETag.
---
 net/geoip/geoip_integration_test.go         | 111 ++++++++++++++++++++
 net/httpcache/httpcache.go                  |  68 +++++++++++-
 net/httpcache/httpcache_integration_test.go |  61 +++++++++--
 3 files changed, 230 insertions(+), 10 deletions(-)
 create mode 100644 net/geoip/geoip_integration_test.go

diff --git a/net/geoip/geoip_integration_test.go b/net/geoip/geoip_integration_test.go
new file mode 100644
index 0000000..6e486ab
--- /dev/null
+++ b/net/geoip/geoip_integration_test.go
@@ -0,0 +1,111 @@
+//go:build integration
+
+package geoip_test
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/therootcompany/golib/net/geoip"
+)
+
+func testdataDir(t *testing.T) string {
+	t.Helper()
+	dir, _ := filepath.Abs(".")
+	for {
+		if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil {
+			return filepath.Join(dir, "testdata")
+		}
+		parent := filepath.Dir(dir)
+		if parent == dir {
+			t.Fatal("could not find module root (go.mod)")
+		}
+		dir = parent
+	}
+}
+
+func geoipConf(t *testing.T) *geoip.Conf {
+	t.Helper()
+	// Look for GeoIP.conf relative to the module root.
+	dir, _ := filepath.Abs(".")
+	for {
+		p := filepath.Join(dir, "GeoIP.conf")
+		if _, err := os.Stat(p); err == nil {
+			cfg, err := geoip.ParseConf(p)
+			if err != nil {
+				t.Fatalf("GeoIP.conf: %v", err)
+			}
+			return cfg
+		}
+		parent := filepath.Dir(dir)
+		if parent == dir {
+			break
+		}
+		dir = parent
+	}
+	t.Skip("GeoIP.conf not found; skipping MaxMind integration test")
+	return nil
+}
+
+func TestDownloader_CityAndASN(t *testing.T) {
+	cfg := geoipConf(t)
+	td := testdataDir(t)
+
+	d := geoip.New(cfg.AccountID, cfg.LicenseKey)
+
+	for _, edition := range []string{geoip.CityEdition, geoip.ASNEdition} {
+		path := filepath.Join(td, edition+".mmdb")
+		os.Remove(path)
+		os.Remove(path + ".meta")
+
+		cacher := d.NewCacher(edition, path)
+		updated, err := cacher.Fetch()
+		if err != nil {
+			t.Fatalf("%s Fetch: %v", edition, err)
+		}
+		if !updated {
+			t.Errorf("%s: expected updated=true on first fetch", edition)
+		}
+
+		info, err := os.Stat(path)
+		if err != nil {
+			t.Fatalf("%s: file not created: %v", edition, err)
+		}
+		if info.Size() == 0 {
+			t.Errorf("%s: downloaded file is empty", edition)
+		}
+		t.Logf("%s: %d bytes", edition, info.Size())
+
+		if _, err := os.Stat(path + ".meta"); err != nil {
+			t.Errorf("%s: sidecar not written: %v", edition, err)
+		}
+	}
+}
+
+func TestDownloader_ConditionalGet_FreshCacher(t *testing.T) {
+	cfg := geoipConf(t)
+	td := testdataDir(t)
+
+	d := geoip.New(cfg.AccountID, cfg.LicenseKey)
+
+	for _, edition := range []string{geoip.CityEdition, geoip.ASNEdition} {
+		path := filepath.Join(td, edition+".mmdb")
+
+		// Ensure downloaded.
+		if _, err := d.NewCacher(edition, path).Fetch(); err != nil {
+			t.Fatalf("%s initial Fetch: %v", edition, err)
+		}
+
+		// Fresh cacher — no in-memory ETag, must use sidecar.
+		fresh := d.NewCacher(edition, path)
+		updated, err := fresh.Fetch()
+		if err != nil {
+			t.Fatalf("%s fresh Fetch: %v", edition, err)
+		}
+		if updated {
+			t.Errorf("%s: fresh cacher expected updated=false (sidecar ETag should have been used)", edition)
+		}
+		t.Logf("%s: fresh-cacher conditional GET correctly skipped re-download", edition)
+	}
+}
diff --git a/net/httpcache/httpcache.go b/net/httpcache/httpcache.go
index 498c9ea..436cd9f 100644
--- a/net/httpcache/httpcache.go
+++ b/net/httpcache/httpcache.go
@@ -1,6 +1,7 @@
 package httpcache
 
 import (
+	"encoding/json"
 	"fmt"
 	"io"
 	"net"
@@ -11,8 +12,8 @@ import (
 )
 
 const (
-	defaultConnTimeout = 5 * time.Second  // TCP connect + TLS handshake
-	defaultTimeout     = 5 * time.Minute  // overall including body read
+	defaultConnTimeout = 5 * time.Second // TCP connect + TLS handshake
+	defaultTimeout     = 5 * time.Minute // overall including body read
 )
 
 // Syncer is implemented by any value that can fetch a remote resource and
@@ -37,8 +38,12 @@ func (NopSyncer) Fetch() (bool, error) { return false, nil }
 //   - MinInterval: skips if Fetch was called within this duration (in-memory).
 //     Guards against tight poll loops hammering a rate-limited API.
 //
-// Auth — AuthHeader/AuthValue set a request header on every attempt, including
-// redirects. Use any scheme: "Authorization"/"Bearer token",
+// Caching — ETag and Last-Modified values are persisted to a .meta
+// sidecar file so conditional GETs survive process restarts.
+//
+// Auth — AuthHeader/AuthValue set a request header on every attempt. Auth is
+// stripped before following redirects so presigned targets (e.g. S3/R2 URLs)
+// never receive credentials. Use any scheme: "Authorization"/"Bearer token",
 // "X-API-Key"/"secret", "Authorization"/"Basic base64(user:pass)", etc.
 //
 // Transform — if set, called with the response body instead of the default
@@ -59,6 +64,54 @@ type Cacher struct {
 	etag        string
 	lastMod     string
 	lastChecked time.Time
+	metaLoaded  bool
+}
+
+// cacheMeta is the sidecar format persisted alongside the downloaded file.
+type cacheMeta struct {
+	ETag    string `json:"etag,omitempty"`
+	LastMod string `json:"last_modified,omitempty"`
+}
+
+// metaPath returns the sidecar location: Path with a ".meta" suffix.
+func (c *Cacher) metaPath() string { return c.Path + ".meta" }
+
+// loadMeta reads etag/lastMod from the sidecar file. Errors are silently
+// ignored — a missing or corrupt sidecar just means a full download next time.
+//
+// NOTE(review): the sidecar is trusted without checking that the file at
+// Path still exists — confirm Fetch cannot receive a 304 for a deleted file.
+func (c *Cacher) loadMeta() {
+	data, err := os.ReadFile(c.metaPath())
+	if err != nil {
+		return
+	}
+	var m cacheMeta
+	if err := json.Unmarshal(data, &m); err != nil {
+		return
+	}
+	c.etag = m.ETag
+	c.lastMod = m.LastMod
+}
+
+// saveMeta writes etag/lastMod to the sidecar file atomically: the JSON is
+// written to a temp file which is then renamed over the sidecar. It is
+// best-effort — on failure the old sidecar is kept and the temp file is
+// removed, so the worst case is a full download after the next restart.
+func (c *Cacher) saveMeta() {
+	m := cacheMeta{ETag: c.etag, LastMod: c.lastMod}
+	data, err := json.Marshal(m)
+	if err != nil {
+		return
+	}
+	tmp := c.metaPath() + ".tmp"
+	if err := os.WriteFile(tmp, data, 0o644); err != nil {
+		os.Remove(tmp) // may have been created or partially written
+		return
+	}
+	if err := os.Rename(tmp, c.metaPath()); err != nil {
+		os.Remove(tmp) // don't leave a stray temp file behind
+	}
 }
 
 // New creates a Cacher that fetches URL and writes it to path.
@@ -83,6 +136,12 @@ func (c *Cacher) Fetch() (updated bool, err error) {
 	c.mu.Lock()
 	defer c.mu.Unlock()
 
+	// Load sidecar once so conditional GETs work after a process restart.
+	if !c.metaLoaded {
+		c.loadMeta()
+		c.metaLoaded = true
+	}
+
 	// MinInterval: in-memory last-checked gate.
 	if c.MinInterval > 0 && !c.lastChecked.IsZero() {
 		if time.Since(c.lastChecked) < c.MinInterval {
@@ -176,6 +235,7 @@ func (c *Cacher) Fetch() (updated bool, err error) {
 	if lm := resp.Header.Get("Last-Modified"); lm != "" {
 		c.lastMod = lm
 	}
+	c.saveMeta()
 	return true, nil
 }
 
diff --git a/net/httpcache/httpcache_integration_test.go b/net/httpcache/httpcache_integration_test.go
index e3598e4..6362943 100644
--- a/net/httpcache/httpcache_integration_test.go
+++ b/net/httpcache/httpcache_integration_test.go
@@ -3,6 +3,7 @@
 package httpcache_test
 
 import (
+	"encoding/json"
 	"os"
 	"path/filepath"
 	"testing"
@@ -32,7 +33,8 @@ func testdataDir(t *testing.T) string {
 
 func TestCacher_Download(t *testing.T) {
 	path := filepath.Join(testdataDir(t), testFile)
-	os.Remove(path) // start fresh
+	os.Remove(path)
+	os.Remove(path + ".meta")
 
 	c := httpcache.New(testURL, path)
 
@@ -54,22 +56,69 @@ func TestCacher_Download(t *testing.T) {
 	t.Logf("downloaded %d bytes to %s", info.Size(), path)
 }
 
-func TestCacher_ConditionalGet(t *testing.T) {
+func TestCacher_SidecarWritten(t *testing.T) {
+	path := filepath.Join(testdataDir(t), testFile)
+	os.Remove(path)
+	os.Remove(path + ".meta")
+
+	c := httpcache.New(testURL, path)
+	if _, err := c.Fetch(); err != nil {
+		t.Fatalf("Fetch: %v", err)
+	}
+
+	data, err := os.ReadFile(path + ".meta")
+	if err != nil {
+		t.Fatalf("sidecar not written: %v", err)
+	}
+	var meta map[string]string
+	if err := json.Unmarshal(data, &meta); err != nil {
+		t.Fatalf("sidecar not valid JSON: %v", err)
+	}
+	if meta["etag"] == "" && meta["last_modified"] == "" {
+		t.Error("sidecar has neither etag nor last_modified")
+	}
+	t.Logf("sidecar: %s", data)
+}
+
+func TestCacher_ConditionalGet_SameCacher(t *testing.T) {
 	path := filepath.Join(testdataDir(t), testFile)
 
-	// Ensure file exists from a prior download (or download it now).
 	c := httpcache.New(testURL, path)
 	if _, err := c.Fetch(); err != nil {
 		t.Fatalf("initial Fetch: %v", err)
 	}
 
-	// Second fetch on the same Cacher should use ETag/Last-Modified.
+	// Second call on the same instance — ETag already in memory.
 	updated, err := c.Fetch()
 	if err != nil {
 		t.Fatalf("second Fetch: %v", err)
 	}
 	if updated {
-		t.Error("second Fetch: expected updated=false (content unchanged)")
+		t.Error("same-cacher second Fetch: expected updated=false")
 	}
-	t.Log("conditional GET correctly returned 304 / not-modified")
+	t.Log("same-cacher conditional GET correctly skipped re-download")
+}
+
+func TestCacher_ConditionalGet_FreshCacher(t *testing.T) {
+	path := filepath.Join(testdataDir(t), testFile)
+
+	// Ensure file + sidecar exist.
+	first := httpcache.New(testURL, path)
+	if _, err := first.Fetch(); err != nil {
+		t.Fatalf("initial Fetch: %v", err)
+	}
+	if _, err := os.Stat(path + ".meta"); err != nil {
+		t.Fatalf("sidecar missing after first fetch: %v", err)
+	}
+
+	// New Cacher with no in-memory state — must read sidecar and send conditional GET.
+	fresh := httpcache.New(testURL, path)
+	updated, err := fresh.Fetch()
+	if err != nil {
+		t.Fatalf("fresh-cacher Fetch: %v", err)
+	}
+	if updated {
+		t.Error("fresh-cacher Fetch: expected updated=false (sidecar should have provided ETag)")
+	}
+	t.Log("fresh-cacher conditional GET correctly used sidecar ETag")
 }