fix: remove double-fetch, add httpcache.NopSyncer, drop Sources.Init

Sources.Init() was redundant: gitshallow.Repo.Fetch() already clones
if missing via syncGit()->clone(). Removing it means blGroup.Init()
is the single entry point, no duplicate network calls.

httpcache.NopSyncer{} replaces the private nopSyncer in the cmd —
exported so any caller can build a file-only Dataset without defining
its own no-op syncer.
This commit is contained in:
AJ ONeal 2026-04-20 09:31:58 -06:00
parent 673d084bd2
commit aeb94fc26b
No known key found for this signature in database
3 changed files with 11 additions and 28 deletions

View File

@ -17,6 +17,12 @@ type Syncer interface {
Fetch() (updated bool, err error) Fetch() (updated bool, err error)
} }
// NopSyncer is a Syncer whose Fetch does nothing: it never reports an update
// and never returns an error. Use it for datasets backed by local files that
// are managed externally (no download).
type NopSyncer struct{}

// Fetch implements Syncer. It always returns (false, nil).
func (NopSyncer) Fetch() (bool, error) {
	return false, nil
}
// Cacher fetches a URL to a local file, using ETag/Last-Modified to skip // Cacher fetches a URL to a local file, using ETag/Last-Modified to skip
// unchanged responses. // unchanged responses.
// //

View File

@ -22,8 +22,7 @@ type Sources struct {
inboundPaths []string inboundPaths []string
outboundPaths []string outboundPaths []string
gitRepo *gitshallow.Repo // non-nil for git source; used by Init for clone-if-missing syncs []httpcache.Syncer // all syncable sources
syncs []httpcache.Syncer // all syncable sources
} }
func newFileSources(whitelist, inbound, outbound []string) *Sources { func newFileSources(whitelist, inbound, outbound []string) *Sources {
@ -47,7 +46,6 @@ func newGitSources(gitURL, repoDir string, whitelist, inboundRel, outboundRel []
whitelistPaths: whitelist, whitelistPaths: whitelist,
inboundPaths: abs(inboundRel), inboundPaths: abs(inboundRel),
outboundPaths: abs(outboundRel), outboundPaths: abs(outboundRel),
gitRepo: repo,
syncs: []httpcache.Syncer{repo}, syncs: []httpcache.Syncer{repo},
} }
} }
@ -79,20 +77,6 @@ func (s *Sources) Fetch() (bool, error) {
return anyUpdated, nil return anyUpdated, nil
} }
// Init prepares the configured remotes for first use.
//
// When a git repository is configured (s.gitRepo is non-nil), Init delegates
// to the repository's own Init and returns its error. Otherwise it calls Fetch
// on each entry of s.syncs in order and stops at the first error.
func (s *Sources) Init() error {
	if repo := s.gitRepo; repo != nil {
		// Git source: the repo's Init is the only step; the syncs are not run.
		_, err := repo.Init()
		return err
	}

	for i := range s.syncs {
		_, err := s.syncs[i].Fetch()
		if err != nil {
			return err
		}
	}
	return nil
}
// Datasets builds a dataset.Group backed by this Sources and returns typed // Datasets builds a dataset.Group backed by this Sources and returns typed
// datasets for whitelist, inbound, and outbound cohorts. Either whitelist or // datasets for whitelist, inbound, and outbound cohorts. Either whitelist or
// outbound may be nil if no paths were configured. // outbound may be nil if no paths were configured.

View File

@ -13,6 +13,7 @@ import (
"github.com/oschwald/geoip2-golang" "github.com/oschwald/geoip2-golang"
"github.com/therootcompany/golib/net/dataset" "github.com/therootcompany/golib/net/dataset"
"github.com/therootcompany/golib/net/geoip" "github.com/therootcompany/golib/net/geoip"
"github.com/therootcompany/golib/net/httpcache"
"github.com/therootcompany/golib/net/ipcohort" "github.com/therootcompany/golib/net/ipcohort"
) )
@ -75,10 +76,7 @@ func main() {
} }
// Build typed datasets from the source. // Build typed datasets from the source.
if err := src.Init(); err != nil { // blGroup.Init() calls src.Fetch() which handles initial git clone and HTTP download.
fmt.Fprintf(os.Stderr, "error: %v\n", err)
os.Exit(1)
}
blGroup, whitelistDS, inboundDS, outboundDS := src.Datasets() blGroup, whitelistDS, inboundDS, outboundDS := src.Datasets()
if err := blGroup.Init(); err != nil { if err := blGroup.Init(); err != nil {
fmt.Fprintf(os.Stderr, "error: %v\n", err) fmt.Fprintf(os.Stderr, "error: %v\n", err)
@ -172,11 +170,11 @@ func main() {
// newGeoIPDataset creates a Dataset[geoip2.Reader]. If d is nil, only // newGeoIPDataset creates a Dataset[geoip2.Reader]. If d is nil, only
// opens the existing file (no download). Close is wired to Reader.Close. // opens the existing file (no download). Close is wired to Reader.Close.
func newGeoIPDataset(d *geoip.Downloader, edition, path string) *dataset.Dataset[geoip2.Reader] { func newGeoIPDataset(d *geoip.Downloader, edition, path string) *dataset.Dataset[geoip2.Reader] {
var syncer interface{ Fetch() (bool, error) } var syncer httpcache.Syncer
if d != nil { if d != nil {
syncer = d.NewCacher(edition, path) syncer = d.NewCacher(edition, path)
} else { } else {
syncer = &nopSyncer{} syncer = httpcache.NopSyncer{}
} }
ds := dataset.New(syncer, func() (*geoip2.Reader, error) { ds := dataset.New(syncer, func() (*geoip2.Reader, error) {
return geoip2.Open(path) return geoip2.Open(path)
@ -186,11 +184,6 @@ func newGeoIPDataset(d *geoip.Downloader, edition, path string) *dataset.Dataset
return ds return ds
} }
// nopSyncer is a do-nothing implementation of httpcache.Syncer, used for
// file-only datasets where nothing is downloaded.
type nopSyncer struct{}

// Fetch always returns (false, nil): no update and no error.
func (*nopSyncer) Fetch() (bool, error) {
	return false, nil
}
func containsInbound(ip string, func containsInbound(ip string,
whitelist, inbound *dataset.Dataset[ipcohort.Cohort], whitelist, inbound *dataset.Dataset[ipcohort.Cohort],
) bool { ) bool {