fix: remove double-fetch, add httpcache.NopSyncer, drop Sources.Init

Sources.Init() was redundant: gitshallow.Repo.Fetch() already clones
if missing via syncGit()->clone(). With it removed, blGroup.Init()
is the single entry point, so there are no duplicate network calls.

httpcache.NopSyncer{} replaces the private nopSyncer in the cmd —
exported so any caller can build a file-only Dataset without a syncer.
This commit is contained in:
AJ ONeal 2026-04-20 09:31:58 -06:00
parent 673d084bd2
commit aeb94fc26b
No known key found for this signature in database
3 changed files with 11 additions and 28 deletions

View File

@ -17,6 +17,12 @@ type Syncer interface {
Fetch() (updated bool, err error)
}
// NopSyncer is a do-nothing Syncer: its Fetch never reports an update and
// never returns an error. Use it for datasets backed by local files that are
// managed externally, where there is nothing to download.
type NopSyncer struct{}

// Fetch implements Syncer. It performs no work and always returns
// (false, nil).
func (NopSyncer) Fetch() (updated bool, err error) {
	return false, nil
}
// Cacher fetches a URL to a local file, using ETag/Last-Modified to skip
// unchanged responses.
//

View File

@ -22,8 +22,7 @@ type Sources struct {
inboundPaths []string
outboundPaths []string
gitRepo *gitshallow.Repo // non-nil for git source; used by Init for clone-if-missing
syncs []httpcache.Syncer // all syncable sources
syncs []httpcache.Syncer // all syncable sources
}
func newFileSources(whitelist, inbound, outbound []string) *Sources {
@ -47,7 +46,6 @@ func newGitSources(gitURL, repoDir string, whitelist, inboundRel, outboundRel []
whitelistPaths: whitelist,
inboundPaths: abs(inboundRel),
outboundPaths: abs(outboundRel),
gitRepo: repo,
syncs: []httpcache.Syncer{repo},
}
}
@ -79,20 +77,6 @@ func (s *Sources) Fetch() (bool, error) {
return anyUpdated, nil
}
// Init makes sure the remote sources are ready for use.
//
// For a git-backed Sources (gitRepo is non-nil) it calls gitRepo.Init and
// returns that call's error; the syncs slice is not touched in that case.
// Otherwise it calls Fetch on each entry of syncs in order and returns the
// first error encountered, or nil when every Fetch succeeds.
func (s *Sources) Init() error {
	if repo := s.gitRepo; repo != nil {
		// Only the error matters here; the first result is discarded.
		_, err := repo.Init()
		return err
	}

	for i := range s.syncs {
		_, err := s.syncs[i].Fetch()
		if err != nil {
			return err
		}
	}
	return nil
}
// Datasets builds a dataset.Group backed by this Sources and returns typed
// datasets for whitelist, inbound, and outbound cohorts. Either whitelist or
// outbound may be nil if no paths were configured.

View File

@ -13,6 +13,7 @@ import (
"github.com/oschwald/geoip2-golang"
"github.com/therootcompany/golib/net/dataset"
"github.com/therootcompany/golib/net/geoip"
"github.com/therootcompany/golib/net/httpcache"
"github.com/therootcompany/golib/net/ipcohort"
)
@ -75,10 +76,7 @@ func main() {
}
// Build typed datasets from the source.
if err := src.Init(); err != nil {
fmt.Fprintf(os.Stderr, "error: %v\n", err)
os.Exit(1)
}
// blGroup.Init() calls src.Fetch() which handles initial git clone and HTTP download.
blGroup, whitelistDS, inboundDS, outboundDS := src.Datasets()
if err := blGroup.Init(); err != nil {
fmt.Fprintf(os.Stderr, "error: %v\n", err)
@ -172,11 +170,11 @@ func main() {
// newGeoIPDataset creates a Dataset[geoip2.Reader]. If d is nil, only
// opens the existing file (no download). Close is wired to Reader.Close.
func newGeoIPDataset(d *geoip.Downloader, edition, path string) *dataset.Dataset[geoip2.Reader] {
var syncer interface{ Fetch() (bool, error) }
var syncer httpcache.Syncer
if d != nil {
syncer = d.NewCacher(edition, path)
} else {
syncer = &nopSyncer{}
syncer = httpcache.NopSyncer{}
}
ds := dataset.New(syncer, func() (*geoip2.Reader, error) {
return geoip2.Open(path)
@ -186,11 +184,6 @@ func newGeoIPDataset(d *geoip.Downloader, edition, path string) *dataset.Dataset
return ds
}
// nopSyncer is a placeholder httpcache.Syncer for file-only datasets, i.e.
// ones that are opened from an existing file and never downloaded.
type nopSyncer struct{}

// Fetch does no work and always reports no update and no error.
func (*nopSyncer) Fetch() (updated bool, err error) {
	return false, nil
}
func containsInbound(ip string,
whitelist, inbound *dataset.Dataset[ipcohort.Cohort],
) bool {