golib/net/ipcohort/cmd/check-ip/blacklist.go
AJ ONeal aeb94fc26b
fix: remove double-fetch, add httpcache.NopSyncer, drop Sources.Init
Sources.Init() was redundant: gitshallow.Repo.Fetch() already clones
if missing via syncGit()->clone(). Removing it means blGroup.Init()
is the single entry point, no duplicate network calls.

httpcache.NopSyncer{} replaces the private nopSyncer in the cmd —
exported so any caller can build a file-only Dataset without a syncer.
2026-04-20 09:31:58 -06:00

110 lines
3.0 KiB
Go

package main
import (
"path/filepath"
"github.com/therootcompany/golib/net/dataset"
"github.com/therootcompany/golib/net/gitshallow"
"github.com/therootcompany/golib/net/httpcache"
"github.com/therootcompany/golib/net/ipcohort"
)
// HTTPSource describes one HTTP-backed list: the remote URL it is fetched
// from and the local file that caches the fetched copy.
type HTTPSource struct {
	// URL is the remote location of the list.
	URL string
	// Path is the local cache file; it is also the path the cohort is
	// loaded from.
	Path string
}
// Sources is the fetch configuration for the three blocklist cohorts
// (whitelist, inbound, outbound): the local files each cohort is loaded from,
// plus the syncers that refresh those files from git or HTTP.
// It carries configuration only and owns no atomic state.
type Sources struct {
	// Local file paths handed to ipcohort.LoadFiles, one list per cohort.
	whitelistPaths []string
	inboundPaths   []string
	outboundPaths  []string

	// syncs lists every syncable source; Fetch runs them in slice order.
	// It is left empty for file-only configurations.
	syncs []httpcache.Syncer
}
// newFileSources builds a Sources that reads all three cohorts from local
// files only. No syncer is registered, so Fetch has nothing to pull and
// reports no update.
func newFileSources(whitelist, inbound, outbound []string) *Sources {
	s := new(Sources)
	s.whitelistPaths = whitelist
	s.inboundPaths = inbound
	s.outboundPaths = outbound
	return s
}
// newGitSources builds a Sources whose inbound and outbound lists live inside
// a git checkout at repoDir that is synced from gitURL.
//
// inboundRel and outboundRel are joined onto repoDir; whitelist paths are used
// exactly as given. The gitshallow repo is the only registered syncer.
func newGitSources(gitURL, repoDir string, whitelist, inboundRel, outboundRel []string) *Sources {
	// inRepo maps repo-relative paths to paths underneath repoDir.
	inRepo := func(rels []string) []string {
		joined := make([]string, 0, len(rels))
		for _, rel := range rels {
			joined = append(joined, filepath.Join(repoDir, rel))
		}
		return joined
	}

	// NOTE(review): the 1 and "" arguments look like clone depth and
	// branch/ref — confirm against gitshallow.New.
	var syncer httpcache.Syncer = gitshallow.New(gitURL, repoDir, 1, "")

	s := &Sources{whitelistPaths: whitelist}
	s.inboundPaths = inRepo(inboundRel)
	s.outboundPaths = inRepo(outboundRel)
	s.syncs = []httpcache.Syncer{syncer}
	return s
}
// newHTTPSources builds a Sources whose inbound and outbound lists are
// downloaded over HTTP into local cache files.
//
// Each HTTPSource contributes its Path to the matching cohort's load paths and
// one httpcache syncer to the sync list. Syncers are registered for all
// inbound sources first, then for all outbound sources. Whitelist paths are
// plain local files and get no syncer.
func newHTTPSources(whitelist []string, inbound, outbound []HTTPSource) *Sources {
	s := &Sources{whitelistPaths: whitelist}

	// track appends every cache path in list to *dst and registers a syncer
	// for each entry.
	track := func(dst *[]string, list []HTTPSource) {
		for i := range list {
			entry := list[i]
			*dst = append(*dst, entry.Path)
			s.syncs = append(s.syncs, httpcache.New(entry.URL, entry.Path))
		}
	}
	track(&s.inboundPaths, inbound)
	track(&s.outboundPaths, outbound)

	return s
}
// Fetch runs every registered syncer in order and reports whether at least one
// of them brought in new data. It satisfies httpcache.Syncer.
//
// Fetching stops at the first syncer that fails; that error is returned
// together with whether any earlier syncer had already reported an update.
// With no syncers registered it returns (false, nil).
func (s *Sources) Fetch() (bool, error) {
	changed := false
	for i := range s.syncs {
		fresh, err := s.syncs[i].Fetch()
		if err != nil {
			return changed, err
		}
		if fresh {
			changed = true
		}
	}
	return changed, nil
}
// Datasets creates a dataset.Group that uses this Sources as its syncer and
// registers one typed dataset per configured cohort, in the order whitelist,
// inbound, outbound. Each dataset loads its cohort from the cohort's file
// paths via ipcohort.LoadFiles.
//
// A cohort with no configured paths is not registered, and its returned
// dataset is nil; this applies to whitelist, inbound, and outbound alike.
func (s *Sources) Datasets() (
	g *dataset.Group,
	whitelist *dataset.Dataset[ipcohort.Cohort],
	inbound *dataset.Dataset[ipcohort.Cohort],
	outbound *dataset.Dataset[ipcohort.Cohort],
) {
	g = dataset.NewGroup(s)

	// register adds a loader for files to the group, or returns nil when the
	// cohort has no files configured.
	register := func(files []string) *dataset.Dataset[ipcohort.Cohort] {
		if len(files) == 0 {
			return nil
		}
		return dataset.Add(g, func() (*ipcohort.Cohort, error) {
			return ipcohort.LoadFiles(files...)
		})
	}

	whitelist = register(s.whitelistPaths)
	inbound = register(s.inboundPaths)
	outbound = register(s.outboundPaths)
	return g, whitelist, inbound, outbound
}