From 6b420badbc24cc987532a402489d86f62d29854a Mon Sep 17 00:00:00 2001 From: AJ ONeal Date: Mon, 20 Apr 2026 12:23:13 -0600 Subject: [PATCH] refactor: merge blacklist.go into main.go via dataset.MultiSyncer --- cmd/check-ip/blacklist.go | 140 -------------------------------------- cmd/check-ip/main.go | 105 ++++++++++++++++++++++------ net/dataset/dataset.go | 16 +++++ 3 files changed, 99 insertions(+), 162 deletions(-) delete mode 100644 cmd/check-ip/blacklist.go diff --git a/cmd/check-ip/blacklist.go b/cmd/check-ip/blacklist.go deleted file mode 100644 index bc46b3d..0000000 --- a/cmd/check-ip/blacklist.go +++ /dev/null @@ -1,140 +0,0 @@ -package main - -import ( - "path/filepath" - "strings" - - "github.com/therootcompany/golib/net/dataset" - "github.com/therootcompany/golib/net/gitshallow" - "github.com/therootcompany/golib/net/httpcache" - "github.com/therootcompany/golib/net/ipcohort" -) - -// Sources holds fetch configuration for the blacklist cohorts. -type Sources struct { - whitelistPaths []string - inboundPaths []string - outboundPaths []string - syncs []dataset.Syncer -} - -// buildSources constructs the right Sources from CLI flags. -// -// - gitURL set → clone/pull the bitwire-it repo; inbound/outbound from known relative paths -// - inbound/outbound set → use those explicit file paths, no network sync -// - neither set → HTTP-fetch the bitwire-it files into dataDir (or default cache dir) -func buildSources(gitURL, dataDir, whitelistFlag, inboundFlag, outboundFlag string) *Sources { - // Explicit file paths always win. - if inboundFlag != "" || outboundFlag != "" { - return &Sources{ - whitelistPaths: splitPaths(whitelistFlag), - inboundPaths: splitPaths(inboundFlag), - outboundPaths: splitPaths(outboundFlag), - } - } - - cacheDir := dataDir - if cacheDir == "" { - cacheDir = defaultCacheDir("bitwire-it") - } - - if gitURL != "" { - repo := gitshallow.New(gitURL, cacheDir, 1, "") - return &Sources{ - whitelistPaths: splitPaths(whitelistFlag), - inboundPaths: []string{ - filepath.Join(cacheDir, "tables/inbound/single_ips.txt"), - filepath.Join(cacheDir, "tables/inbound/networks.txt"), - }, - outboundPaths: []string{ - filepath.Join(cacheDir, "tables/outbound/single_ips.txt"), - filepath.Join(cacheDir, "tables/outbound/networks.txt"), - }, - syncs: []dataset.Syncer{repo}, - } - } - - // Default: HTTP fetch from bitwire-it into cacheDir. - inboundSingle := filepath.Join(cacheDir, "inbound_single_ips.txt") - inboundNetwork := filepath.Join(cacheDir, "inbound_networks.txt") - outboundSingle := filepath.Join(cacheDir, "outbound_single_ips.txt") - outboundNetwork := filepath.Join(cacheDir, "outbound_networks.txt") - return &Sources{ - whitelistPaths: splitPaths(whitelistFlag), - inboundPaths: []string{inboundSingle, inboundNetwork}, - outboundPaths: []string{outboundSingle, outboundNetwork}, - syncs: []dataset.Syncer{ - httpcache.New(inboundSingleURL, inboundSingle), - httpcache.New(inboundNetworkURL, inboundNetwork), - httpcache.New(outboundSingleURL, outboundSingle), - httpcache.New(outboundNetworkURL, outboundNetwork), - }, - } -} - -func splitPaths(s string) []string { - if s == "" { - return nil - } - return strings.Split(s, ",") -} - -// Fetch pulls updates from all sources. Satisfies dataset.Syncer. -func (s *Sources) Fetch() (bool, error) { - var anyUpdated bool - for _, syn := range s.syncs { - updated, err := syn.Fetch() - if err != nil { - return anyUpdated, err - } - anyUpdated = anyUpdated || updated - } - return anyUpdated, nil -} - -// Datasets builds a dataset.Group and returns typed views for each cohort. -func (s *Sources) Datasets() ( - g *dataset.Group, - whitelist *dataset.View[ipcohort.Cohort], - inbound *dataset.View[ipcohort.Cohort], - outbound *dataset.View[ipcohort.Cohort], -) { - g = dataset.NewGroup(s) - if len(s.whitelistPaths) > 0 { - paths := s.whitelistPaths - whitelist = dataset.Add(g, func() (*ipcohort.Cohort, error) { - return ipcohort.LoadFiles(paths...) - }) - } - if len(s.inboundPaths) > 0 { - paths := s.inboundPaths - inbound = dataset.Add(g, func() (*ipcohort.Cohort, error) { - return ipcohort.LoadFiles(paths...) - }) - } - if len(s.outboundPaths) > 0 { - paths := s.outboundPaths - outbound = dataset.Add(g, func() (*ipcohort.Cohort, error) { - return ipcohort.LoadFiles(paths...) - }) - } - return g, whitelist, inbound, outbound -} - -// isBlocked returns true if ip is in cohort and not in whitelist. -func isBlocked(ip string, whitelist, cohort *dataset.View[ipcohort.Cohort]) bool { - if cohort == nil { - return false - } - if whitelist != nil && whitelist.Load().Contains(ip) { - return false - } - return cohort.Load().Contains(ip) -} - -func cohortSize(ds *dataset.View[ipcohort.Cohort]) int { - if ds == nil { - return 0 - } - return ds.Load().Size() -} diff --git a/cmd/check-ip/main.go b/cmd/check-ip/main.go index 41f8fa2..d8d73a0 100644 --- a/cmd/check-ip/main.go +++ b/cmd/check-ip/main.go @@ -6,38 +6,31 @@ import ( "fmt" "os" "path/filepath" + "strings" "time" + + "github.com/therootcompany/golib/net/dataset" + "github.com/therootcompany/golib/net/gitshallow" + "github.com/therootcompany/golib/net/httpcache" + "github.com/therootcompany/golib/net/ipcohort" ) -// Default HTTP sources for the bitwire-it blacklist. const ( - inboundSingleURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/inbound/single_ips.txt" - inboundNetworkURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/inbound/networks.txt" + inboundSingleURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/inbound/single_ips.txt" + inboundNetworkURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/inbound/networks.txt" outboundSingleURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/outbound/single_ips.txt" outboundNetworkURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/outbound/networks.txt" ) -func defaultCacheDir(sub string) string { - base, err := os.UserCacheDir() - if err != nil { - base = filepath.Join(os.Getenv("HOME"), ".cache") - } - return filepath.Join(base, sub) -} - func main() { - // Blacklist source flags — all optional; defaults pull from bitwire-it via HTTP. dataDir := flag.String("data-dir", "", "blacklist cache dir (default ~/.cache/bitwire-it)") - gitURL := flag.String("git", "", "git URL to clone/pull blacklist from (alternative to HTTP)") - whitelist := flag.String("whitelist", "", "path to whitelist file (overrides block)") + gitURL := flag.String("git", "", "git URL to clone/pull blacklist from") + whitelist := flag.String("whitelist", "", "path to whitelist file") inbound := flag.String("inbound", "", "comma-separated paths to inbound blacklist files") outbound := flag.String("outbound", "", "comma-separated paths to outbound blacklist files") - - // GeoIP flags — auto-discovered from ./GeoIP.conf or ~/.config/maxmind/GeoIP.conf. geoipConf := flag.String("geoip-conf", "", "path to GeoIP.conf (auto-discovered if absent)") cityDB := flag.String("city-db", "", "path to GeoLite2-City.mmdb (skips auto-download)") asnDB := flag.String("asn-db", "", "path to GeoLite2-ASN.mmdb (skips auto-download)") - flag.Usage = func() { fmt.Fprintf(os.Stderr, "Usage: %s [flags] \n", os.Args[0]) flag.PrintDefaults() @@ -52,14 +45,57 @@ func main() { // -- Blacklist ---------------------------------------------------------- - src := buildSources(*gitURL, *dataDir, *whitelist, *inbound, *outbound) - blGroup, whitelistDS, inboundDS, outboundDS := src.Datasets() - if err := blGroup.Init(); err != nil { + var syncs dataset.MultiSyncer + var inboundPaths, outboundPaths []string + + switch { + case *inbound != "" || *outbound != "": + inboundPaths = splitPaths(*inbound) + outboundPaths = splitPaths(*outbound) + + case *gitURL != "": + cacheDir := cacheDir(*dataDir, "bitwire-it") + syncs = dataset.MultiSyncer{gitshallow.New(*gitURL, cacheDir, 1, "")} + inboundPaths = []string{ + filepath.Join(cacheDir, "tables/inbound/single_ips.txt"), + filepath.Join(cacheDir, "tables/inbound/networks.txt"), + } + outboundPaths = []string{ + filepath.Join(cacheDir, "tables/outbound/single_ips.txt"), + filepath.Join(cacheDir, "tables/outbound/networks.txt"), + } + + default: + dir := cacheDir(*dataDir, "bitwire-it") + inboundSingle := filepath.Join(dir, "inbound_single_ips.txt") + inboundNetwork := filepath.Join(dir, "inbound_networks.txt") + outboundSingle := filepath.Join(dir, "outbound_single_ips.txt") + outboundNetwork := filepath.Join(dir, "outbound_networks.txt") + syncs = dataset.MultiSyncer{ + httpcache.New(inboundSingleURL, inboundSingle), + httpcache.New(inboundNetworkURL, inboundNetwork), + httpcache.New(outboundSingleURL, outboundSingle), + httpcache.New(outboundNetworkURL, outboundNetwork), + } + inboundPaths = []string{inboundSingle, inboundNetwork} + outboundPaths = []string{outboundSingle, outboundNetwork} + } + + g := dataset.NewGroup(syncs) + var whitelistDS *dataset.View[ipcohort.Cohort] + if *whitelist != "" { + paths := splitPaths(*whitelist) + whitelistDS = dataset.Add(g, func() (*ipcohort.Cohort, error) { return ipcohort.LoadFiles(paths...) }) + } + inboundDS := dataset.Add(g, func() (*ipcohort.Cohort, error) { return ipcohort.LoadFiles(inboundPaths...) }) + outboundDS := dataset.Add(g, func() (*ipcohort.Cohort, error) { return ipcohort.LoadFiles(outboundPaths...) }) + + if err := g.Init(); err != nil { fmt.Fprintf(os.Stderr, "error: blacklist: %v\n", err) os.Exit(1) } fmt.Fprintf(os.Stderr, "Loaded inbound=%d outbound=%d\n", - cohortSize(inboundDS), cohortSize(outboundDS)) + inboundDS.Load().Size(), outboundDS.Load().Size()) // -- GeoIP (optional) -------------------------------------------------- @@ -77,7 +113,7 @@ func main() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - go blGroup.Run(ctx, 47*time.Minute) + go g.Run(ctx, 47*time.Minute) geo.Run(ctx, 47*time.Minute) // -- Check and report -------------------------------------------------- @@ -101,3 +137,28 @@ func main() { os.Exit(1) } } + +func isBlocked(ip string, whitelist, cohort *dataset.View[ipcohort.Cohort]) bool { + if cohort == nil { + return false + } + if whitelist != nil && whitelist.Load().Contains(ip) { + return false + } + return cohort.Load().Contains(ip) +} + +func cacheDir(override, sub string) string { + if override != "" { + return override + } + base, err := os.UserCacheDir() + if err != nil { + base = filepath.Join(os.Getenv("HOME"), ".cache") + } + return filepath.Join(base, sub) +} + +func splitPaths(s string) []string { + return strings.Split(s, ",") +} diff --git a/net/dataset/dataset.go b/net/dataset/dataset.go index b84a272..f9a88e7 100644 --- a/net/dataset/dataset.go +++ b/net/dataset/dataset.go @@ -34,6 +34,22 @@ type Syncer interface { Fetch() (updated bool, err error) } +// MultiSyncer fans out Fetch to multiple Syncers, returning updated=true if +// any reports a change. Stops and returns the first error. +type MultiSyncer []Syncer + +func (ms MultiSyncer) Fetch() (bool, error) { + var anyUpdated bool + for _, s := range ms { + updated, err := s.Fetch() + if err != nil { + return anyUpdated, err + } + anyUpdated = anyUpdated || updated + } + return anyUpdated, nil +} + // NopSyncer is a Syncer that always reports no update and no error. // Use for datasets backed by local files with no remote source. type NopSyncer struct{}