refactor: merge blacklist.go into main.go via dataset.MultiSyncer

This commit is contained in:
AJ ONeal 2026-04-20 12:23:13 -06:00
parent 3ac9683015
commit 6b420badbc
No known key found for this signature in database
3 changed files with 99 additions and 162 deletions

View File

@ -1,140 +0,0 @@
package main
import (
"path/filepath"
"strings"
"github.com/therootcompany/golib/net/dataset"
"github.com/therootcompany/golib/net/gitshallow"
"github.com/therootcompany/golib/net/httpcache"
"github.com/therootcompany/golib/net/ipcohort"
)
// Sources holds fetch configuration for the blacklist cohorts.
type Sources struct {
whitelistPaths []string
inboundPaths []string
outboundPaths []string
syncs []dataset.Syncer
}
// buildSources constructs the right Sources from CLI flags.
//
// - gitURL set → clone/pull the bitwire-it repo; inbound/outbound from known relative paths
// - inbound/outbound set → use those explicit file paths, no network sync
// - neither set → HTTP-fetch the bitwire-it files into dataDir (or default cache dir)
// buildSources constructs the right Sources from CLI flags.
//
//   - gitURL set → clone/pull the bitwire-it repo; inbound/outbound from known relative paths
//   - inbound/outbound set → use those explicit file paths, no network sync
//   - neither set → HTTP-fetch the bitwire-it files into dataDir (or default cache dir)
func buildSources(gitURL, dataDir, whitelistFlag, inboundFlag, outboundFlag string) *Sources {
	src := &Sources{whitelistPaths: splitPaths(whitelistFlag)}

	// Explicit file paths always win: no remote syncing at all.
	if inboundFlag != "" || outboundFlag != "" {
		src.inboundPaths = splitPaths(inboundFlag)
		src.outboundPaths = splitPaths(outboundFlag)
		return src
	}

	dir := dataDir
	if dir == "" {
		dir = defaultCacheDir("bitwire-it")
	}

	if gitURL != "" {
		// Shallow clone (depth 1); cohort files live at fixed paths in the repo.
		src.syncs = []dataset.Syncer{gitshallow.New(gitURL, dir, 1, "")}
		src.inboundPaths = []string{
			filepath.Join(dir, "tables/inbound/single_ips.txt"),
			filepath.Join(dir, "tables/inbound/networks.txt"),
		}
		src.outboundPaths = []string{
			filepath.Join(dir, "tables/outbound/single_ips.txt"),
			filepath.Join(dir, "tables/outbound/networks.txt"),
		}
		return src
	}

	// Default: HTTP fetch each list from bitwire-it into the cache dir.
	type remote struct{ url, file string }
	remotes := []remote{
		{inboundSingleURL, filepath.Join(dir, "inbound_single_ips.txt")},
		{inboundNetworkURL, filepath.Join(dir, "inbound_networks.txt")},
		{outboundSingleURL, filepath.Join(dir, "outbound_single_ips.txt")},
		{outboundNetworkURL, filepath.Join(dir, "outbound_networks.txt")},
	}
	for _, r := range remotes {
		src.syncs = append(src.syncs, httpcache.New(r.url, r.file))
	}
	src.inboundPaths = []string{remotes[0].file, remotes[1].file}
	src.outboundPaths = []string{remotes[2].file, remotes[3].file}
	return src
}
// splitPaths parses a comma-separated flag value into individual paths.
// An empty flag yields nil so len()==0 checks work for "flag not given".
func splitPaths(s string) []string {
	var paths []string
	if s != "" {
		paths = strings.Split(s, ",")
	}
	return paths
}
// Fetch pulls updates from all sources. Satisfies dataset.Syncer.
func (s *Sources) Fetch() (bool, error) {
var anyUpdated bool
for _, syn := range s.syncs {
updated, err := syn.Fetch()
if err != nil {
return anyUpdated, err
}
anyUpdated = anyUpdated || updated
}
return anyUpdated, nil
}
// Datasets builds a dataset.Group and returns typed views for each cohort.
// A returned view is nil when its cohort has no configured source paths.
func (s *Sources) Datasets() (
	g *dataset.Group,
	whitelist *dataset.View[ipcohort.Cohort],
	inbound *dataset.View[ipcohort.Cohort],
	outbound *dataset.View[ipcohort.Cohort],
) {
	g = dataset.NewGroup(s)
	// addCohort registers a file-backed loader for the given paths, or
	// returns nil when there is nothing to load for that cohort.
	addCohort := func(paths []string) *dataset.View[ipcohort.Cohort] {
		if len(paths) == 0 {
			return nil
		}
		return dataset.Add(g, func() (*ipcohort.Cohort, error) {
			return ipcohort.LoadFiles(paths...)
		})
	}
	whitelist = addCohort(s.whitelistPaths)
	inbound = addCohort(s.inboundPaths)
	outbound = addCohort(s.outboundPaths)
	return g, whitelist, inbound, outbound
}
// isBlocked returns true if ip is in cohort and not in whitelist.
func isBlocked(ip string, whitelist, cohort *dataset.View[ipcohort.Cohort]) bool {
if cohort == nil {
return false
}
if whitelist != nil && whitelist.Load().Contains(ip) {
return false
}
return cohort.Load().Contains(ip)
}
// cohortSize returns the number of entries in ds, or 0 for a nil view.
func cohortSize(ds *dataset.View[ipcohort.Cohort]) int {
	if ds != nil {
		return ds.Load().Size()
	}
	return 0
}

View File

@ -6,38 +6,31 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/therootcompany/golib/net/dataset"
"github.com/therootcompany/golib/net/gitshallow"
"github.com/therootcompany/golib/net/httpcache"
"github.com/therootcompany/golib/net/ipcohort"
)
// Default HTTP sources for the bitwire-it blacklist.
const (
inboundSingleURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/inbound/single_ips.txt"
inboundNetworkURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/inbound/networks.txt"
inboundSingleURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/inbound/single_ips.txt"
inboundNetworkURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/inbound/networks.txt"
outboundSingleURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/outbound/single_ips.txt"
outboundNetworkURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/outbound/networks.txt"
)
// defaultCacheDir returns <user-cache-dir>/sub, falling back to
// $HOME/.cache/sub when the OS cache directory cannot be determined.
func defaultCacheDir(sub string) string {
	if base, err := os.UserCacheDir(); err == nil {
		return filepath.Join(base, sub)
	}
	return filepath.Join(os.Getenv("HOME"), ".cache", sub)
}
func main() {
// Blacklist source flags — all optional; defaults pull from bitwire-it via HTTP.
dataDir := flag.String("data-dir", "", "blacklist cache dir (default ~/.cache/bitwire-it)")
gitURL := flag.String("git", "", "git URL to clone/pull blacklist from (alternative to HTTP)")
whitelist := flag.String("whitelist", "", "path to whitelist file (overrides block)")
gitURL := flag.String("git", "", "git URL to clone/pull blacklist from")
whitelist := flag.String("whitelist", "", "path to whitelist file")
inbound := flag.String("inbound", "", "comma-separated paths to inbound blacklist files")
outbound := flag.String("outbound", "", "comma-separated paths to outbound blacklist files")
// GeoIP flags — auto-discovered from ./GeoIP.conf or ~/.config/maxmind/GeoIP.conf.
geoipConf := flag.String("geoip-conf", "", "path to GeoIP.conf (auto-discovered if absent)")
cityDB := flag.String("city-db", "", "path to GeoLite2-City.mmdb (skips auto-download)")
asnDB := flag.String("asn-db", "", "path to GeoLite2-ASN.mmdb (skips auto-download)")
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage: %s [flags] <ip-address>\n", os.Args[0])
flag.PrintDefaults()
@ -52,14 +45,57 @@ func main() {
// -- Blacklist ----------------------------------------------------------
src := buildSources(*gitURL, *dataDir, *whitelist, *inbound, *outbound)
blGroup, whitelistDS, inboundDS, outboundDS := src.Datasets()
if err := blGroup.Init(); err != nil {
var syncs dataset.MultiSyncer
var inboundPaths, outboundPaths []string
switch {
case *inbound != "" || *outbound != "":
inboundPaths = splitPaths(*inbound)
outboundPaths = splitPaths(*outbound)
case *gitURL != "":
cacheDir := cacheDir(*dataDir, "bitwire-it")
syncs = dataset.MultiSyncer{gitshallow.New(*gitURL, cacheDir, 1, "")}
inboundPaths = []string{
filepath.Join(cacheDir, "tables/inbound/single_ips.txt"),
filepath.Join(cacheDir, "tables/inbound/networks.txt"),
}
outboundPaths = []string{
filepath.Join(cacheDir, "tables/outbound/single_ips.txt"),
filepath.Join(cacheDir, "tables/outbound/networks.txt"),
}
default:
dir := cacheDir(*dataDir, "bitwire-it")
inboundSingle := filepath.Join(dir, "inbound_single_ips.txt")
inboundNetwork := filepath.Join(dir, "inbound_networks.txt")
outboundSingle := filepath.Join(dir, "outbound_single_ips.txt")
outboundNetwork := filepath.Join(dir, "outbound_networks.txt")
syncs = dataset.MultiSyncer{
httpcache.New(inboundSingleURL, inboundSingle),
httpcache.New(inboundNetworkURL, inboundNetwork),
httpcache.New(outboundSingleURL, outboundSingle),
httpcache.New(outboundNetworkURL, outboundNetwork),
}
inboundPaths = []string{inboundSingle, inboundNetwork}
outboundPaths = []string{outboundSingle, outboundNetwork}
}
g := dataset.NewGroup(syncs)
var whitelistDS *dataset.View[ipcohort.Cohort]
if *whitelist != "" {
paths := splitPaths(*whitelist)
whitelistDS = dataset.Add(g, func() (*ipcohort.Cohort, error) { return ipcohort.LoadFiles(paths...) })
}
inboundDS := dataset.Add(g, func() (*ipcohort.Cohort, error) { return ipcohort.LoadFiles(inboundPaths...) })
outboundDS := dataset.Add(g, func() (*ipcohort.Cohort, error) { return ipcohort.LoadFiles(outboundPaths...) })
if err := g.Init(); err != nil {
fmt.Fprintf(os.Stderr, "error: blacklist: %v\n", err)
os.Exit(1)
}
fmt.Fprintf(os.Stderr, "Loaded inbound=%d outbound=%d\n",
cohortSize(inboundDS), cohortSize(outboundDS))
inboundDS.Load().Size(), outboundDS.Load().Size())
// -- GeoIP (optional) --------------------------------------------------
@ -77,7 +113,7 @@ func main() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go blGroup.Run(ctx, 47*time.Minute)
go g.Run(ctx, 47*time.Minute)
geo.Run(ctx, 47*time.Minute)
// -- Check and report --------------------------------------------------
@ -101,3 +137,28 @@ func main() {
os.Exit(1)
}
}
// isBlocked reports whether ip appears in cohort without a whitelist
// override. A nil cohort blocks nothing; a nil whitelist excuses nothing.
func isBlocked(ip string, whitelist, cohort *dataset.View[ipcohort.Cohort]) bool {
	if cohort == nil {
		return false
	}
	whitelisted := whitelist != nil && whitelist.Load().Contains(ip)
	return !whitelisted && cohort.Load().Contains(ip)
}
func cacheDir(override, sub string) string {
if override != "" {
return override
}
base, err := os.UserCacheDir()
if err != nil {
base = filepath.Join(os.Getenv("HOME"), ".cache")
}
return filepath.Join(base, sub)
}
// splitPaths parses a comma-separated flag value into individual paths.
// An empty flag yields nil: strings.Split("", ",") would return [""],
// and that lone empty path would later be handed to ipcohort.LoadFiles
// (e.g. when only -inbound is set, -outbound is still split in the
// `*inbound != "" || *outbound != ""` branch of main).
func splitPaths(s string) []string {
	if s == "" {
		return nil
	}
	return strings.Split(s, ",")
}

View File

@ -34,6 +34,22 @@ type Syncer interface {
Fetch() (updated bool, err error)
}
// MultiSyncer fans out Fetch to multiple Syncers, returning updated=true if
// any reports a change. Stops and returns the first error.
type MultiSyncer []Syncer

// Fetch invokes each member in order. On error it returns immediately,
// reporting whether any earlier member had already signaled an update.
func (ms MultiSyncer) Fetch() (bool, error) {
	updated := false
	for _, syncer := range ms {
		changed, err := syncer.Fetch()
		if err != nil {
			return updated, err
		}
		if changed {
			updated = true
		}
	}
	return updated, nil
}
// NopSyncer is a Syncer that always reports no update and no error.
// Use for datasets backed by local files with no remote source.
type NopSyncer struct{}