From ff224c5bb150222bc59d212dc6deb007bdb2627d Mon Sep 17 00:00:00 2001
From: AJ ONeal
Date: Sun, 19 Apr 2026 23:01:51 -0600
Subject: [PATCH] feat: support split single_ips/networks files; ipcohort.LoadFiles variadic

---
 .../cmd/check-ip-blacklist/blacklist.go       | 64 ++++++++++++++-----
 net/ipcohort/cmd/check-ip-blacklist/main.go   | 32 +++++++---
 net/ipcohort/ipcohort.go                      | 29 +++++++++
 3 files changed, 100 insertions(+), 25 deletions(-)

diff --git a/net/ipcohort/cmd/check-ip-blacklist/blacklist.go b/net/ipcohort/cmd/check-ip-blacklist/blacklist.go
index 4d937c7..b743474 100644
--- a/net/ipcohort/cmd/check-ip-blacklist/blacklist.go
+++ b/net/ipcohort/cmd/check-ip-blacklist/blacklist.go
@@ -13,28 +13,45 @@ import (
 	"github.com/therootcompany/golib/net/ipcohort"
 )
 
+// HTTPSource pairs a remote URL with a local cache path.
+type HTTPSource struct {
+	URL  string
+	Path string
+}
+
 type Blacklist struct {
 	atomic.Pointer[ipcohort.Cohort]
-	path string
-	git  *gitshallow.Repo
-	http *httpcache.Cacher
+	paths []string
+	git   *gitshallow.Repo
+	http  []*httpcache.Cacher
 }
 
-func NewBlacklist(path string) *Blacklist {
-	return &Blacklist{path: path}
+// NewBlacklist loads from one or more local files.
+func NewBlacklist(paths ...string) *Blacklist {
+	return &Blacklist{paths: paths}
 }
 
-func NewGitBlacklist(gitURL, path string) *Blacklist {
-	repo := gitshallow.New(gitURL, filepath.Dir(path), 1, "")
-	b := &Blacklist{path: path, git: repo}
+// NewGitBlacklist clones/pulls gitURL into repoDir and loads relPaths on each update.
+func NewGitBlacklist(gitURL, repoDir string, relPaths ...string) *Blacklist {
+	repo := gitshallow.New(gitURL, repoDir, 1, "")
+	paths := make([]string, len(relPaths))
+	for i, p := range relPaths {
+		paths[i] = filepath.Join(repoDir, p)
+	}
+	b := &Blacklist{paths: paths, git: repo}
 	repo.Register(b.reload)
 	return b
 }
 
-func NewHTTPBlacklist(url, path string) *Blacklist {
-	cacher := httpcache.New(url, path)
-	b := &Blacklist{path: path, http: cacher}
-	cacher.Register(b.reload)
+// NewHTTPBlacklist fetches each source URL to its local path, reloading on any change.
+func NewHTTPBlacklist(sources ...HTTPSource) *Blacklist {
+	b := &Blacklist{}
+	for _, src := range sources {
+		b.paths = append(b.paths, src.Path)
+		c := httpcache.New(src.URL, src.Path)
+		c.Register(b.reload)
+		b.http = append(b.http, c)
+	}
 	return b
 }
 
@@ -42,8 +59,13 @@ func (b *Blacklist) Init(lightGC bool) error {
 	switch {
 	case b.git != nil:
 		return b.git.Init(lightGC)
-	case b.http != nil:
-		return b.http.Init()
+	case len(b.http) > 0:
+		for _, c := range b.http {
+			if err := c.Init(); err != nil {
+				return err
+			}
+		}
+		return nil
 	default:
 		return b.reload()
 	}
@@ -72,8 +94,16 @@ func (b *Blacklist) sync(lightGC bool) (bool, error) {
 	switch {
 	case b.git != nil:
 		return b.git.Sync(lightGC)
-	case b.http != nil:
-		return b.http.Sync()
+	case len(b.http) > 0:
+		var anyUpdated bool
+		for _, c := range b.http {
+			updated, err := c.Sync()
+			if err != nil {
+				return anyUpdated, err
+			}
+			anyUpdated = anyUpdated || updated
+		}
+		return anyUpdated, nil
 	default:
 		return false, nil
 	}
@@ -88,7 +118,7 @@ func (b *Blacklist) Size() int {
 }
 
 func (b *Blacklist) reload() error {
-	c, err := ipcohort.LoadFile(b.path)
+	c, err := ipcohort.LoadFiles(b.paths...)
 	if err != nil {
 		return err
 	}
diff --git a/net/ipcohort/cmd/check-ip-blacklist/main.go b/net/ipcohort/cmd/check-ip-blacklist/main.go
index b086afc..54cb3fb 100644
--- a/net/ipcohort/cmd/check-ip-blacklist/main.go
+++ b/net/ipcohort/cmd/check-ip-blacklist/main.go
@@ -6,27 +6,43 @@ import (
 	"strings"
 )
 
+// inbound blocklist - pre-separated by type for independent ETag caching
+const (
+	inboundSingleURL  = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/inbound/single_ips.txt"
+	inboundNetworkURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/inbound/networks.txt"
+)
+
 func main() {
 	if len(os.Args) < 3 {
-		fmt.Fprintf(os.Stderr, "Usage: %s [git-url|http-url]\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, "Usage: %s [git-url]\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, " No remote: load from \n")
+		fmt.Fprintf(os.Stderr, " git URL: clone/pull into \n")
+		fmt.Fprintf(os.Stderr, " (default): fetch via HTTP into \n")
 		os.Exit(1)
 	}
 
 	dataPath := os.Args[1]
 	ipStr := os.Args[2]
-	remoteURL := ""
+	gitURL := ""
 	if len(os.Args) >= 4 {
-		remoteURL = os.Args[3]
+		gitURL = os.Args[3]
 	}
 
 	var bl *Blacklist
 	switch {
-	case strings.HasPrefix(remoteURL, "http://") || strings.HasPrefix(remoteURL, "https://"):
-		bl = NewHTTPBlacklist(remoteURL, dataPath)
-	case remoteURL != "":
-		bl = NewGitBlacklist(remoteURL, dataPath)
-	default:
+	case gitURL != "":
+		bl = NewGitBlacklist(gitURL, dataPath,
+			"tables/inbound/single_ips.txt",
+			"tables/inbound/networks.txt",
+		)
+	case strings.HasSuffix(dataPath, ".txt") || strings.HasSuffix(dataPath, ".csv"):
 		bl = NewBlacklist(dataPath)
+	default:
+		// dataPath is a cache directory; fetch the pre-split files via HTTP
+		bl = NewHTTPBlacklist(
+			HTTPSource{inboundSingleURL, dataPath + "/single_ips.txt"},
+			HTTPSource{inboundNetworkURL, dataPath + "/networks.txt"},
+		)
 	}
 
 	if err := bl.Init(false); err != nil {
diff --git a/net/ipcohort/ipcohort.go b/net/ipcohort/ipcohort.go
index 61ff414..8887466 100644
--- 
a/net/ipcohort/ipcohort.go
+++ b/net/ipcohort/ipcohort.go
@@ -140,6 +140,35 @@ func LoadFile(path string) (*Cohort, error) {
 	return ParseCSV(f)
 }
 
+// LoadFiles loads and merges multiple files into one Cohort.
+// Useful when hosts and networks are stored in separate files.
+func LoadFiles(paths ...string) (*Cohort, error) {
+	var hosts []uint32
+	var nets []IPv4Net
+
+	for _, path := range paths {
+		c, err := LoadFile(path)
+		if err != nil {
+			return nil, err
+		}
+		hosts = append(hosts, c.hosts...)
+		nets = append(nets, c.nets...)
+	}
+
+	slices.Sort(hosts)
+	slices.SortFunc(nets, func(a, b IPv4Net) int {
+		if a.networkBE < b.networkBE {
+			return -1
+		}
+		if a.networkBE > b.networkBE {
+			return 1
+		}
+		return 0
+	})
+
+	return &Cohort{hosts: hosts, nets: nets}, nil
+}
+
 func ParseCSV(f io.Reader) (*Cohort, error) {
 	r := csv.NewReader(f)
 	r.FieldsPerRecord = -1