feat: support split single_ips/networks files; ipcohort.LoadFiles variadic

This commit is contained in:
AJ ONeal 2026-04-19 23:01:51 -06:00
parent a9adc3dc18
commit ff224c5bb1
No known key found for this signature in database
3 changed files with 100 additions and 25 deletions

View File

@ -13,28 +13,45 @@ import (
"github.com/therootcompany/golib/net/ipcohort"
)
// HTTPSource pairs a remote URL with a local cache path.
// NewHTTPBlacklist uses one HTTPSource per remote list file so each
// file can be fetched and cached independently.
type HTTPSource struct {
	URL  string // remote location the list is fetched from
	Path string // local file the fetched content is cached to and loaded from
}
type Blacklist struct {
atomic.Pointer[ipcohort.Cohort]
path string
paths []string
git *gitshallow.Repo
http *httpcache.Cacher
http []*httpcache.Cacher
}
func NewBlacklist(path string) *Blacklist {
return &Blacklist{path: path}
// NewBlacklist loads from one or more local files.
// No remote source is configured: there is nothing to sync, and reload
// reads the given paths directly from disk.
func NewBlacklist(paths ...string) *Blacklist {
	return &Blacklist{paths: paths}
}
func NewGitBlacklist(gitURL, path string) *Blacklist {
repo := gitshallow.New(gitURL, filepath.Dir(path), 1, "")
b := &Blacklist{path: path, git: repo}
// NewGitBlacklist clones/pulls gitURL into repoDir and loads relPaths on each update.
func NewGitBlacklist(gitURL, repoDir string, relPaths ...string) *Blacklist {
	// Resolve each repo-relative list file against the checkout directory.
	absolute := make([]string, 0, len(relPaths))
	for _, rel := range relPaths {
		absolute = append(absolute, filepath.Join(repoDir, rel))
	}
	bl := &Blacklist{
		paths: absolute,
		git:   gitshallow.New(gitURL, repoDir, 1, ""),
	}
	// Reload the cohort whenever the shallow repo reports new content.
	bl.git.Register(bl.reload)
	return bl
}
func NewHTTPBlacklist(url, path string) *Blacklist {
cacher := httpcache.New(url, path)
b := &Blacklist{path: path, http: cacher}
cacher.Register(b.reload)
// NewHTTPBlacklist fetches each source URL to its local path, reloading on any change.
func NewHTTPBlacklist(sources ...HTTPSource) *Blacklist {
	bl := &Blacklist{}
	for _, source := range sources {
		// One cacher per source so each file keeps its own ETag state;
		// any single file changing triggers a full reload of all paths.
		cacher := httpcache.New(source.URL, source.Path)
		cacher.Register(bl.reload)
		bl.http = append(bl.http, cacher)
		bl.paths = append(bl.paths, source.Path)
	}
	return bl
}
@ -42,8 +59,13 @@ func (b *Blacklist) Init(lightGC bool) error {
switch {
case b.git != nil:
return b.git.Init(lightGC)
case b.http != nil:
return b.http.Init()
case len(b.http) > 0:
for _, c := range b.http {
if err := c.Init(); err != nil {
return err
}
}
return nil
default:
return b.reload()
}
@ -72,8 +94,16 @@ func (b *Blacklist) sync(lightGC bool) (bool, error) {
switch {
case b.git != nil:
return b.git.Sync(lightGC)
case b.http != nil:
return b.http.Sync()
case len(b.http) > 0:
var anyUpdated bool
for _, c := range b.http {
updated, err := c.Sync()
if err != nil {
return anyUpdated, err
}
anyUpdated = anyUpdated || updated
}
return anyUpdated, nil
default:
return false, nil
}
@ -88,7 +118,7 @@ func (b *Blacklist) Size() int {
}
func (b *Blacklist) reload() error {
c, err := ipcohort.LoadFile(b.path)
c, err := ipcohort.LoadFiles(b.paths...)
if err != nil {
return err
}

View File

@ -6,27 +6,43 @@ import (
"strings"
)
// inbound blocklist - pre-separated by type for independent ETag caching
const (
	// inboundSingleURL: upstream list of individual IP addresses
	// (per the upstream file's naming; content format not verified here).
	inboundSingleURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/inbound/single_ips.txt"
	// inboundNetworkURL: upstream list of network ranges, kept as a
	// separate file so each URL caches with its own ETag.
	inboundNetworkURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/inbound/networks.txt"
)
func main() {
if len(os.Args) < 3 {
fmt.Fprintf(os.Stderr, "Usage: %s <blacklist.csv> <ip-address> [git-url|http-url]\n", os.Args[0])
fmt.Fprintf(os.Stderr, "Usage: %s <cache-dir|blacklist.csv> <ip-address> [git-url]\n", os.Args[0])
fmt.Fprintf(os.Stderr, " No remote: load from <blacklist.csv>\n")
fmt.Fprintf(os.Stderr, " git URL: clone/pull into <cache-dir>\n")
fmt.Fprintf(os.Stderr, " (default): fetch via HTTP into <cache-dir>\n")
os.Exit(1)
}
dataPath := os.Args[1]
ipStr := os.Args[2]
remoteURL := ""
gitURL := ""
if len(os.Args) >= 4 {
remoteURL = os.Args[3]
gitURL = os.Args[3]
}
var bl *Blacklist
switch {
case strings.HasPrefix(remoteURL, "http://") || strings.HasPrefix(remoteURL, "https://"):
bl = NewHTTPBlacklist(remoteURL, dataPath)
case remoteURL != "":
bl = NewGitBlacklist(remoteURL, dataPath)
default:
case gitURL != "":
bl = NewGitBlacklist(gitURL, dataPath,
"tables/inbound/single_ips.txt",
"tables/inbound/networks.txt",
)
case strings.HasSuffix(dataPath, ".txt") || strings.HasSuffix(dataPath, ".csv"):
bl = NewBlacklist(dataPath)
default:
// dataPath is a cache directory; fetch the pre-split files via HTTP
bl = NewHTTPBlacklist(
HTTPSource{inboundSingleURL, dataPath + "/single_ips.txt"},
HTTPSource{inboundNetworkURL, dataPath + "/networks.txt"},
)
}
if err := bl.Init(false); err != nil {

View File

@ -140,6 +140,35 @@ func LoadFile(path string) (*Cohort, error) {
return ParseCSV(f)
}
// LoadFiles loads and merges multiple files into one Cohort.
// Useful when hosts and networks are stored in separate files
// (e.g. pre-split single_ips and networks lists).
//
// Entries appearing in more than one file are deduplicated, and both
// sets are re-sorted after merging since concatenation breaks each
// file's individual ordering.
func LoadFiles(paths ...string) (*Cohort, error) {
	var hosts []uint32
	var nets []IPv4Net
	for _, path := range paths {
		c, err := LoadFile(path)
		if err != nil {
			return nil, err
		}
		hosts = append(hosts, c.hosts...)
		nets = append(nets, c.nets...)
	}
	slices.Sort(hosts)
	// Drop duplicate hosts that appear in multiple source files so
	// lookups search a minimal set and counts are not inflated.
	hosts = slices.Compact(hosts)
	slices.SortFunc(nets, func(a, b IPv4Net) int {
		// NOTE(review): ordered by networkBE only; ties between nets
		// with the same base address but different masks keep their
		// relative sort-input order unspecified — confirm lookup does
		// not depend on mask order within equal networkBE.
		switch {
		case a.networkBE < b.networkBE:
			return -1
		case a.networkBE > b.networkBE:
			return 1
		default:
			return 0
		}
	})
	return &Cohort{hosts: hosts, nets: nets}, nil
}
func ParseCSV(f io.Reader) (*Cohort, error) {
r := csv.NewReader(f)
r.FieldsPerRecord = -1