feat: support split single_ips/networks files; ipcohort.LoadFiles variadic

This commit is contained in:
AJ ONeal 2026-04-19 23:01:51 -06:00
parent a9adc3dc18
commit ff224c5bb1
No known key found for this signature in database
3 changed files with 100 additions and 25 deletions

View File

@ -13,28 +13,45 @@ import (
"github.com/therootcompany/golib/net/ipcohort" "github.com/therootcompany/golib/net/ipcohort"
) )
// HTTPSource pairs a remote URL with a local cache path.
// NewHTTPBlacklist accepts one HTTPSource per file: it builds a cacher from
// URL and Path, and records Path as one of the files loaded on reload.
type HTTPSource struct {
// URL is the remote address passed to httpcache.New.
URL string
// Path is the local file passed to httpcache.New; the blacklist is later
// loaded from this same path.
Path string
}
type Blacklist struct { type Blacklist struct {
atomic.Pointer[ipcohort.Cohort] atomic.Pointer[ipcohort.Cohort]
path string paths []string
git *gitshallow.Repo git *gitshallow.Repo
http *httpcache.Cacher http []*httpcache.Cacher
} }
func NewBlacklist(path string) *Blacklist { // NewBlacklist loads from one or more local files.
return &Blacklist{path: path} func NewBlacklist(paths ...string) *Blacklist {
return &Blacklist{paths: paths}
} }
func NewGitBlacklist(gitURL, path string) *Blacklist { // NewGitBlacklist clones/pulls gitURL into repoDir and loads relPaths on each update.
repo := gitshallow.New(gitURL, filepath.Dir(path), 1, "") func NewGitBlacklist(gitURL, repoDir string, relPaths ...string) *Blacklist {
b := &Blacklist{path: path, git: repo} repo := gitshallow.New(gitURL, repoDir, 1, "")
paths := make([]string, len(relPaths))
for i, p := range relPaths {
paths[i] = filepath.Join(repoDir, p)
}
b := &Blacklist{paths: paths, git: repo}
repo.Register(b.reload) repo.Register(b.reload)
return b return b
} }
func NewHTTPBlacklist(url, path string) *Blacklist { // NewHTTPBlacklist fetches each source URL to its local path, reloading on any change.
cacher := httpcache.New(url, path) func NewHTTPBlacklist(sources ...HTTPSource) *Blacklist {
b := &Blacklist{path: path, http: cacher} b := &Blacklist{}
cacher.Register(b.reload) for _, src := range sources {
b.paths = append(b.paths, src.Path)
c := httpcache.New(src.URL, src.Path)
c.Register(b.reload)
b.http = append(b.http, c)
}
return b return b
} }
@ -42,8 +59,13 @@ func (b *Blacklist) Init(lightGC bool) error {
switch { switch {
case b.git != nil: case b.git != nil:
return b.git.Init(lightGC) return b.git.Init(lightGC)
case b.http != nil: case len(b.http) > 0:
return b.http.Init() for _, c := range b.http {
if err := c.Init(); err != nil {
return err
}
}
return nil
default: default:
return b.reload() return b.reload()
} }
@ -72,8 +94,16 @@ func (b *Blacklist) sync(lightGC bool) (bool, error) {
switch { switch {
case b.git != nil: case b.git != nil:
return b.git.Sync(lightGC) return b.git.Sync(lightGC)
case b.http != nil: case len(b.http) > 0:
return b.http.Sync() var anyUpdated bool
for _, c := range b.http {
updated, err := c.Sync()
if err != nil {
return anyUpdated, err
}
anyUpdated = anyUpdated || updated
}
return anyUpdated, nil
default: default:
return false, nil return false, nil
} }
@ -88,7 +118,7 @@ func (b *Blacklist) Size() int {
} }
func (b *Blacklist) reload() error { func (b *Blacklist) reload() error {
c, err := ipcohort.LoadFile(b.path) c, err := ipcohort.LoadFiles(b.paths...)
if err != nil { if err != nil {
return err return err
} }

View File

@ -6,27 +6,43 @@ import (
"strings" "strings"
) )
// Inbound blocklist sources. Upstream publishes the list pre-separated by
// entry type (single IPs vs. networks) so that each file can be ETag-cached
// independently. main fetches both over HTTP when it is given a cache
// directory and no git URL.
const (
// inboundSingleURL is the file of individual inbound IP addresses.
inboundSingleURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/inbound/single_ips.txt"
// inboundNetworkURL is the file of inbound networks.
inboundNetworkURL = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables/inbound/networks.txt"
)
func main() { func main() {
if len(os.Args) < 3 { if len(os.Args) < 3 {
fmt.Fprintf(os.Stderr, "Usage: %s <blacklist.csv> <ip-address> [git-url|http-url]\n", os.Args[0]) fmt.Fprintf(os.Stderr, "Usage: %s <cache-dir|blacklist.csv> <ip-address> [git-url]\n", os.Args[0])
fmt.Fprintf(os.Stderr, " No remote: load from <blacklist.csv>\n")
fmt.Fprintf(os.Stderr, " git URL: clone/pull into <cache-dir>\n")
fmt.Fprintf(os.Stderr, " (default): fetch via HTTP into <cache-dir>\n")
os.Exit(1) os.Exit(1)
} }
dataPath := os.Args[1] dataPath := os.Args[1]
ipStr := os.Args[2] ipStr := os.Args[2]
remoteURL := "" gitURL := ""
if len(os.Args) >= 4 { if len(os.Args) >= 4 {
remoteURL = os.Args[3] gitURL = os.Args[3]
} }
var bl *Blacklist var bl *Blacklist
switch { switch {
case strings.HasPrefix(remoteURL, "http://") || strings.HasPrefix(remoteURL, "https://"): case gitURL != "":
bl = NewHTTPBlacklist(remoteURL, dataPath) bl = NewGitBlacklist(gitURL, dataPath,
case remoteURL != "": "tables/inbound/single_ips.txt",
bl = NewGitBlacklist(remoteURL, dataPath) "tables/inbound/networks.txt",
default: )
case strings.HasSuffix(dataPath, ".txt") || strings.HasSuffix(dataPath, ".csv"):
bl = NewBlacklist(dataPath) bl = NewBlacklist(dataPath)
default:
// dataPath is a cache directory; fetch the pre-split files via HTTP
bl = NewHTTPBlacklist(
HTTPSource{inboundSingleURL, dataPath + "/single_ips.txt"},
HTTPSource{inboundNetworkURL, dataPath + "/networks.txt"},
)
} }
if err := bl.Init(false); err != nil { if err := bl.Init(false); err != nil {

View File

@ -140,6 +140,35 @@ func LoadFile(path string) (*Cohort, error) {
return ParseCSV(f) return ParseCSV(f)
} }
// LoadFiles reads every path with LoadFile and merges the results into a
// single Cohort. This is handy when hosts and networks live in separate files.
//
// The merged hosts are sorted ascending and the merged networks are sorted by
// their networkBE value. If any file fails to load, that error is returned
// and no Cohort is produced.
func LoadFiles(paths ...string) (*Cohort, error) {
	var (
		allHosts []uint32
		allNets  []IPv4Net
	)

	for i := range paths {
		part, err := LoadFile(paths[i])
		if err != nil {
			return nil, err
		}
		allHosts = append(allHosts, part.hosts...)
		allNets = append(allNets, part.nets...)
	}

	// Each file is loaded on its own, so the concatenation must be re-sorted
	// as a whole before it is handed to the Cohort.
	slices.Sort(allHosts)
	slices.SortFunc(allNets, func(x, y IPv4Net) int {
		switch {
		case x.networkBE < y.networkBE:
			return -1
		case x.networkBE > y.networkBE:
			return 1
		default:
			return 0
		}
	})

	return &Cohort{hosts: allHosts, nets: allNets}, nil
}
func ParseCSV(f io.Reader) (*Cohort, error) { func ParseCSV(f io.Reader) (*Cohort, error) {
r := csv.NewReader(f) r := csv.NewReader(f)
r.FieldsPerRecord = -1 r.FieldsPerRecord = -1