mirror of
https://github.com/therootcompany/golib.git
synced 2026-04-24 20:58:00 +00:00
refactor(check-ip): drop dataset pkg, inline atomic-swap + ticker
Uses atomic.Pointer[ipcohort.Cohort] directly and builds a per-source refresh closure (files / git / http). One goroutine drives the ticker. Exercises what the dataset pkg was abstracting so we can judge which bits are worth a shared pkg.
This commit is contained in:
parent
9e9bd98540
commit
990b9e430c
@ -3,7 +3,6 @@ module github.com/therootcompany/golib/cmd/check-ip
|
|||||||
go 1.26.0
|
go 1.26.0
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/therootcompany/golib/net/dataset v0.0.0
|
|
||||||
github.com/therootcompany/golib/net/geoip v0.0.0
|
github.com/therootcompany/golib/net/geoip v0.0.0
|
||||||
github.com/therootcompany/golib/net/gitshallow v0.0.0
|
github.com/therootcompany/golib/net/gitshallow v0.0.0
|
||||||
github.com/therootcompany/golib/net/httpcache v0.0.0
|
github.com/therootcompany/golib/net/httpcache v0.0.0
|
||||||
|
|||||||
@ -7,8 +7,8 @@
|
|||||||
// - --git URL shallow-clone a git repo of blocklists
|
// - --git URL shallow-clone a git repo of blocklists
|
||||||
// - (default) fetch raw blocklist files over HTTP with caching
|
// - (default) fetch raw blocklist files over HTTP with caching
|
||||||
//
|
//
|
||||||
// Exercises net/dataset (atomic-swap + background refresh), net/gitshallow,
|
// Cohorts are held in atomic.Pointers and hot-swapped on refresh so callers
|
||||||
// and net/httpcache.
|
// never see a partial view. A single goroutine reloads on a ticker.
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@ -19,9 +19,9 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/therootcompany/golib/net/dataset"
|
|
||||||
"github.com/therootcompany/golib/net/geoip"
|
"github.com/therootcompany/golib/net/geoip"
|
||||||
"github.com/therootcompany/golib/net/gitshallow"
|
"github.com/therootcompany/golib/net/gitshallow"
|
||||||
"github.com/therootcompany/golib/net/httpcache"
|
"github.com/therootcompany/golib/net/httpcache"
|
||||||
@ -31,10 +31,6 @@ import (
|
|||||||
const (
|
const (
|
||||||
bitwireGitURL = "https://github.com/bitwire-it/ipblocklist.git"
|
bitwireGitURL = "https://github.com/bitwire-it/ipblocklist.git"
|
||||||
bitwireRawBase = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables"
|
bitwireRawBase = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables"
|
||||||
inboundSingleURL = bitwireRawBase + "/inbound/single_ips.txt"
|
|
||||||
inboundNetworkURL = bitwireRawBase + "/inbound/networks.txt"
|
|
||||||
outboundSingleURL = bitwireRawBase + "/outbound/single_ips.txt"
|
|
||||||
outboundNetworkURL = bitwireRawBase + "/outbound/networks.txt"
|
|
||||||
|
|
||||||
refreshInterval = 47 * time.Minute
|
refreshInterval = 47 * time.Minute
|
||||||
)
|
)
|
||||||
@ -106,13 +102,20 @@ func run(cfg Config, ipStr string) (blocked bool, err error) {
|
|||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
inbound, outbound, err := newBlocklists(ctx, cfg)
|
var inbound, outbound atomic.Pointer[ipcohort.Cohort]
|
||||||
|
|
||||||
|
refresh, err := buildRefresher(cfg, &inbound, &outbound)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
|
if err := refresh(); err != nil {
|
||||||
|
return false, fmt.Errorf("blacklist: %w", err)
|
||||||
|
}
|
||||||
fmt.Fprintf(os.Stderr, "Loaded inbound=%d outbound=%d\n",
|
fmt.Fprintf(os.Stderr, "Loaded inbound=%d outbound=%d\n",
|
||||||
inbound.Load().Size(), outbound.Load().Size())
|
inbound.Load().Size(), outbound.Load().Size())
|
||||||
|
|
||||||
|
go tick(ctx, refreshInterval, "blacklist", refresh)
|
||||||
|
|
||||||
whitelist, err := loadWhitelist(cfg.Whitelist)
|
whitelist, err := loadWhitelist(cfg.Whitelist)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, fmt.Errorf("whitelist: %w", err)
|
return false, fmt.Errorf("whitelist: %w", err)
|
||||||
@ -145,62 +148,112 @@ func run(cfg Config, ipStr string) (blocked bool, err error) {
|
|||||||
return blockedIn || blockedOut, nil
|
return blockedIn || blockedOut, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// newBlocklists wires a dataset.Group to the configured source (local files,
|
// buildRefresher wires the chosen source (files/git/http) to the inbound and
|
||||||
// git, or HTTP-cached raw files), calls Init once to populate, and starts a
|
// outbound atomic pointers, and returns a function that performs one refresh
|
||||||
// background refresh loop on ctx.
|
// cycle: fetch upstream, and if anything changed (or on the first call),
|
||||||
func newBlocklists(ctx context.Context, cfg Config) (inbound, outbound *dataset.Dataset[ipcohort.Cohort], err error) {
|
// reload both cohorts and atomically swap them in.
|
||||||
syncer, inPaths, outPaths, err := newSource(cfg)
|
func buildRefresher(
|
||||||
|
cfg Config,
|
||||||
|
inbound, outbound *atomic.Pointer[ipcohort.Cohort],
|
||||||
|
) (func() error, error) {
|
||||||
|
loadAndSwap := func(inPaths, outPaths []string) error {
|
||||||
|
in, err := ipcohort.LoadFiles(inPaths...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return fmt.Errorf("inbound: %w", err)
|
||||||
|
}
|
||||||
|
out, err := ipcohort.LoadFiles(outPaths...)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("outbound: %w", err)
|
||||||
|
}
|
||||||
|
inbound.Store(in)
|
||||||
|
outbound.Store(out)
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
g := dataset.NewGroup(syncer)
|
|
||||||
inbound = dataset.Add(g, loadCohort(inPaths))
|
|
||||||
outbound = dataset.Add(g, loadCohort(outPaths))
|
|
||||||
if err := g.Init(); err != nil {
|
|
||||||
return nil, nil, fmt.Errorf("blacklist: %w", err)
|
|
||||||
}
|
|
||||||
go g.Run(ctx, refreshInterval)
|
|
||||||
return inbound, outbound, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// newSource picks a Syncer based on cfg and returns the file paths each
|
|
||||||
// dataset should load after a sync.
|
|
||||||
func newSource(cfg Config) (syncer dataset.Syncer, inPaths, outPaths []string, err error) {
|
|
||||||
switch {
|
switch {
|
||||||
case cfg.Inbound != "" || cfg.Outbound != "":
|
case cfg.Inbound != "" || cfg.Outbound != "":
|
||||||
return dataset.NopSyncer{}, splitCSV(cfg.Inbound), splitCSV(cfg.Outbound), nil
|
inPaths, outPaths := splitCSV(cfg.Inbound), splitCSV(cfg.Outbound)
|
||||||
|
loaded := false
|
||||||
|
return func() error {
|
||||||
|
if loaded {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
loaded = true
|
||||||
|
return loadAndSwap(inPaths, outPaths)
|
||||||
|
}, nil
|
||||||
|
|
||||||
case cfg.GitURL != "":
|
case cfg.GitURL != "":
|
||||||
dir, err := cacheDir(cfg.DataDir)
|
dir, err := cacheDir(cfg.DataDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
repo := gitshallow.New(cfg.GitURL, dir, 1, "")
|
repo := gitshallow.New(cfg.GitURL, dir, 1, "")
|
||||||
return repo,
|
inPaths := []string{
|
||||||
[]string{
|
|
||||||
repo.FilePath("tables/inbound/single_ips.txt"),
|
repo.FilePath("tables/inbound/single_ips.txt"),
|
||||||
repo.FilePath("tables/inbound/networks.txt"),
|
repo.FilePath("tables/inbound/networks.txt"),
|
||||||
},
|
}
|
||||||
[]string{
|
outPaths := []string{
|
||||||
repo.FilePath("tables/outbound/single_ips.txt"),
|
repo.FilePath("tables/outbound/single_ips.txt"),
|
||||||
repo.FilePath("tables/outbound/networks.txt"),
|
repo.FilePath("tables/outbound/networks.txt"),
|
||||||
},
|
}
|
||||||
nil
|
first := true
|
||||||
|
return func() error {
|
||||||
|
updated, err := repo.Sync()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if !first && !updated {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
first = false
|
||||||
|
return loadAndSwap(inPaths, outPaths)
|
||||||
|
}, nil
|
||||||
|
|
||||||
default:
|
default:
|
||||||
dir, err := cacheDir(cfg.DataDir)
|
dir, err := cacheDir(cfg.DataDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, nil, err
|
return nil, err
|
||||||
|
}
|
||||||
|
cachers := []*httpcache.Cacher{
|
||||||
|
httpcache.New(bitwireRawBase+"/inbound/single_ips.txt", filepath.Join(dir, "inbound_single_ips.txt")),
|
||||||
|
httpcache.New(bitwireRawBase+"/inbound/networks.txt", filepath.Join(dir, "inbound_networks.txt")),
|
||||||
|
httpcache.New(bitwireRawBase+"/outbound/single_ips.txt", filepath.Join(dir, "outbound_single_ips.txt")),
|
||||||
|
httpcache.New(bitwireRawBase+"/outbound/networks.txt", filepath.Join(dir, "outbound_networks.txt")),
|
||||||
|
}
|
||||||
|
inPaths := []string{cachers[0].Path, cachers[1].Path}
|
||||||
|
outPaths := []string{cachers[2].Path, cachers[3].Path}
|
||||||
|
first := true
|
||||||
|
return func() error {
|
||||||
|
var anyUpdated bool
|
||||||
|
for _, c := range cachers {
|
||||||
|
u, err := c.Fetch()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
anyUpdated = anyUpdated || u
|
||||||
|
}
|
||||||
|
if !first && !anyUpdated {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
first = false
|
||||||
|
return loadAndSwap(inPaths, outPaths)
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// tick calls fn every interval until ctx is done. Errors are logged, not fatal.
|
||||||
|
func tick(ctx context.Context, interval time.Duration, name string, fn func() error) {
|
||||||
|
t := time.NewTicker(interval)
|
||||||
|
defer t.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-t.C:
|
||||||
|
if err := fn(); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "%s: refresh error: %v\n", name, err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
inSingle := httpcache.New(inboundSingleURL, filepath.Join(dir, "inbound_single_ips.txt"))
|
|
||||||
inNet := httpcache.New(inboundNetworkURL, filepath.Join(dir, "inbound_networks.txt"))
|
|
||||||
outSingle := httpcache.New(outboundSingleURL, filepath.Join(dir, "outbound_single_ips.txt"))
|
|
||||||
outNet := httpcache.New(outboundNetworkURL, filepath.Join(dir, "outbound_networks.txt"))
|
|
||||||
return dataset.MultiSyncer{inSingle, inNet, outSingle, outNet},
|
|
||||||
[]string{inSingle.Path, inNet.Path},
|
|
||||||
[]string{outSingle.Path, outNet.Path},
|
|
||||||
nil
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -211,12 +264,6 @@ func loadWhitelist(paths string) (*ipcohort.Cohort, error) {
|
|||||||
return ipcohort.LoadFiles(strings.Split(paths, ",")...)
|
return ipcohort.LoadFiles(strings.Split(paths, ",")...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func loadCohort(paths []string) func() (*ipcohort.Cohort, error) {
|
|
||||||
return func() (*ipcohort.Cohort, error) {
|
|
||||||
return ipcohort.LoadFiles(paths...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func cacheDir(override string) (string, error) {
|
func cacheDir(override string) (string, error) {
|
||||||
if override != "" {
|
if override != "" {
|
||||||
return override, nil
|
return override, nil
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user