From 990b9e430c6212f13403a8f00be74764b739ecc3 Mon Sep 17 00:00:00 2001 From: AJ ONeal Date: Mon, 20 Apr 2026 13:16:47 -0600 Subject: [PATCH] refactor(check-ip): drop dataset pkg, inline atomic-swap + ticker Uses atomic.Pointer[ipcohort.Cohort] directly and builds a per-source refresh closure (files / git / http). One goroutine drives the ticker. Exercises what the dataset pkg was abstracting so we can judge which bits are worth a shared pkg. --- cmd/check-ip/go.mod | 1 - cmd/check-ip/main.go | 161 ++++++++++++++++++++++++++++--------------- 2 files changed, 104 insertions(+), 58 deletions(-) diff --git a/cmd/check-ip/go.mod b/cmd/check-ip/go.mod index 4b5bd07..f7ce278 100644 --- a/cmd/check-ip/go.mod +++ b/cmd/check-ip/go.mod @@ -3,7 +3,6 @@ module github.com/therootcompany/golib/cmd/check-ip go 1.26.0 require ( - github.com/therootcompany/golib/net/dataset v0.0.0 github.com/therootcompany/golib/net/geoip v0.0.0 github.com/therootcompany/golib/net/gitshallow v0.0.0 github.com/therootcompany/golib/net/httpcache v0.0.0 diff --git a/cmd/check-ip/main.go b/cmd/check-ip/main.go index 21523eb..e2b5ea3 100644 --- a/cmd/check-ip/main.go +++ b/cmd/check-ip/main.go @@ -7,8 +7,8 @@ // - --git URL shallow-clone a git repo of blocklists // - (default) fetch raw blocklist files over HTTP with caching // -// Exercises net/dataset (atomic-swap + background refresh), net/gitshallow, -// and net/httpcache. +// Cohorts are held in atomic.Pointers and hot-swapped on refresh so callers +// never see a partial view. A single goroutine reloads on a ticker. package main import ( @@ -19,9 +19,9 @@ import ( "os" "path/filepath" "strings" + "sync/atomic" "time" - "github.com/therootcompany/golib/net/dataset" "github.com/therootcompany/golib/net/geoip" "github.com/therootcompany/golib/net/gitshallow" "github.com/therootcompany/golib/net/httpcache" @@ -29,12 +29,8 @@ import ( ) const ( - bitwireGitURL = "https://github.com/bitwire-it/ipblocklist.git" - bitwireRawBase = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables" - inboundSingleURL = bitwireRawBase + "/inbound/single_ips.txt" - inboundNetworkURL = bitwireRawBase + "/inbound/networks.txt" - outboundSingleURL = bitwireRawBase + "/outbound/single_ips.txt" - outboundNetworkURL = bitwireRawBase + "/outbound/networks.txt" + bitwireGitURL = "https://github.com/bitwire-it/ipblocklist.git" + bitwireRawBase = "https://github.com/bitwire-it/ipblocklist/raw/refs/heads/main/tables" refreshInterval = 47 * time.Minute ) @@ -106,13 +102,20 @@ func run(cfg Config, ipStr string) (blocked bool, err error) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - inbound, outbound, err := newBlocklists(ctx, cfg) + var inbound, outbound atomic.Pointer[ipcohort.Cohort] + + refresh, err := buildRefresher(cfg, &inbound, &outbound) if err != nil { return false, err } + if err := refresh(); err != nil { + return false, fmt.Errorf("blacklist: %w", err) + } fmt.Fprintf(os.Stderr, "Loaded inbound=%d outbound=%d\n", inbound.Load().Size(), outbound.Load().Size()) + go tick(ctx, refreshInterval, "blacklist", refresh) + whitelist, err := loadWhitelist(cfg.Whitelist) if err != nil { return false, fmt.Errorf("whitelist: %w", err) @@ -145,62 +148,112 @@ func run(cfg Config, ipStr string) (blocked bool, err error) { return blockedIn || blockedOut, nil } -// newBlocklists wires a dataset.Group to the configured source (local files, -// git, or HTTP-cached raw files), calls Init once to populate, and starts a -// background refresh loop on ctx. -func newBlocklists(ctx context.Context, cfg Config) (inbound, outbound *dataset.Dataset[ipcohort.Cohort], err error) { - syncer, inPaths, outPaths, err := newSource(cfg) - if err != nil { - return nil, nil, err +// buildRefresher wires the chosen source (files/git/http) to the inbound and +// outbound atomic pointers, and returns a function that performs one refresh +// cycle: fetch upstream, and if anything changed (or on the first call), +// reload both cohorts and atomically swap them in. +func buildRefresher( + cfg Config, + inbound, outbound *atomic.Pointer[ipcohort.Cohort], +) (func() error, error) { + loadAndSwap := func(inPaths, outPaths []string) error { + in, err := ipcohort.LoadFiles(inPaths...) + if err != nil { + return fmt.Errorf("inbound: %w", err) + } + out, err := ipcohort.LoadFiles(outPaths...) + if err != nil { + return fmt.Errorf("outbound: %w", err) + } + inbound.Store(in) + outbound.Store(out) + return nil } - g := dataset.NewGroup(syncer) - inbound = dataset.Add(g, loadCohort(inPaths)) - outbound = dataset.Add(g, loadCohort(outPaths)) - if err := g.Init(); err != nil { - return nil, nil, fmt.Errorf("blacklist: %w", err) - } - go g.Run(ctx, refreshInterval) - return inbound, outbound, nil -} - -// newSource picks a Syncer based on cfg and returns the file paths each -// dataset should load after a sync. -func newSource(cfg Config) (syncer dataset.Syncer, inPaths, outPaths []string, err error) { switch { case cfg.Inbound != "" || cfg.Outbound != "": - return dataset.NopSyncer{}, splitCSV(cfg.Inbound), splitCSV(cfg.Outbound), nil + inPaths, outPaths := splitCSV(cfg.Inbound), splitCSV(cfg.Outbound) + loaded := false + return func() error { + if loaded { + return nil + } + loaded = true + return loadAndSwap(inPaths, outPaths) + }, nil case cfg.GitURL != "": dir, err := cacheDir(cfg.DataDir) if err != nil { - return nil, nil, nil, err + return nil, err } repo := gitshallow.New(cfg.GitURL, dir, 1, "") - return repo, - []string{ - repo.FilePath("tables/inbound/single_ips.txt"), - repo.FilePath("tables/inbound/networks.txt"), - }, - []string{ - repo.FilePath("tables/outbound/single_ips.txt"), - repo.FilePath("tables/outbound/networks.txt"), - }, - nil + inPaths := []string{ + repo.FilePath("tables/inbound/single_ips.txt"), + repo.FilePath("tables/inbound/networks.txt"), + } + outPaths := []string{ + repo.FilePath("tables/outbound/single_ips.txt"), + repo.FilePath("tables/outbound/networks.txt"), + } + first := true + return func() error { + updated, err := repo.Sync() + if err != nil { + return err + } + if !first && !updated { + return nil + } + first = false + return loadAndSwap(inPaths, outPaths) + }, nil default: dir, err := cacheDir(cfg.DataDir) if err != nil { - return nil, nil, nil, err + return nil, err + } + cachers := []*httpcache.Cacher{ + httpcache.New(bitwireRawBase+"/inbound/single_ips.txt", filepath.Join(dir, "inbound_single_ips.txt")), + httpcache.New(bitwireRawBase+"/inbound/networks.txt", filepath.Join(dir, "inbound_networks.txt")), + httpcache.New(bitwireRawBase+"/outbound/single_ips.txt", filepath.Join(dir, "outbound_single_ips.txt")), + httpcache.New(bitwireRawBase+"/outbound/networks.txt", filepath.Join(dir, "outbound_networks.txt")), + } + inPaths := []string{cachers[0].Path, cachers[1].Path} + outPaths := []string{cachers[2].Path, cachers[3].Path} + first := true + return func() error { + var anyUpdated bool + for _, c := range cachers { + u, err := c.Fetch() + if err != nil { + return err + } + anyUpdated = anyUpdated || u + } + if !first && !anyUpdated { + return nil + } + first = false + return loadAndSwap(inPaths, outPaths) + }, nil + } +} + +// tick calls fn every interval until ctx is done. Errors are logged, not fatal. +func tick(ctx context.Context, interval time.Duration, name string, fn func() error) { + t := time.NewTicker(interval) + defer t.Stop() + for { + select { + case <-ctx.Done(): + return + case <-t.C: + if err := fn(); err != nil { + fmt.Fprintf(os.Stderr, "%s: refresh error: %v\n", name, err) + } } - inSingle := httpcache.New(inboundSingleURL, filepath.Join(dir, "inbound_single_ips.txt")) - inNet := httpcache.New(inboundNetworkURL, filepath.Join(dir, "inbound_networks.txt")) - outSingle := httpcache.New(outboundSingleURL, filepath.Join(dir, "outbound_single_ips.txt")) - outNet := httpcache.New(outboundNetworkURL, filepath.Join(dir, "outbound_networks.txt")) - return dataset.MultiSyncer{inSingle, inNet, outSingle, outNet}, - []string{inSingle.Path, inNet.Path}, - []string{outSingle.Path, outNet.Path}, - nil } } @@ -211,12 +264,6 @@ func loadWhitelist(paths string) (*ipcohort.Cohort, error) { return ipcohort.LoadFiles(strings.Split(paths, ",")...) } -func loadCohort(paths []string) func() (*ipcohort.Cohort, error) { - return func() (*ipcohort.Cohort, error) { - return ipcohort.LoadFiles(paths...) - } -} - func cacheDir(override string) (string, error) { if override != "" { return override, nil