refactor: dataset.Sync() = fetch+conditional-swap, no public Swap()

Callers only need Init() + Run() + Load(). Sync() always fetches, and performs
the reload and swap internally only when the source reports a change.
This commit is contained in:
AJ ONeal 2026-04-20 12:46:02 -06:00
parent 03ea6934e9
commit cc945b0c09
No known key found for this signature in database

View File

@ -2,7 +2,7 @@
// atomic.Pointer (hot-swap), providing a generic periodically-updated // atomic.Pointer (hot-swap), providing a generic periodically-updated
// in-memory dataset with lock-free reads. // in-memory dataset with lock-free reads.
// //
// Standalone dataset (one syncer, one value): // Standalone dataset:
// //
// ds := dataset.New(cacher, func() (*MyType, error) { // ds := dataset.New(cacher, func() (*MyType, error) {
// return mytype.LoadFile(path) // return mytype.LoadFile(path)
@ -11,11 +11,11 @@
// go ds.Run(ctx, 47*time.Minute) // go ds.Run(ctx, 47*time.Minute)
// val := ds.Load() // *MyType, lock-free // val := ds.Load() // *MyType, lock-free
// //
// Group (one syncer, multiple values — e.g. inbound+outbound from one git repo): // Group (one syncer, multiple values):
// //
// g := dataset.NewGroup(repo) // g := dataset.NewGroup(repo)
// inbound := dataset.Add(g, func() (*ipcohort.Cohort, error) { return ipcohort.LoadFiles(inboundPaths...) }) // inbound := dataset.Add(g, func() (*ipcohort.Cohort, error) { ... })
// outbound := dataset.Add(g, func() (*ipcohort.Cohort, error) { return ipcohort.LoadFiles(outboundPaths...) }) // outbound := dataset.Add(g, func() (*ipcohort.Cohort, error) { ... })
// if err := g.Init(); err != nil { ... } // if err := g.Init(); err != nil { ... }
// go g.Run(ctx, 47*time.Minute) // go g.Run(ctx, 47*time.Minute)
package dataset package dataset
@ -28,8 +28,7 @@ import (
"time" "time"
) )
// Syncer is implemented by any value that can fetch a remote resource and // Syncer reports whether a remote resource has changed.
// report whether it changed.
type Syncer interface { type Syncer interface {
Fetch() (updated bool, err error) Fetch() (updated bool, err error)
} }
@ -50,8 +49,7 @@ func (ms MultiSyncer) Fetch() (bool, error) {
return anyUpdated, nil return anyUpdated, nil
} }
// NopSyncer is a Syncer that always reports no update and no error. // NopSyncer always reports no update. Use for local-file datasets.
// Use for datasets backed by local files with no remote source.
type NopSyncer struct{} type NopSyncer struct{}
func (NopSyncer) Fetch() (bool, error) { return false, nil } func (NopSyncer) Fetch() (bool, error) { return false, nil }
@ -59,10 +57,9 @@ func (NopSyncer) Fetch() (bool, error) { return false, nil }
// Dataset couples a Syncer, a load function, and an atomic.Pointer[T]. // Dataset couples a Syncer, a load function, and an atomic.Pointer[T].
// Load is safe for concurrent use without locks. // Load is safe for concurrent use without locks.
type Dataset[T any] struct { type Dataset[T any] struct {
// Name is used in error messages. Optional. // Name is used in error messages.
Name string Name string
// Close is called with the previous value after each successful swap. // Close is called with the old value after each successful swap.
// Use this for values that hold resources, e.g. func(r *geoip2.Reader) { r.Close() }.
Close func(*T) Close func(*T)
syncer Syncer syncer Syncer
@ -70,8 +67,7 @@ type Dataset[T any] struct {
ptr atomic.Pointer[T] ptr atomic.Pointer[T]
} }
// New creates a Dataset. The syncer fetches updates; load produces the value. // New creates a Dataset. The syncer reports changes; load produces the value.
// load is a closure — it captures whatever paths or config it needs.
func New[T any](syncer Syncer, load func() (*T, error)) *Dataset[T] { func New[T any](syncer Syncer, load func() (*T, error)) *Dataset[T] {
return &Dataset[T]{syncer: syncer, load: load} return &Dataset[T]{syncer: syncer, load: load}
} }
@ -81,26 +77,41 @@ func (d *Dataset[T]) Load() *T {
return d.ptr.Load() return d.ptr.Load()
} }
// Init fetches (if needed) then always loads, ensuring the dataset is func (d *Dataset[T]) swap() error {
// populated on startup from an existing local file even if nothing changed. val, err := d.load()
if err != nil {
return err
}
if old := d.ptr.Swap(val); old != nil && d.Close != nil {
d.Close(old)
}
return nil
}
// Sync calls the syncer and, if updated, reloads and atomically installs the
// new value. Returns whether the source changed.
func (d *Dataset[T]) Sync() (bool, error) {
updated, err := d.syncer.Fetch()
if err != nil {
return false, err
}
if !updated {
return false, nil
}
return true, d.swap()
}
// Init fetches and then always loads, ensuring the dataset is populated from
// an existing local file even if nothing changed upstream.
func (d *Dataset[T]) Init() error { func (d *Dataset[T]) Init() error {
if _, err := d.syncer.Fetch(); err != nil { if _, err := d.syncer.Fetch(); err != nil {
return err return err
} }
return d.reload() return d.swap()
} }
// Sync fetches and reloads if the content changed. Returns whether updated. // Run calls Sync on every interval. Errors are written to stderr and do not
func (d *Dataset[T]) Sync() (bool, error) { // stop the loop.
updated, err := d.syncer.Fetch()
if err != nil || !updated {
return updated, err
}
return true, d.reload()
}
// Run calls Sync on every interval until ctx is done.
// Errors are written to stderr and do not stop the loop.
func (d *Dataset[T]) Run(ctx context.Context, interval time.Duration) { func (d *Dataset[T]) Run(ctx context.Context, interval time.Duration) {
ticker := time.NewTicker(interval) ticker := time.NewTicker(interval)
defer ticker.Stop() defer ticker.Stop()
@ -120,26 +131,15 @@ func (d *Dataset[T]) Run(ctx context.Context, interval time.Duration) {
} }
} }
func (d *Dataset[T]) reload() error {
val, err := d.load()
if err != nil {
return err
}
if old := d.ptr.Swap(val); old != nil && d.Close != nil {
d.Close(old)
}
return nil
}
// -- Group: one Syncer driving multiple datasets --------------------------- // -- Group: one Syncer driving multiple datasets ---------------------------
// member is the type-erased reload handle stored in a Group. // member is the type-erased swap handle stored in a Group.
type member interface { type member interface {
reload() error swap() error
} }
// Group ties one Syncer to multiple datasets so a single Fetch drives all // Group ties one Syncer to multiple datasets so a single Fetch drives all
// reloads — no redundant network calls when datasets share a source. // swaps — no redundant network calls when datasets share a source.
type Group struct { type Group struct {
syncer Syncer syncer Syncer
members []member members []member
@ -150,44 +150,38 @@ func NewGroup(syncer Syncer) *Group {
return &Group{syncer: syncer} return &Group{syncer: syncer}
} }
// View is the read-only handle returned by Add. It exposes only Load — func (g *Group) swapAll() error {
// fetch and reload are driven by the owning Group. for _, m := range g.members {
type View[T any] struct { if err := m.swap(); err != nil {
d *Dataset[T] return err
}
}
return nil
} }
// Load returns the current value. Returns nil before the Group is initialised. // Sync calls the syncer and, if updated, reloads all member datasets.
func (v *View[T]) Load() *T { return v.d.ptr.Load() } // Returns whether the source changed.
func (g *Group) Sync() (bool, error) {
func (v *View[T]) reload() error { return v.d.reload() } updated, err := g.syncer.Fetch()
if err != nil {
// Add registers a new dataset in g and returns a View. Call Load to read the return false, err
// current value. Drive updates by calling Init/Sync/Run on the Group. }
// load is a closure capturing whatever paths or config it needs. if !updated {
func Add[T any](g *Group, load func() (*T, error)) *View[T] { return false, nil
v := &View[T]{d: &Dataset[T]{load: load}} }
g.members = append(g.members, v) return true, g.swapAll()
return v
} }
// Init fetches once then reloads all registered datasets. // Init syncs and always loads all datasets.
func (g *Group) Init() error { func (g *Group) Init() error {
if _, err := g.syncer.Fetch(); err != nil { if _, err := g.syncer.Fetch(); err != nil {
return err return err
} }
return g.reloadAll() return g.swapAll()
} }
// Sync fetches and reloads all datasets if the syncer reports an update. // Run calls Sync on every interval; reloads all datasets only when the source
func (g *Group) Sync() (bool, error) { // reports a change.
updated, err := g.syncer.Fetch()
if err != nil || !updated {
return updated, err
}
return true, g.reloadAll()
}
// Run calls Sync on every interval until ctx is done.
func (g *Group) Run(ctx context.Context, interval time.Duration) { func (g *Group) Run(ctx context.Context, interval time.Duration) {
ticker := time.NewTicker(interval) ticker := time.NewTicker(interval)
defer ticker.Stop() defer ticker.Stop()
@ -203,11 +197,20 @@ func (g *Group) Run(ctx context.Context, interval time.Duration) {
} }
} }
func (g *Group) reloadAll() error { // View is the read-only handle returned by Add. Sync is driven by the owning
for _, m := range g.members { // Group.
if err := m.reload(); err != nil { type View[T any] struct {
return err d *Dataset[T]
} }
}
return nil // Load returns the current value. Returns nil before the Group is initialised.
func (v *View[T]) Load() *T { return v.d.ptr.Load() }
func (v *View[T]) swap() error { return v.d.swap() }
// Add registers a new dataset in g and returns a View for reading.
func Add[T any](g *Group, load func() (*T, error)) *View[T] {
v := &View[T]{d: &Dataset[T]{load: load}}
g.members = append(g.members, v)
return v
} }