mirror of
https://github.com/therootcompany/golib.git
synced 2026-04-24 20:58:00 +00:00
gitshallow: fix double-fetch (pull already fetches), drop redundant -C flags

gitdataset: split into GitDataset[T] (file+atomic) and GitRepo (git+multi-dataset)
- NewDataset for file-only use, AddDataset to register with a GitRepo
- one clone/fetch per repo regardless of how many datasets it has

ipcohort: split Cohort into hosts (sorted /32, binary search) + nets (CIDRs, linear)
- fixes false negatives when broad CIDRs (e.g. /8) precede specific entries
- fixes Parse() sort-before-copy order bug
- ReadAll always sorts; unsorted param removed (was dead code)
135 lines
3.7 KiB
Go
135 lines
3.7 KiB
Go
package dataset
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/therootcompany/golib/net/gitshallow"
|
|
)
|
|
|
|
// File is a value of type T that is loaded from a file on disk and published
// through an embedded atomic.Pointer, so readers call Load without taking a
// lock while a reload swaps in a fresh value.
//
// Construct one with NewFile for stand-alone use, or with AddFile to attach it
// to a GitRepo so it is reloaded whenever that repo is synced.
type File[T any] struct {
	// Pointer holds the most recently loaded value; Load/Store are promoted
	// onto File itself.
	atomic.Pointer[T]

	// path is the location handed to loadFile on every reload.
	path string

	// loadFile parses the file at the given path into a new *T.
	loadFile func(string) (*T, error)
}
|
|
|
|
// NewFile creates a file-backed dataset with no git dependency.
|
|
// Call Reload to do the initial load and after any file change.
|
|
func NewFile[T any](path string, loadFile func(string) (*T, error)) *File[T] {
|
|
d := &File[T]{
|
|
path: path,
|
|
loadFile: loadFile,
|
|
}
|
|
d.Store(new(T))
|
|
return d
|
|
}
|
|
|
|
// Reload reads the file and atomically replaces the stored value.
|
|
func (d *File[T]) Reload() error {
|
|
v, err := d.loadFile(d.path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
d.Store(v)
|
|
return nil
|
|
}
|
|
|
|
func (d *File[T]) reloadFile() error {
|
|
return d.Reload()
|
|
}
|
|
|
|
// reloader is the type-erased view of a File that GitRepo stores: it exposes
// only the ability to re-read the file, independent of the File's element
// type.
type reloader interface {
	// reloadFile re-reads the backing file and reports any load error.
	reloadFile() error
}
|
|
|
|
// GitRepo manages a shallow git clone and reloads all registered files
|
|
// whenever the repo is updated. Multiple files from the same repo share
|
|
// one clone and one pull, avoiding git file-lock conflicts.
|
|
type GitRepo struct {
|
|
path string
|
|
shallowRepo *gitshallow.ShallowRepo
|
|
files []reloader
|
|
}
|
|
|
|
// NewRepo creates a GitRepo backed by the given git URL, cloning into repoPath.
|
|
func NewRepo(gitURL, repoPath string) *GitRepo {
|
|
return &GitRepo{
|
|
path: repoPath,
|
|
shallowRepo: gitshallow.New(gitURL, repoPath, 1, ""),
|
|
}
|
|
}
|
|
|
|
// AddFile registers a file inside this repo and returns its handle.
|
|
// relPath is relative to the repo root. The file is reloaded automatically
|
|
// whenever the repo is synced via Init or Run.
|
|
func AddFile[T any](repo *GitRepo, relPath string, loadFile func(string) (*T, error)) *File[T] {
|
|
d := NewFile(filepath.Join(repo.path, relPath), loadFile)
|
|
repo.files = append(repo.files, d)
|
|
return d
|
|
}
|
|
|
|
// Init clones the repo if missing, syncs once, and loads all registered files.
|
|
// Always runs aggressive GC — acceptable as a one-time startup cost.
|
|
func (r *GitRepo) Init() error {
|
|
gitDir := filepath.Join(r.path, ".git")
|
|
if _, err := os.Stat(gitDir); err != nil {
|
|
if _, err := r.shallowRepo.Clone(); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
_, err := r.sync(false, true)
|
|
return err
|
|
}
|
|
|
|
// Run periodically syncs the repo and reloads files. Blocks until ctx is done.
|
|
// lightGC=false (zero value) runs aggressive GC with immediate pruning to keep footprint minimal.
|
|
// Pass true to skip both when the periodic GC is too slow for your workload.
|
|
func (r *GitRepo) Run(ctx context.Context, lightGC bool) {
|
|
ticker := time.NewTicker(47 * time.Minute)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
if updated, err := r.sync(lightGC, false); err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: git repo sync: %v\n", err)
|
|
} else if updated {
|
|
fmt.Fprintf(os.Stderr, "git repo: files reloaded\n")
|
|
}
|
|
case <-ctx.Done():
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// Sync pulls the latest commits and reloads all files if HEAD changed.
|
|
// lightGC=false (zero value) runs aggressive GC with immediate pruning to keep footprint minimal.
|
|
func (r *GitRepo) Sync(lightGC bool) (bool, error) {
|
|
return r.sync(lightGC, false)
|
|
}
|
|
|
|
func (r *GitRepo) sync(lightGC, force bool) (bool, error) {
|
|
updated, err := r.shallowRepo.Sync(lightGC)
|
|
if err != nil {
|
|
return false, fmt.Errorf("git sync: %w", err)
|
|
}
|
|
if !updated && !force {
|
|
return false, nil
|
|
}
|
|
|
|
for _, f := range r.files {
|
|
if err := f.reloadFile(); err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: reload file: %v\n", err)
|
|
}
|
|
}
|
|
return true, nil
|
|
}
|