wip: feat: add net/gitdataset for data that updates via git

This commit is contained in:
AJ ONeal 2026-01-22 02:10:28 -07:00
parent e2a50ae20e
commit 9cd08ff2b8
No known key found for this signature in database
2 changed files with 92 additions and 87 deletions

View File

@ -0,0 +1,92 @@
package gitdataset
import (
"context"
"fmt"
"os"
"path/filepath"
"sync/atomic"
"time"
"github.com/therootcompany/golib/net/gitshallow"
)
// TODO maybe a GitRepo should contain GitDatasets such that loading
// multiple datasets from the same GitRepo won't cause issues with file locking?
type GitDataset[T any] struct {
LoadFile func(path string) (*T, error)
atomic.Pointer[T]
gitRepo string
shallowRepo *gitshallow.ShallowRepo
path string
}
func New[T any](gitURL, path string, loadFile func(path string) (*T, error)) *GitDataset[T] {
gitRepo := filepath.Dir(path)
gitDepth := 1
gitBranch := ""
shallowRepo := gitshallow.New(gitURL, gitRepo, gitDepth, gitBranch)
b := &GitDataset[T]{
Pointer: atomic.Pointer[T]{},
LoadFile: loadFile,
gitRepo: gitRepo,
shallowRepo: shallowRepo,
path: path,
}
b.Store(new(T))
return b
}
func (b *GitDataset[T]) Init(skipGC bool) (updated bool, err error) {
gitDir := filepath.Join(b.gitRepo, ".git")
if _, err := os.Stat(gitDir); err != nil {
if _, err := b.shallowRepo.Clone(); err != nil {
return false, err
}
}
force := true
return b.reload(skipGC, force)
}
func (b *GitDataset[T]) Run(ctx context.Context) {
ticker := time.NewTicker(47 * time.Minute)
defer ticker.Stop()
for {
select {
case <-ticker.C:
if ok, err := b.reload(false, false); err != nil {
fmt.Fprintf(os.Stderr, "error: git data: %v\n", err)
} else if ok {
fmt.Fprintf(os.Stderr, "git data: loaded repo\n")
} else {
fmt.Fprintf(os.Stderr, "git data: already up-to-date\n")
}
case <-ctx.Done():
return
}
}
}
func (b *GitDataset[T]) reload(skipGC, force bool) (updated bool, err error) {
laxGC := skipGC
lazyPrune := skipGC
updated, err = b.shallowRepo.Sync(laxGC, lazyPrune)
if err != nil {
return false, fmt.Errorf("git sync: %w", err)
}
if !updated && !force {
return false, nil
}
nextDataset, err := b.LoadFile(b.path)
if err != nil {
return false, err
}
_ = b.Swap(nextDataset)
return true, nil
}

View File

@ -1,87 +0,0 @@
package main
import (
"context"
"fmt"
"log"
"os"
"path/filepath"
"time"
"github.com/therootcompany/golib/net/gitshallow"
"github.com/therootcompany/golib/net/ipcohort"
)
type Blacklist struct {
*ipcohort.Cohort
gitRepo string
shallowRepo *gitshallow.ShallowRepo
path string
}
func NewBlacklist(gitURL, path string) *Blacklist {
gitRepo := filepath.Dir(path)
gitDepth := 1
gitBranch := ""
shallowRepo := gitshallow.New(gitURL, gitRepo, gitDepth, gitBranch)
return &Blacklist{
Cohort: ipcohort.New(),
gitRepo: gitRepo,
shallowRepo: shallowRepo,
path: path,
}
}
func (b *Blacklist) Init(skipGC bool) (int, error) {
gitDir := filepath.Join(b.gitRepo, ".git")
if _, err := os.Stat(gitDir); err != nil {
if _, err := b.shallowRepo.Clone(); err != nil {
log.Fatalf("Failed to load blacklist: %v", err)
fmt.Printf("%q is not a git repo, skipping sync\n", b.gitRepo)
return b.Size(), nil
}
}
force := true
return b.reload(skipGC, force)
}
func (r Blacklist) Run(ctx context.Context) {
ticker := time.NewTicker(47 * time.Minute)
defer ticker.Stop()
for {
select {
case <-ticker.C:
if n, err := r.reload(false, false); err != nil {
fmt.Fprintf(os.Stderr, "error: ip cohort: %v\n", err)
} else {
fmt.Fprintf(os.Stderr, "ip cohort: loaded %d blacklist entries\n", n)
}
case <-ctx.Done():
return
}
}
}
func (b Blacklist) reload(skipGC, force bool) (int, error) {
laxGC := skipGC
lazyPrune := skipGC
updated, err := b.shallowRepo.Sync(laxGC, lazyPrune)
if err != nil {
return 0, fmt.Errorf("git sync: %w", err)
}
if !updated && !force {
return 0, nil
}
needsSort := false
nextCohort, err := ipcohort.LoadFile(b.path, needsSort)
if err != nil {
return 0, fmt.Errorf("ip cohort: %w", err)
}
b.Swap(nextCohort)
return b.Size(), nil
}