feat: gitshallow.File for per-file path/open/sync; use in check-ip git case

This commit is contained in:
AJ ONeal 2026-04-20 12:39:24 -06:00
parent 6b420badbc
commit 7b71dec445
No known key found for this signature in database
2 changed files with 70 additions and 24 deletions

View File

@ -45,43 +45,43 @@ func main() {
// -- Blacklist ----------------------------------------------------------
var syncs dataset.MultiSyncer
var inboundPaths, outboundPaths []string
var (
syncer dataset.Syncer
inboundPaths []string
outboundPaths []string
)
switch {
case *inbound != "" || *outbound != "":
syncer = dataset.NopSyncer{}
inboundPaths = splitPaths(*inbound)
outboundPaths = splitPaths(*outbound)
case *gitURL != "":
cacheDir := cacheDir(*dataDir, "bitwire-it")
syncs = dataset.MultiSyncer{gitshallow.New(*gitURL, cacheDir, 1, "")}
dir := cacheDir(*dataDir, "bitwire-it")
gr := gitshallow.New(*gitURL, dir, 1, "")
syncer = gr
inboundPaths = []string{
filepath.Join(cacheDir, "tables/inbound/single_ips.txt"),
filepath.Join(cacheDir, "tables/inbound/networks.txt"),
gr.File("tables/inbound/single_ips.txt").Path(),
gr.File("tables/inbound/networks.txt").Path(),
}
outboundPaths = []string{
filepath.Join(cacheDir, "tables/outbound/single_ips.txt"),
filepath.Join(cacheDir, "tables/outbound/networks.txt"),
gr.File("tables/outbound/single_ips.txt").Path(),
gr.File("tables/outbound/networks.txt").Path(),
}
default:
dir := cacheDir(*dataDir, "bitwire-it")
inboundSingle := filepath.Join(dir, "inbound_single_ips.txt")
inboundNetwork := filepath.Join(dir, "inbound_networks.txt")
outboundSingle := filepath.Join(dir, "outbound_single_ips.txt")
outboundNetwork := filepath.Join(dir, "outbound_networks.txt")
syncs = dataset.MultiSyncer{
httpcache.New(inboundSingleURL, inboundSingle),
httpcache.New(inboundNetworkURL, inboundNetwork),
httpcache.New(outboundSingleURL, outboundSingle),
httpcache.New(outboundNetworkURL, outboundNetwork),
}
inboundPaths = []string{inboundSingle, inboundNetwork}
outboundPaths = []string{outboundSingle, outboundNetwork}
inSingle := httpcache.New(inboundSingleURL, filepath.Join(dir, "inbound_single_ips.txt"))
inNetwork := httpcache.New(inboundNetworkURL, filepath.Join(dir, "inbound_networks.txt"))
outSingle := httpcache.New(outboundSingleURL, filepath.Join(dir, "outbound_single_ips.txt"))
outNetwork:= httpcache.New(outboundNetworkURL, filepath.Join(dir, "outbound_networks.txt"))
syncer = dataset.MultiSyncer{inSingle, inNetwork, outSingle, outNetwork}
inboundPaths = []string{inSingle.Path, inNetwork.Path}
outboundPaths = []string{outSingle.Path, outNetwork.Path}
}
g := dataset.NewGroup(syncs)
g := dataset.NewGroup(syncer)
var whitelistDS *dataset.View[ipcohort.Cohort]
if *whitelist != "" {
paths := splitPaths(*whitelist)

View File

@ -168,11 +168,57 @@ func (r *Repo) Sync() (bool, error) {
return r.syncGit()
}
// Fetch satisfies httpcache.Syncer.
// Fetch satisfies dataset.Syncer by delegating to syncGit and passing its
// results through unchanged.
func (r *Repo) Fetch() (bool, error) {
	updated, err := r.syncGit()
	return updated, err
}
// File builds a handle for relPath inside this repo.
// The returned handle exposes Path and Open for reaching the file, and a Fetch
// method that syncs the repo and then reports, based on modification time,
// whether that particular file changed.
func (r *Repo) File(relPath string) *File {
	handle := new(File)
	handle.repo = r
	handle.rel = relPath
	return handle
}
// File is a handle to a single file inside a Repo.
// Its Fetch method syncs the owning repo and then reports whether this file's
// modification time differs from the one recorded by the previous Fetch.
// NOTE(review): the original comment says the repo sync is deduped across all
// File handles sharing the same Repo; that depends on syncGit, whose body is
// not visible here — confirm.
type File struct {
// repo is the Repo this file belongs to; its Path is the base for Path().
repo *Repo
// rel is the file's path relative to the repo, as passed to Repo.File.
rel string
// mu guards lastMod.
mu sync.Mutex
// lastMod is the modification time recorded by the most recent Fetch that
// reported a change; it is the zero time until then.
lastMod time.Time
}
// Path reports the file's location on disk: the handle's relative path joined
// onto the owning repo's Path.
func (f *File) Path() string {
	root := f.repo.Path
	return filepath.Join(root, f.rel)
}
// Open opens the file at Path for reading.
// On success the caller owns the returned *os.File and must Close it.
func (f *File) Open() (*os.File, error) {
	fh, err := os.Open(f.Path())
	if err != nil {
		return nil, err
	}
	return fh, nil
}
// Fetch syncs the repo and reports whether this file changed since the last
// call, judged by its modification time.
//
// It returns (true, nil) when the file's mtime differs from the one recorded
// by the previous Fetch (always the case on the first successful call, since
// the recorded time starts at the zero value), (false, nil) when it is
// unchanged, and (false, err) when the repo sync or the stat fails. Errors are
// returned unwrapped so callers can keep using helpers such as os.IsNotExist.
//
// Implements dataset.Syncer; safe to call concurrently.
func (f *File) Fetch() (bool, error) {
	if _, err := f.repo.syncGit(); err != nil {
		return false, err
	}

	// Stat and compare inside a single critical section. If the stat ran
	// outside the lock, a concurrent caller holding an older observation could
	// store it after a newer one, making the next Fetch report a change that
	// never happened.
	f.mu.Lock()
	defer f.mu.Unlock()

	info, err := os.Stat(f.Path())
	if err != nil {
		return false, err
	}

	modTime := info.ModTime()
	if modTime.Equal(f.lastMod) {
		return false, nil
	}
	f.lastMod = modTime
	return true, nil
}
func (r *Repo) syncGit() (updated bool, err error) {
r.mu.Lock()
defer r.mu.Unlock()