diff --git a/cmd/check-ip/main.go b/cmd/check-ip/main.go index d8d73a0..1abbc81 100644 --- a/cmd/check-ip/main.go +++ b/cmd/check-ip/main.go @@ -45,43 +45,43 @@ func main() { // -- Blacklist ---------------------------------------------------------- - var syncs dataset.MultiSyncer - var inboundPaths, outboundPaths []string + var ( + syncer dataset.Syncer + inboundPaths []string + outboundPaths []string + ) switch { case *inbound != "" || *outbound != "": - inboundPaths = splitPaths(*inbound) + syncer = dataset.NopSyncer{} + inboundPaths = splitPaths(*inbound) outboundPaths = splitPaths(*outbound) case *gitURL != "": - cacheDir := cacheDir(*dataDir, "bitwire-it") - syncs = dataset.MultiSyncer{gitshallow.New(*gitURL, cacheDir, 1, "")} - inboundPaths = []string{ - filepath.Join(cacheDir, "tables/inbound/single_ips.txt"), - filepath.Join(cacheDir, "tables/inbound/networks.txt"), + dir := cacheDir(*dataDir, "bitwire-it") + gr := gitshallow.New(*gitURL, dir, 1, "") + syncer = gr + inboundPaths = []string{ + gr.File("tables/inbound/single_ips.txt").Path(), + gr.File("tables/inbound/networks.txt").Path(), } outboundPaths = []string{ - filepath.Join(cacheDir, "tables/outbound/single_ips.txt"), - filepath.Join(cacheDir, "tables/outbound/networks.txt"), + gr.File("tables/outbound/single_ips.txt").Path(), + gr.File("tables/outbound/networks.txt").Path(), } default: dir := cacheDir(*dataDir, "bitwire-it") - inboundSingle := filepath.Join(dir, "inbound_single_ips.txt") - inboundNetwork := filepath.Join(dir, "inbound_networks.txt") - outboundSingle := filepath.Join(dir, "outbound_single_ips.txt") - outboundNetwork := filepath.Join(dir, "outbound_networks.txt") - syncs = dataset.MultiSyncer{ - httpcache.New(inboundSingleURL, inboundSingle), - httpcache.New(inboundNetworkURL, inboundNetwork), - httpcache.New(outboundSingleURL, outboundSingle), - httpcache.New(outboundNetworkURL, outboundNetwork), - } - inboundPaths = []string{inboundSingle, inboundNetwork} - outboundPaths = 
[]string{outboundSingle, outboundNetwork} + inSingle := httpcache.New(inboundSingleURL, filepath.Join(dir, "inbound_single_ips.txt")) + inNetwork := httpcache.New(inboundNetworkURL, filepath.Join(dir, "inbound_networks.txt")) + outSingle := httpcache.New(outboundSingleURL, filepath.Join(dir, "outbound_single_ips.txt")) + outNetwork:= httpcache.New(outboundNetworkURL, filepath.Join(dir, "outbound_networks.txt")) + syncer = dataset.MultiSyncer{inSingle, inNetwork, outSingle, outNetwork} + inboundPaths = []string{inSingle.Path, inNetwork.Path} + outboundPaths = []string{outSingle.Path, outNetwork.Path} } - g := dataset.NewGroup(syncs) + g := dataset.NewGroup(syncer) var whitelistDS *dataset.View[ipcohort.Cohort] if *whitelist != "" { paths := splitPaths(*whitelist) diff --git a/net/gitshallow/gitshallow.go b/net/gitshallow/gitshallow.go index da5de2e..cb43597 100644 --- a/net/gitshallow/gitshallow.go +++ b/net/gitshallow/gitshallow.go @@ -168,11 +168,57 @@ func (r *Repo) Sync() (bool, error) { return r.syncGit() } -// Fetch satisfies httpcache.Syncer. +// Fetch satisfies dataset.Syncer. func (r *Repo) Fetch() (bool, error) { return r.syncGit() } +// File returns a handle to relPath within this repo. +// The handle's Path and Open methods give access to the file; its Fetch method +// syncs the repo and reports whether this specific file changed (by mtime). +func (r *Repo) File(relPath string) *File { + return &File{repo: r, rel: relPath} +} + +// File is a handle to a single file inside a Repo. +// It implements dataset.Syncer: Fetch syncs the repo (deduped across all File +// handles sharing the same Repo) then reports whether this file changed. +type File struct { + repo *Repo + rel string + mu sync.Mutex + lastMod time.Time +} + +// Path returns the file's location: the repo's Path joined with the file's repo-relative path. +func (f *File) Path() string { + return filepath.Join(f.repo.Path, f.rel) +} + +// Open returns an open *os.File for reading. The caller must Close it.
+func (f *File) Open() (*os.File, error) { + return os.Open(f.Path()) +} + +// Fetch syncs the repo and reports whether this file's mtime differs from the one recorded by the previous successful call; the first call normally reports true because no mtime is recorded yet. +// Implements dataset.Syncer; safe to call concurrently. +func (f *File) Fetch() (bool, error) { + if _, err := f.repo.syncGit(); err != nil { + return false, err + } + info, err := os.Stat(f.Path()) + if err != nil { + return false, err + } + f.mu.Lock() + defer f.mu.Unlock() + if info.ModTime().Equal(f.lastMod) { + return false, nil + } + f.lastMod = info.ModTime() + return true, nil +} + func (r *Repo) syncGit() (updated bool, err error) { r.mu.Lock() defer r.mu.Unlock()