refactor: HTTP datasets are independent, no Group; Group only for shared git repo

This commit is contained in:
AJ ONeal 2026-04-20 12:41:07 -06:00
parent 7b71dec445
commit 03ea6934e9
No known key found for this signature in database

View File

@ -45,30 +45,23 @@ func main() {
// -- Blacklist ---------------------------------------------------------- // -- Blacklist ----------------------------------------------------------
var ( var inboundDS, outboundDS *dataset.Dataset[ipcohort.Cohort]
syncer dataset.Syncer
inboundPaths []string
outboundPaths []string
)
switch { switch {
case *inbound != "" || *outbound != "": case *inbound != "" || *outbound != "":
syncer = dataset.NopSyncer{} inboundDS = cohortDataset(dataset.NopSyncer{}, splitPaths(*inbound)...)
inboundPaths = splitPaths(*inbound) outboundDS = cohortDataset(dataset.NopSyncer{}, splitPaths(*outbound)...)
outboundPaths = splitPaths(*outbound)
case *gitURL != "": case *gitURL != "":
dir := cacheDir(*dataDir, "bitwire-it") dir := cacheDir(*dataDir, "bitwire-it")
gr := gitshallow.New(*gitURL, dir, 1, "") gr := gitshallow.New(*gitURL, dir, 1, "")
syncer = gr inSingle := gr.File("tables/inbound/single_ips.txt")
inboundPaths = []string{ inNetwork := gr.File("tables/inbound/networks.txt")
gr.File("tables/inbound/single_ips.txt").Path(), outSingle := gr.File("tables/outbound/single_ips.txt")
gr.File("tables/inbound/networks.txt").Path(), outNetwork:= gr.File("tables/outbound/networks.txt")
} // Each File.Fetch deduplicates the git pull via the shared Repo.
outboundPaths = []string{ inboundDS = cohortDataset(dataset.MultiSyncer{inSingle, inNetwork}, inSingle.Path(), inNetwork.Path())
gr.File("tables/outbound/single_ips.txt").Path(), outboundDS = cohortDataset(dataset.MultiSyncer{outSingle, outNetwork}, outSingle.Path(), outNetwork.Path())
gr.File("tables/outbound/networks.txt").Path(),
}
default: default:
dir := cacheDir(*dataDir, "bitwire-it") dir := cacheDir(*dataDir, "bitwire-it")
@ -76,24 +69,24 @@ func main() {
inNetwork := httpcache.New(inboundNetworkURL, filepath.Join(dir, "inbound_networks.txt")) inNetwork := httpcache.New(inboundNetworkURL, filepath.Join(dir, "inbound_networks.txt"))
outSingle := httpcache.New(outboundSingleURL, filepath.Join(dir, "outbound_single_ips.txt")) outSingle := httpcache.New(outboundSingleURL, filepath.Join(dir, "outbound_single_ips.txt"))
outNetwork:= httpcache.New(outboundNetworkURL, filepath.Join(dir, "outbound_networks.txt")) outNetwork:= httpcache.New(outboundNetworkURL, filepath.Join(dir, "outbound_networks.txt"))
syncer = dataset.MultiSyncer{inSingle, inNetwork, outSingle, outNetwork} inboundDS = cohortDataset(dataset.MultiSyncer{inSingle, inNetwork}, inSingle.Path, inNetwork.Path)
inboundPaths = []string{inSingle.Path, inNetwork.Path} outboundDS = cohortDataset(dataset.MultiSyncer{outSingle, outNetwork}, outSingle.Path, outNetwork.Path)
outboundPaths = []string{outSingle.Path, outNetwork.Path}
} }
g := dataset.NewGroup(syncer) var whitelistDS *dataset.Dataset[ipcohort.Cohort]
var whitelistDS *dataset.View[ipcohort.Cohort]
if *whitelist != "" { if *whitelist != "" {
paths := splitPaths(*whitelist) whitelistDS = cohortDataset(dataset.NopSyncer{}, splitPaths(*whitelist)...)
whitelistDS = dataset.Add(g, func() (*ipcohort.Cohort, error) { return ipcohort.LoadFiles(paths...) })
} }
inboundDS := dataset.Add(g, func() (*ipcohort.Cohort, error) { return ipcohort.LoadFiles(inboundPaths...) })
outboundDS := dataset.Add(g, func() (*ipcohort.Cohort, error) { return ipcohort.LoadFiles(outboundPaths...) })
if err := g.Init(); err != nil { for _, ds := range []*dataset.Dataset[ipcohort.Cohort]{whitelistDS, inboundDS, outboundDS} {
if ds == nil {
continue
}
if err := ds.Init(); err != nil {
fmt.Fprintf(os.Stderr, "error: blacklist: %v\n", err) fmt.Fprintf(os.Stderr, "error: blacklist: %v\n", err)
os.Exit(1) os.Exit(1)
} }
}
fmt.Fprintf(os.Stderr, "Loaded inbound=%d outbound=%d\n", fmt.Fprintf(os.Stderr, "Loaded inbound=%d outbound=%d\n",
inboundDS.Load().Size(), outboundDS.Load().Size()) inboundDS.Load().Size(), outboundDS.Load().Size())
@ -113,7 +106,11 @@ func main() {
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
defer cancel() defer cancel()
go g.Run(ctx, 47*time.Minute) for _, ds := range []*dataset.Dataset[ipcohort.Cohort]{whitelistDS, inboundDS, outboundDS} {
if ds != nil {
go ds.Run(ctx, 47*time.Minute)
}
}
geo.Run(ctx, 47*time.Minute) geo.Run(ctx, 47*time.Minute)
// -- Check and report -------------------------------------------------- // -- Check and report --------------------------------------------------
@ -138,7 +135,14 @@ func main() {
} }
} }
func isBlocked(ip string, whitelist, cohort *dataset.View[ipcohort.Cohort]) bool { // cohortDataset creates a Dataset that fetches via syncer and loads paths as a Cohort.
// cohortDataset builds a Dataset whose sync step is delegated to syncer and
// whose load step parses the given file paths into a single Cohort.
func cohortDataset(syncer dataset.Syncer, paths ...string) *dataset.Dataset[ipcohort.Cohort] {
	// Name the loader closure for readability; it simply defers to
	// ipcohort.LoadFiles over the captured paths on every reload.
	loader := func() (*ipcohort.Cohort, error) {
		return ipcohort.LoadFiles(paths...)
	}
	return dataset.New(syncer, loader)
}
func isBlocked(ip string, whitelist, cohort *dataset.Dataset[ipcohort.Cohort]) bool {
if cohort == nil { if cohort == nil {
return false return false
} }