feat: add net/gitshallow (for incremental updates to data repos)

This commit is contained in:
AJ ONeal 2026-01-21 14:58:31 -07:00
parent d3b59aebff
commit 8d1354f0da
No known key found for this signature in database
2 changed files with 275 additions and 0 deletions

View File

@ -0,0 +1,76 @@
// git-shallow-sync is a simple CLI tool to synchronize a shallow git repository
// using the github.com/therootcompany/golib/net/gitshallow package.
//
// Usage:
//
// git-shallow-sync <repository-url> <local-path>
//
// Example:
//
// git-shallow-sync git@github.com:bitwire-it/ipblocklist.git ~/srv/app/ipblocklist
package main
import (
"fmt"
"os"
"path/filepath"
"github.com/therootcompany/golib/net/gitshallow"
)
// Fixed settings that main passes to gitshallow.New and ShallowRepo.Sync.
const (
defaultDepth = 1 // history depth handed to gitshallow.New (1 = shallow, most recent commit only)
defaultBranch = "" // empty = no --branch is passed, so the remote's default branch is used (clone is always --single-branch)
laxGC = false // false = Sync runs git gc with --aggressive
lazyPrune = false // false = Sync runs git gc with --prune=now
)
// main is the CLI entry point. It expects exactly two positional arguments,
// the repository URL and the local path, resolves the path to an absolute
// one and runs a shallow sync. Any failure is reported on stderr and the
// process exits with status 1.
func main() {
	if len(os.Args) != 3 {
		name := filepath.Base(os.Args[0])
		fmt.Fprintf(os.Stderr, "Usage: %s <repository-url> <local-path>\n", name)
		fmt.Fprintf(os.Stderr, "Example:\n")
		fmt.Fprintf(os.Stderr, " %s git@github.com:bitwire-it/ipblocklist.git ~/srv/app/ipblocklist\n", name)
		os.Exit(1)
	}

	url := os.Args[1]
	path := os.Args[2]

	// An empty argument (e.g. "" from an unset shell variable) must be
	// rejected up front; indexing path[0] below would otherwise panic.
	if path == "" {
		fmt.Fprintf(os.Stderr, "Invalid path: local path must not be empty\n")
		os.Exit(1)
	}

	// Expand a leading "~" ourselves for environments where the shell does
	// not do it (the argument was quoted, or e.g. on Windows). Only "~" and
	// "~<separator>..." refer to the current user's home directory; a form
	// such as "~other/dir" is left untouched rather than being mis-joined
	// onto our own home directory.
	if path == "~" || (len(path) > 1 && path[0] == '~' && os.IsPathSeparator(path[1])) {
		home, err := os.UserHomeDir()
		if err != nil {
			fmt.Fprintf(os.Stderr, "Failed to get home directory: %v\n", err)
			os.Exit(1)
		}
		path = filepath.Join(home, path[1:])
	}

	// Make path absolute
	absPath, err := filepath.Abs(path)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Invalid path: %v\n", err)
		os.Exit(1)
	}

	fmt.Printf("Syncing repository:\n")
	fmt.Printf(" URL: %s\n", url)
	fmt.Printf(" Path: %s\n", absPath)

	repo := gitshallow.New(url, absPath, defaultDepth, defaultBranch)
	updated, err := repo.Sync(laxGC, lazyPrune)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Sync failed: %v\n", err)
		os.Exit(1)
	}

	if updated {
		fmt.Println("Repository was updated (new commits fetched).")
	} else {
		fmt.Println("Repository is already up to date.")
	}
	fmt.Println("Sync complete.")
}

View File

@ -0,0 +1,199 @@
package gitshallow
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
)
// ShallowRepo represents a shallow Git repository manager.
// It pairs a remote URL with a local working copy and the depth/branch
// settings used when cloning and fetching. It embeds a sync.Mutex, so it
// should be used through a pointer (as returned by New) and not copied.
type ShallowRepo struct {
URL string // remote repository URL passed to git clone
Path string // local directory of the working copy
Depth int // 0 defaults to 1; a negative value (e.g. -1) omits --depth, i.e. full history
Branch string // Optional: specific branch to clone/pull; empty means the remote's default branch
//WithBranches bool
//WithTags bool
mu sync.Mutex // Mutex for in-process locking; held by Clone, Fetch, GC and Sync
}
// New returns a ShallowRepo for the given remote URL and local path.
// A depth of 0 is replaced by 1, and surrounding whitespace is stripped
// from branch. No git command is executed.
func New(url, path string, depth int, branch string) *ShallowRepo {
	repo := &ShallowRepo{
		URL:    url,
		Path:   path,
		Depth:  depth,
		Branch: strings.TrimSpace(branch), // tolerate accidental whitespace
	}
	if repo.Depth == 0 {
		repo.Depth = 1
	}
	return repo
}
// Clone performs a shallow clone of URL into Path when the repository is
// not present yet (--no-tags --single-branch, with --depth 1 by default,
// the configured Depth otherwise, and no --depth for a negative Depth).
// It holds the in-process lock for the duration of the call.
// It returns false with a nil error when the repository already exists and
// true after a successful clone.
func (r *ShallowRepo) Clone() (bool, error) {
r.mu.Lock()
defer r.mu.Unlock()
return r.clone()
}
// clone runs `git clone` for r.URL into r.Path unless the repository is
// already present. The caller must hold r.mu.
//
// It returns (false, nil) when the repository already exists, (true, nil)
// after a successful clone, and (false, err) when validation, directory
// creation or git itself fails.
func (r *ShallowRepo) clone() (bool, error) {
	// Check Path before exists(): with an empty Path, exists() would probe
	// ".git" in the current working directory and could report an unrelated
	// repository as already cloned.
	if r.Path == "" {
		return false, fmt.Errorf("local path is required")
	}
	if r.exists() {
		return false, nil
	}
	if r.URL == "" {
		return false, fmt.Errorf("repository URL is required")
	}

	args := []string{"clone", "--no-tags"}
	if r.Depth == 0 {
		r.Depth = 1
	}
	// A negative depth means "full history": no --depth flag at all.
	if r.Depth > 0 {
		args = append(args, "--depth", fmt.Sprintf("%d", r.Depth))
	}
	args = append(args, "--single-branch")
	if r.Branch != "" {
		args = append(args, "--branch", r.Branch)
	}
	// "--" ends option parsing so a URL that starts with "-" can never be
	// interpreted as a git option. Only the base name is passed as target
	// because runGit executes the clone from the parent directory.
	args = append(args, "--", r.URL, filepath.Base(r.Path))

	// The parent directory is used as the command's working directory, so
	// it has to exist before git is started.
	if err := os.MkdirAll(filepath.Dir(r.Path), 0o755); err != nil {
		return false, fmt.Errorf("creating parent directory of %s: %w", r.Path, err)
	}

	if _, err := r.runGit(args...); err != nil {
		return false, err
	}
	return true, nil
}
// exists reports whether r.Path contains a ".git" entry.
// Any os.Stat failure, not only "not found", is treated as absent.
func (r *ShallowRepo) exists() bool {
	gitEntry := filepath.Join(r.Path, ".git")
	if _, statErr := os.Stat(gitEntry); statErr != nil {
		return false
	}
	return true
}
// runGit executes git with the given arguments and returns its combined
// stdout/stderr output with surrounding whitespace trimmed.
//
// The command runs inside r.Path when the repository exists; otherwise
// (i.e. for clone) it runs in the parent directory of r.Path.
//
// On failure the returned error contains the command line and git's output
// and wraps the underlying error, so callers can inspect it with errors.As
// (for example to reach an *exec.ExitError).
func (r *ShallowRepo) runGit(args ...string) (string, error) {
	cmd := exec.Command("git", args...)

	// exists() stats r.Path/.git, which already implies r.Path is present,
	// so no separate stat of r.Path is needed.
	cmd.Dir = filepath.Dir(r.Path)
	if r.exists() {
		cmd.Dir = r.Path
	}

	output, err := cmd.CombinedOutput()
	if err != nil {
		return "", fmt.Errorf("git %s failed: %w\n%s", strings.Join(args, " "), err, output)
	}
	return strings.TrimSpace(string(output)), nil
}
// Fetch updates an existing repository and reports whether HEAD changed.
// It holds the in-process lock and delegates to fetch, which runs a
// fast-forward-only pull (limited to origin/<Branch> only when Branch is
// set) followed by `git fetch --no-tags`, passing --depth unless Depth is
// negative.
// Returns true if HEAD changed (i.e. meaningful update occurred), and an
// error if the repository does not exist or a git command fails.
func (r *ShallowRepo) Fetch() (updated bool, err error) {
r.mu.Lock()
defer r.mu.Unlock()
return r.fetch()
}
// fetch fast-forwards the working branch and then re-fetches with the
// configured depth. The caller must hold r.mu.
//
// It returns true when HEAD differs before and after the update, and an
// error when the repository does not exist or any git command fails.
func (r *ShallowRepo) fetch() (updated bool, err error) {
	if !r.exists() {
		return false, fmt.Errorf("repository does not exist at %s", r.Path)
	}

	// runGit already executes inside r.Path, and git resolves a relative -C
	// argument against its working directory. Passing a relative r.Path
	// would therefore point at <Path>/<Path>; an absolute path is immune.
	repoDir, err := filepath.Abs(r.Path)
	if err != nil {
		return false, fmt.Errorf("resolving repository path %s: %w", r.Path, err)
	}

	// Remember current HEAD
	oldHead, err := r.runGit("-C", repoDir, "rev-parse", "HEAD")
	if err != nil {
		return false, err
	}

	// Update local branch (fast-forward only).
	// NOTE(review): presumably the pull runs before the depth-limited fetch
	// so the fast-forward sees connected history, and the fetch afterwards
	// re-truncates it — confirm before reordering.
	pullArgs := []string{"-C", repoDir, "pull", "--ff-only"}
	if r.Branch != "" {
		pullArgs = append(pullArgs, "origin", r.Branch)
	}
	if _, err = r.runGit(pullArgs...); err != nil {
		return false, err
	}

	// Fetch with the configured depth; a negative depth omits --depth.
	fetchArgs := []string{"-C", repoDir, "fetch", "--no-tags"}
	if r.Depth == 0 {
		r.Depth = 1
	}
	if r.Depth > 0 {
		fetchArgs = append(fetchArgs, "--depth", fmt.Sprintf("%d", r.Depth))
	}
	if _, err = r.runGit(fetchArgs...); err != nil {
		return false, err
	}

	newHead, err := r.runGit("-C", repoDir, "rev-parse", "HEAD")
	if err != nil {
		return false, err
	}
	return oldHead != newHead, nil
}
// GC runs git gc on the repository while holding the in-process lock.
// With lax=false it adds --aggressive and with lazy=false it adds
// --prune=now, so GC(false, false) prunes immediately and aggressively.
// It returns an error if the repository does not exist or git gc fails.
func (r *ShallowRepo) GC(lax, lazy bool) error {
r.mu.Lock()
defer r.mu.Unlock()
return r.gc(lax, lazy)
}
// gc runs `git gc` in the repository. The caller must hold r.mu.
// --aggressive is added unless lax is true and --prune=now is added unless
// lazy is true. It returns an error when the repository does not exist or
// git fails.
func (r *ShallowRepo) gc(lax, lazy bool) error {
	if !r.exists() {
		return fmt.Errorf("repository does not exist at %s", r.Path)
	}

	// runGit already executes inside r.Path, and git resolves a relative -C
	// argument against its working directory. Passing a relative r.Path
	// would therefore point at <Path>/<Path>; an absolute path is immune.
	repoDir, err := filepath.Abs(r.Path)
	if err != nil {
		return fmt.Errorf("resolving repository path %s: %w", r.Path, err)
	}

	args := []string{"-C", repoDir, "gc"}
	if !lax {
		args = append(args, "--aggressive")
	}
	if !lazy {
		args = append(args, "--prune=now")
	}
	_, err = r.runGit(args...)
	return err
}
// Sync brings the local repository up to date while holding the in-process
// lock:
//
//   - If the repository is missing it is cloned, and Sync returns true
//     without fetching or running GC.
//   - Otherwise it fetches; when nothing changed it returns false and skips
//     GC.
//   - When the fetch moved HEAD, GC runs with the given options.
//
// updated reports whether new content was obtained (fresh clone or changed
// HEAD). If only GC fails, updated is still true and the returned error
// wraps the GC failure.
func (r *ShallowRepo) Sync(laxGC, lazyPrune bool) (updated bool, err error) {
	r.mu.Lock()
	defer r.mu.Unlock()

	cloned, err := r.clone()
	if err != nil {
		return false, err
	}
	if cloned {
		return true, nil
	}

	// Assign to the named result instead of shadowing it in an if-scope;
	// a shadowed copy made every later `return updated` report false.
	updated, err = r.fetch()
	if err != nil {
		return updated, err
	}
	if !updated {
		return false, nil
	}

	if gcErr := r.gc(laxGC, lazyPrune); gcErr != nil {
		return updated, fmt.Errorf("gc failed but fetch succeeded: %w", gcErr)
	}
	return updated, nil
}