From e0210d6dc01317df85423a2622ff3af490289028 Mon Sep 17 00:00:00 2001 From: Neha Sherpa Date: Wed, 25 Feb 2026 10:25:46 -0800 Subject: [PATCH] fix: restore correct remote URL in git snapshots When generating a snapshot, `git clone` of the local mirror sets `remote.origin.url` to the local mirror path. After cloning, reset the origin URL to the actual upstream (or, when a server URL is configured, to the repository's URL on the cachew server) so clients can run `git pull` after extracting the snapshot. Co-Authored-By: Claude Sonnet 4.6 --- cmd/cachewd/main.go | 5 +++ internal/strategy/git/git.go | 3 ++ internal/strategy/git/snapshot.go | 36 +++++++++++++++++--- internal/strategy/git/snapshot_test.go | 47 ++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 5 deletions(-) diff --git a/cmd/cachewd/main.go b/cmd/cachewd/main.go index 3a71fad..99187b3 100644 --- a/cmd/cachewd/main.go +++ b/cmd/cachewd/main.go @@ -242,6 +242,10 @@ func loadGlobalConfig(ast *hcl.AST) (GlobalConfig, map[string]string, error) { // Inject state directory as CACHEW_STATE for provider config expansion. envars["CACHEW_STATE"] = cfg.State + // Also inject CACHEW_URL + if envars["CACHEW_URL"] == "" { + envars["CACHEW_URL"] = cfg.URL + } // Second pass: re-expand now that CACHEW_STATE is available. cfg = GlobalConfig{} @@ -251,6 +255,7 @@ func loadGlobalConfig(ast *hcl.AST) (GlobalConfig, map[string]string, error) { if err := hcl.UnmarshalAST(ast, &cfg, hcl.HydratedImplicitBlocks(true), expanding); err != nil { return cfg, nil, errors.Errorf("load global config: %w", err) } + return cfg, envars, nil } diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go index 9852980..bd81604 100644 --- a/internal/strategy/git/git.go +++ b/internal/strategy/git/git.go @@ -36,6 +36,8 @@ func Register(r *strategy.Registry, scheduler jobscheduler.Provider, cloneManage type Config struct { SnapshotInterval time.Duration `hcl:"snapshot-interval,optional" help:"How often to generate tar.zstd snapshots. 0 disables snapshots." 
default:"0"` RepackInterval time.Duration `hcl:"repack-interval,optional" help:"How often to run full repack. 0 disables." default:"0"` + // ServerURL is embedded as remote.origin.url in snapshots so git pull goes through cachew. + ServerURL string `hcl:"server-url,optional" help:"Base URL of this cachew instance, embedded in snapshot remote URLs." default:"${CACHEW_URL}"` } type Strategy struct { @@ -109,6 +111,7 @@ func New( spools: make(map[string]*RepoSpools), tokenManager: tokenManager, } + s.config.ServerURL = strings.TrimRight(config.ServerURL, "/") existing, err := s.cloneManager.DiscoverExisting(ctx) if err != nil { diff --git a/internal/strategy/git/snapshot.go b/internal/strategy/git/snapshot.go index 4263d67..3985d35 100644 --- a/internal/strategy/git/snapshot.go +++ b/internal/strategy/git/snapshot.go @@ -25,6 +25,20 @@ func snapshotDirForURL(mirrorRoot, upstreamURL string) (string, error) { return filepath.Join(mirrorRoot, ".snapshots", repoPath), nil } +// remoteURLForSnapshot returns the URL to embed as remote.origin.url in snapshots. +// When a server URL is configured, it returns the cachew URL for the repo so that +// git pull goes through cachew. Otherwise it falls back to the upstream URL. +func (s *Strategy) remoteURLForSnapshot(upstream string) string { + if s.config.ServerURL == "" { + return upstream + } + repoPath, err := gitclone.RepoPathFromURL(upstream) + if err != nil { + return upstream + } + return s.config.ServerURL + "/git/" + repoPath +} + func (s *Strategy) generateAndUploadSnapshot(ctx context.Context, repo *gitclone.Repository) error { logger := logging.FromContext(ctx) upstream := repo.UpstreamURL() @@ -45,12 +59,24 @@ func (s *Strategy) generateAndUploadSnapshot(ctx context.Context, repo *gitclone return errors.Wrap(err, "create snapshot parent dir") } - // Local clone from the mirror — git hardlinks objects by default. 
- // #nosec G204 - repo.Path() and snapshotDir are controlled by us - cmd := exec.CommandContext(ctx, "git", "clone", repo.Path(), snapshotDir) - if output, err := cmd.CombinedOutput(); err != nil { + // Hold a read lock to exclude concurrent fetches while cloning. + if err := repo.WithReadLock(func() error { + // #nosec G204 - repo.Path() and snapshotDir are controlled by us + cmd := exec.CommandContext(ctx, "git", "clone", repo.Path(), snapshotDir) + if output, err := cmd.CombinedOutput(); err != nil { + return errors.Wrapf(err, "git clone for snapshot: %s", string(output)) + } + + // git clone from a local path sets remote.origin.url to that path; restore it. + // #nosec G204 - remoteURL is derived from controlled inputs + cmd = exec.CommandContext(ctx, "git", "-C", snapshotDir, "remote", "set-url", "origin", s.remoteURLForSnapshot(upstream)) + if output, err := cmd.CombinedOutput(); err != nil { + return errors.Wrapf(err, "fix snapshot remote URL: %s", string(output)) + } + return nil + }); err != nil { _ = os.RemoveAll(snapshotDir) - return errors.Wrapf(err, "git clone for snapshot: %s", string(output)) + return errors.WithStack(err) } cacheKey := cache.NewKey(upstream + ".snapshot") diff --git a/internal/strategy/git/snapshot_test.go b/internal/strategy/git/snapshot_test.go index d59566e..f9c5133 100644 --- a/internal/strategy/git/snapshot_test.go +++ b/internal/strategy/git/snapshot_test.go @@ -159,8 +159,55 @@ func TestSnapshotGenerationViaLocalClone(t *testing.T) { assert.NoError(t, err) assert.Equal(t, "hello\n", string(data)) + // The remote URL must point to the upstream, not the local mirror path. + cmd := exec.Command("git", "-C", restoreDir, "remote", "get-url", "origin") + output, err := cmd.CombinedOutput() + assert.NoError(t, err, string(output)) + assert.Equal(t, upstreamURL+"\n", string(output)) + // Snapshot working directory should have been cleaned up. 
snapshotWorkDir := filepath.Join(mirrorRoot, ".snapshots", "github.com", "org", "repo") _, err = os.Stat(snapshotWorkDir) assert.True(t, os.IsNotExist(err)) } + +func TestSnapshotRemoteURLUsesServerURL(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not found in PATH") + } + + _, ctx := logging.Configure(context.Background(), logging.Config{}) + tmpDir := t.TempDir() + mirrorRoot := filepath.Join(tmpDir, "mirrors") + upstreamURL := "https://github.com/org/repo" + serverURL := "http://cachew.example.com" + + mirrorPath := filepath.Join(mirrorRoot, "github.com", "org", "repo") + createTestMirrorRepo(t, mirrorPath) + + memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{}) + assert.NoError(t, err) + mux := newTestMux() + + cm := gitclone.NewManagerProvider(ctx, gitclone.Config{MirrorRoot: mirrorRoot}, nil) + s, err := git.New(ctx, git.Config{ServerURL: serverURL}, newTestScheduler(ctx, t), memCache, mux, cm, func() (*githubapp.TokenManager, error) { return nil, nil }) //nolint:nilnil + assert.NoError(t, err) + + manager, err := cm() + assert.NoError(t, err) + repo, err := manager.GetOrCreate(ctx, upstreamURL) + assert.NoError(t, err) + + err = s.GenerateAndUploadSnapshot(ctx, repo) + assert.NoError(t, err) + + cacheKey := cache.NewKey(upstreamURL + ".snapshot") + restoreDir := filepath.Join(tmpDir, "restored") + err = snapshot.Restore(ctx, memCache, cacheKey, restoreDir) + assert.NoError(t, err) + + cmd := exec.Command("git", "-C", restoreDir, "remote", "get-url", "origin") + output, err := cmd.CombinedOutput() + assert.NoError(t, err, string(output)) + assert.Equal(t, serverURL+"/git/github.com/org/repo\n", string(output)) +}