Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cmd/cachewd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,10 @@ func loadGlobalConfig(ast *hcl.AST) (GlobalConfig, map[string]string, error) {

// Inject state directory as CACHEW_STATE for provider config expansion.
envars["CACHEW_STATE"] = cfg.State
// Also inject CACHEW_URL from the config, unless envars already holds a non-empty value.
if envars["CACHEW_URL"] == "" {
envars["CACHEW_URL"] = cfg.URL
}

// Second pass: re-expand now that CACHEW_STATE and CACHEW_URL are available.
cfg = GlobalConfig{}
Expand All @@ -251,6 +255,7 @@ func loadGlobalConfig(ast *hcl.AST) (GlobalConfig, map[string]string, error) {
if err := hcl.UnmarshalAST(ast, &cfg, hcl.HydratedImplicitBlocks(true), expanding); err != nil {
return cfg, nil, errors.Errorf("load global config: %w", err)
}

return cfg, envars, nil
}

Expand Down
3 changes: 3 additions & 0 deletions internal/strategy/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ func Register(r *strategy.Registry, scheduler jobscheduler.Provider, cloneManage
// Config holds the HCL-configurable settings of the git strategy.
type Config struct {
// SnapshotInterval is how often tar.zstd snapshots are generated; 0 disables snapshots.
SnapshotInterval time.Duration `hcl:"snapshot-interval,optional" help:"How often to generate tar.zstd snapshots. 0 disables snapshots." default:"0"`
// RepackInterval is how often a full repack runs; 0 disables it.
RepackInterval time.Duration `hcl:"repack-interval,optional" help:"How often to run full repack. 0 disables." default:"0"`
// ServerURL is embedded as remote.origin.url in snapshots so git pull goes through cachew.
// It is optional and defaults to ${CACHEW_URL}.
ServerURL string `hcl:"server-url,optional" help:"Base URL of this cachew instance, embedded in snapshot remote URLs." default:"${CACHEW_URL}"`
}

type Strategy struct {
Expand Down Expand Up @@ -109,6 +111,7 @@ func New(
spools: make(map[string]*RepoSpools),
tokenManager: tokenManager,
}
s.config.ServerURL = strings.TrimRight(config.ServerURL, "/")

existing, err := s.cloneManager.DiscoverExisting(ctx)
if err != nil {
Expand Down
36 changes: 31 additions & 5 deletions internal/strategy/git/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,20 @@ func snapshotDirForURL(mirrorRoot, upstreamURL string) (string, error) {
return filepath.Join(mirrorRoot, ".snapshots", repoPath), nil
}

// remoteURLForSnapshot picks the value written to remote.origin.url inside a snapshot.
//
// With a configured server URL the result is "<server-url>/git/<repo-path>", so that a
// later git pull is routed through cachew. If no server URL is set, or the repo path
// cannot be derived from upstream, the upstream URL is returned unchanged.
func (s *Strategy) remoteURLForSnapshot(upstream string) string {
	base := s.config.ServerURL
	if base != "" {
		if repoPath, err := gitclone.RepoPathFromURL(upstream); err == nil {
			return base + "/git/" + repoPath
		}
	}
	// Either no server URL is configured or the upstream could not be mapped to a repo path.
	return upstream
}

func (s *Strategy) generateAndUploadSnapshot(ctx context.Context, repo *gitclone.Repository) error {
logger := logging.FromContext(ctx)
upstream := repo.UpstreamURL()
Expand All @@ -45,12 +59,24 @@ func (s *Strategy) generateAndUploadSnapshot(ctx context.Context, repo *gitclone
return errors.Wrap(err, "create snapshot parent dir")
}

// Local clone from the mirror — git hardlinks objects by default.
// #nosec G204 - repo.Path() and snapshotDir are controlled by us
cmd := exec.CommandContext(ctx, "git", "clone", repo.Path(), snapshotDir)
if output, err := cmd.CombinedOutput(); err != nil {
// Hold a read lock to exclude concurrent fetches while cloning.
if err := repo.WithReadLock(func() error {
// #nosec G204 - repo.Path() and snapshotDir are controlled by us
cmd := exec.CommandContext(ctx, "git", "clone", repo.Path(), snapshotDir)
if output, err := cmd.CombinedOutput(); err != nil {
return errors.Wrapf(err, "git clone for snapshot: %s", string(output))
}

// git clone from a local path sets remote.origin.url to that path; restore it.
// #nosec G204 - remoteURL is derived from controlled inputs
cmd = exec.CommandContext(ctx, "git", "-C", snapshotDir, "remote", "set-url", "origin", s.remoteURLForSnapshot(upstream))
if output, err := cmd.CombinedOutput(); err != nil {
return errors.Wrapf(err, "fix snapshot remote URL: %s", string(output))
}
return nil
}); err != nil {
_ = os.RemoveAll(snapshotDir)
return errors.Wrapf(err, "git clone for snapshot: %s", string(output))
return errors.WithStack(err)
}

cacheKey := cache.NewKey(upstream + ".snapshot")
Expand Down
47 changes: 47 additions & 0 deletions internal/strategy/git/snapshot_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,55 @@ func TestSnapshotGenerationViaLocalClone(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, "hello\n", string(data))

// The remote URL must point to the upstream, not the local mirror path.
cmd := exec.Command("git", "-C", restoreDir, "remote", "get-url", "origin")
output, err := cmd.CombinedOutput()
assert.NoError(t, err, string(output))
assert.Equal(t, upstreamURL+"\n", string(output))

// Snapshot working directory should have been cleaned up.
snapshotWorkDir := filepath.Join(mirrorRoot, ".snapshots", "github.com", "org", "repo")
_, err = os.Stat(snapshotWorkDir)
assert.True(t, os.IsNotExist(err))
}

// TestSnapshotRemoteURLUsesServerURL checks that a snapshot generated by a strategy
// configured with a ServerURL restores with its origin remote set to
// "<ServerURL>/git/<repo path>" instead of the upstream URL.
func TestSnapshotRemoteURLUsesServerURL(t *testing.T) {
	if _, lookErr := exec.LookPath("git"); lookErr != nil {
		t.Skip("git not found in PATH")
	}

	const (
		upstreamURL = "https://github.com/org/repo"
		serverURL   = "http://cachew.example.com"
	)

	_, ctx := logging.Configure(context.Background(), logging.Config{})
	workDir := t.TempDir()
	mirrorRoot := filepath.Join(workDir, "mirrors")

	// Seed a mirror at the location the clone manager uses for upstreamURL.
	createTestMirrorRepo(t, filepath.Join(mirrorRoot, "github.com", "org", "repo"))

	memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{})
	assert.NoError(t, err)
	mux := newTestMux()

	// Build the strategy with ServerURL set; no GitHub App token manager is needed.
	provider := gitclone.NewManagerProvider(ctx, gitclone.Config{MirrorRoot: mirrorRoot}, nil)
	noTokenManager := func() (*githubapp.TokenManager, error) { return nil, nil } //nolint:nilnil
	strat, err := git.New(ctx, git.Config{ServerURL: serverURL}, newTestScheduler(ctx, t), memCache, mux, provider, noTokenManager)
	assert.NoError(t, err)

	manager, err := provider()
	assert.NoError(t, err)
	repo, err := manager.GetOrCreate(ctx, upstreamURL)
	assert.NoError(t, err)

	assert.NoError(t, strat.GenerateAndUploadSnapshot(ctx, repo))

	// Restore the uploaded snapshot and inspect the origin remote it carries.
	restoreDir := filepath.Join(workDir, "restored")
	err = snapshot.Restore(ctx, memCache, cache.NewKey(upstreamURL+".snapshot"), restoreDir)
	assert.NoError(t, err)

	output, err := exec.Command("git", "-C", restoreDir, "remote", "get-url", "origin").CombinedOutput()
	assert.NoError(t, err, string(output))
	assert.Equal(t, serverURL+"/git/github.com/org/repo\n", string(output))
}