Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions packages/backend/src/repoIndexManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import { captureEvent } from './posthog.js';
import { PromClient } from './promClient.js';
import { RepoWithConnections, Settings } from "./types.js";
import { getAuthCredentialsForRepo, getShardPrefix, groupmqLifecycleExceptionWrapper, measure, setIntervalAsync } from './utils.js';
import { indexGitRepository } from './zoekt.js';
import { cleanupTempShards, indexGitRepository } from './zoekt.js';

const LOG_TAG = 'repo-index-manager';
const logger = createLogger(LOG_TAG);
Expand Down Expand Up @@ -438,9 +438,17 @@ export class RepoIndexManager {
}

logger.info(`Indexing ${repo.name} (id: ${repo.id})...`);
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, revisions, signal));
const indexDuration_s = durationMs / 1000;
logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`);
try {
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, revisions, signal));
const indexDuration_s = durationMs / 1000;
logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`);
Copy link
Contributor

@brendan-kellam brendan-kellam Jan 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is a fair workaround, and we should probably have a mechanism for cleaning up these files, but it doesn't really address the root cause of these files being created in the first place.

} catch (error) {
// Clean up any temporary shard files left behind by the failed indexing operation.
// Zoekt creates .tmp files during indexing which can accumulate if indexing fails repeatedly.
logger.warn(`Indexing failed for ${repo.name} (id: ${repo.id}), cleaning up temp shard files...`);
await cleanupTempShards(repo);
throw error;
}

return revisions;
}
Expand Down
32 changes: 32 additions & 0 deletions packages/backend/src/zoekt.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { Repo } from "@sourcebot/db";
import { createLogger, env, getRepoPath } from "@sourcebot/shared";
import { exec } from "child_process";
import { readdir, rm } from "fs/promises";
import { INDEX_CACHE_DIR } from "./constants.js";
import { Settings } from "./types.js";
import { getShardPrefix } from "./utils.js";
Expand Down Expand Up @@ -54,3 +55,34 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, revisio
})
});
}

/**
 * Removes temporary shard files that a failed or interrupted indexing run
 * left behind in `INDEX_CACHE_DIR` for the given repository.
 *
 * Zoekt creates temporary files (with `.tmp` suffix) during indexing. A
 * directory entry is treated as a temp shard of `repo` when its name starts
 * with the repo's shard prefix (from `getShardPrefix`) and ends with `.tmp`.
 *
 * Cleanup is best effort: every failure is logged as a warning and never
 * thrown, so this can be awaited from an error path without masking the
 * original error. A failure to remove one file does not stop the remaining
 * files from being attempted.
 *
 * @param repo - The repository whose temp shards should be cleaned up
 * @returns A promise that resolves once every matching file has been attempted
 */
export const cleanupTempShards = async (repo: Repo): Promise<void> => {
    const shardPrefix = getShardPrefix(repo.orgId, repo.id);

    const isTempShardOfRepo = (file: string): boolean => {
        // Match the documented `.tmp` *suffix* rather than `.tmp` anywhere in the name.
        if (!file.startsWith(shardPrefix) || !file.endsWith('.tmp')) {
            return false;
        }

        // NOTE(review): presumably the shard prefix ends with the numeric repo id
        // (confirm against getShardPrefix). If so, a bare startsWith() would also
        // match a different repo whose id merely extends this one's digits
        // (e.g. id 1 vs. id 10) and delete temp shards that repo may still be
        // writing. Reject names where the prefix is immediately followed by a digit.
        const charAfterPrefix = file.charAt(shardPrefix.length);
        return charAfterPrefix < '0' || charAfterPrefix > '9';
    };

    let fileNames: string[];
    try {
        fileNames = await readdir(INDEX_CACHE_DIR);
    } catch (error) {
        // Log but don't throw - cleanup is best effort
        logger.warn(`Failed to cleanup temp shards for repo ${repo.id}:`, error);
        return;
    }

    let removedCount = 0;
    for (const file of fileNames.filter(isTempShardOfRepo)) {
        const filePath = `${INDEX_CACHE_DIR}/${file}`;
        try {
            logger.info(`Cleaning up temp shard file: ${filePath}`);
            // `force: true` makes an already-missing file a no-op instead of an error.
            await rm(filePath, { force: true });
            removedCount++;
        } catch (error) {
            // Keep going: one undeletable file should not block cleanup of the rest.
            logger.warn(`Failed to remove temp shard file ${filePath} for repo ${repo.id}:`, error);
        }
    }

    if (removedCount > 0) {
        logger.info(`Cleaned up ${removedCount} temp shard file(s) for repo ${repo.id}`);
    }
}