Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Fixed issue where certain file and folder names would cause type errors. [#862](https://github.com/sourcebot-dev/sourcebot/pull/862)
- Fixed token refresh error "Provider config not found or invalid for: x" when an SSO provider is configured using deprecated env vars. [#841](https://github.com/sourcebot-dev/sourcebot/pull/841)
- Fixed issue where temporary shard files left behind by failed indexing operations were not being cleaned up. [#805](https://github.com/sourcebot-dev/sourcebot/pull/805)

## [4.10.27] - 2026-02-05

Expand Down
25 changes: 16 additions & 9 deletions packages/backend/src/repoIndexManager.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
import * as Sentry from '@sentry/node';
import { PrismaClient, Repo, RepoIndexingJobStatus, RepoIndexingJobType } from "@sourcebot/db";
import { createLogger, Logger } from "@sourcebot/shared";
import { env, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata, repoMetadataSchema, getRepoPath } from '@sourcebot/shared';
import { createLogger, env, getRepoPath, Logger, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata, repoMetadataSchema } from "@sourcebot/shared";
import { DelayedError, Job, Queue, Worker } from "bullmq";
import { existsSync } from 'fs';
import { readdir, rm } from 'fs/promises';
import { DelayedError, Job, Queue, Worker } from "bullmq";
import { Redis } from 'ioredis';
import Redlock, { ExecutionError } from 'redlock';
import micromatch from 'micromatch';
import { WORKER_STOP_GRACEFUL_TIMEOUT_MS, INDEX_CACHE_DIR } from './constants.js';
import Redlock, { ExecutionError } from 'redlock';
import { INDEX_CACHE_DIR, WORKER_STOP_GRACEFUL_TIMEOUT_MS } from './constants.js';
import { cloneRepository, fetchRepository, getBranches, getCommitHashForRefName, getLatestCommitTimestamp, getLocalDefaultBranch, getTags, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js';
import { captureEvent } from './posthog.js';
import { PromClient } from './promClient.js';
import { RepoWithConnections, Settings } from "./types.js";
import { getAuthCredentialsForRepo, getShardPrefix, measure, setIntervalAsync } from './utils.js';
import { indexGitRepository } from './zoekt.js';
import { cleanupTempShards, indexGitRepository } from './zoekt.js';

const LOG_TAG = 'repo-index-manager';
const logger = createLogger(LOG_TAG);
Expand Down Expand Up @@ -478,9 +477,17 @@ export class RepoIndexManager {
}

logger.info(`Indexing ${repo.name} (id: ${repo.id})...`);
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, revisions, signal));
const indexDuration_s = durationMs / 1000;
logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`);
try {
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, revisions, signal));
const indexDuration_s = durationMs / 1000;
logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`);
} catch (error) {
// Clean up any temporary shard files left behind by the failed indexing operation.
// Zoekt creates .tmp files during indexing which can accumulate if indexing fails repeatedly.
logger.warn(`Indexing failed for ${repo.name} (id: ${repo.id}), cleaning up temp shard files...`);
await cleanupTempShards(repo);
throw error;
}

return revisions;
}
Expand Down
32 changes: 32 additions & 0 deletions packages/backend/src/zoekt.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { Repo } from "@sourcebot/db";
import { createLogger, env, getRepoPath } from "@sourcebot/shared";
import { exec } from "child_process";
import { readdir, rm } from "fs/promises";
import { INDEX_CACHE_DIR } from "./constants.js";
import { Settings } from "./types.js";
import { getShardPrefix } from "./utils.js";
Expand Down Expand Up @@ -54,3 +55,34 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, revisio
})
});
}

/**
 * Cleans up temporary shard files left behind by a failed indexing operation.
 * Zoekt creates temporary files (with `.tmp` suffix) during indexing, which
 * can be left behind if the indexing process fails or is interrupted.
 *
 * Cleanup is best effort: errors are logged and never thrown, and a failure
 * to remove one file does not stop the remaining files from being removed.
 *
 * @param repo - The repository whose temp shards should be cleaned up
 * @returns A promise that resolves once every candidate file has been attempted
 */
export const cleanupTempShards = async (repo: Repo): Promise<void> => {
    const shardPrefix = getShardPrefix(repo.orgId, repo.id);

    let files: string[];
    try {
        files = await readdir(INDEX_CACHE_DIR);
    } catch (error) {
        // Log but don't throw - cleanup is best effort
        logger.warn(`Failed to cleanup temp shards for repo ${repo.id}:`, error);
        return;
    }

    // NOTE(review): this is a plain prefix match. If shard prefixes are not
    // terminated by a delimiter, one repo's prefix could also match another
    // repo's files - confirm against getShardPrefix and zoekt's shard naming.
    const tempFiles = files.filter(file =>
        file.startsWith(shardPrefix) && file.includes('.tmp')
    );

    let removedCount = 0;
    for (const file of tempFiles) {
        const filePath = `${INDEX_CACHE_DIR}/${file}`;
        logger.info(`Cleaning up temp shard file: ${filePath}`);

        // Isolate failures per file so a single undeletable file does not
        // leave every remaining temp shard behind.
        try {
            // `force: true` makes an already-missing file a no-op instead of an error.
            await rm(filePath, { force: true });
            removedCount++;
        } catch (error) {
            logger.warn(`Failed to remove temp shard file ${filePath} for repo ${repo.id}:`, error);
        }
    }

    if (removedCount > 0) {
        logger.info(`Cleaned up ${removedCount} temp shard file(s) for repo ${repo.id}`);
    }
}
Loading