// continue/core/indexing/CodebaseIndexer.ts
//
// 210 lines
// 6.2 KiB
// TypeScript

import { ConfigHandler } from "../config/ConfigHandler.js";
import { IContinueServerClient } from "../continueServer/interface.js";
import { IDE, IndexTag, IndexingProgressUpdate } from "../index.js";
import { CodeSnippetsCodebaseIndex } from "./CodeSnippetsIndex.js";
import { FullTextSearchCodebaseIndex } from "./FullTextSearch.js";
import { LanceDbIndex } from "./LanceDbIndex.js";
import { ChunkCodebaseIndex } from "./chunk/ChunkCodebaseIndex.js";
import { getComputeDeleteAddRemove } from "./refreshIndex.js";
import { CodebaseIndex, IndexResultType } from "./types.js";
import { walkDir } from "./walkDir.js";
/**
 * Mutable boolean flag holder.
 *
 * One party flips `paused`; another reads it to decide whether to keep
 * working. The value is stored on the instance and returned as-is.
 */
export class PauseToken {
  /** Backing storage for the `paused` accessor pair. */
  private _paused: boolean;

  /**
   * @param _paused Initial state of the flag.
   */
  constructor(_paused: boolean) {
    this._paused = _paused;
  }

  /** Current state of the flag. */
  get paused(): boolean {
    return this._paused;
  }

  /** Overwrites the flag with `value`. */
  set paused(value: boolean) {
    this._paused = value;
  }
}
/**
 * Refreshes every codebase index for a set of workspace directories and
 * reports what it is doing as a stream of `IndexingProgressUpdate` values.
 */
export class CodebaseIndexer {
  /**
   * @param configHandler Source of the loaded configuration (embeddings
   *   provider, `disableIndexing`).
   * @param ide Used to read files, walk directories and query branch, repo
   *   name and last-modified stats.
   * @param pauseToken Polled during indexing; while `paused` is true the
   *   refresh loop waits.
   * @param continueServerClient Passed through to the indexes that accept it.
   */
  constructor(
    private readonly configHandler: ConfigHandler,
    private readonly ide: IDE,
    private readonly pauseToken: PauseToken,
    private readonly continueServerClient: IContinueServerClient,
  ) {}

  /**
   * Instantiates the indexes to refresh, in the order they will be run.
   *
   * @returns The index instances, chunk index first.
   */
  private async getIndexesToBuild(): Promise<CodebaseIndex[]> {
    const config = await this.configHandler.loadConfig();

    const indexes = [
      new ChunkCodebaseIndex(
        this.ide.readFile.bind(this.ide),
        this.continueServerClient,
        config.embeddingsProvider.maxChunkSize,
      ), // Chunking must come first
      new LanceDbIndex(
        config.embeddingsProvider,
        this.ide.readFile.bind(this.ide),
        this.continueServerClient,
      ),
      new FullTextSearchCodebaseIndex(),
      new CodeSnippetsCodebaseIndex(this.ide),
    ];

    return indexes;
  }

  /**
   * Re-indexes each workspace directory with each index, yielding progress
   * updates along the way.
   *
   * The generator ends early (after yielding a final update) when there is
   * nothing to index, when indexing is disabled in the config, when
   * `abortSignal` is aborted, or when an index update throws.
   *
   * @param workspaceDirs Directories to index, processed in order.
   * @param abortSignal Checked on every update of an index, and while
   *   waiting on the pause token.
   * @returns An async stream of progress updates; `progress` is a fraction
   *   of the total work across all directories and indexes.
   */
  async *refresh(
    workspaceDirs: string[],
    abortSignal: AbortSignal,
  ): AsyncGenerator<IndexingProgressUpdate> {
    let progress = 0;

    if (workspaceDirs.length === 0) {
      yield {
        progress,
        desc: "Nothing to index",
        status: "disabled",
      };
      return;
    }

    const config = await this.configHandler.loadConfig();
    if (config.disableIndexing) {
      yield {
        progress,
        desc: "Indexing is disabled in config.json",
        status: "disabled",
      };
      return;
    } else {
      yield {
        progress,
        desc: "Starting indexing",
        status: "loading",
      };
    }

    const indexesToBuild = await this.getIndexesToBuild();
    let completedDirs = 0;

    // Each index contributes to overall progress in proportion to its
    // relative expected time.
    const totalRelativeExpectedTime = indexesToBuild.reduce(
      (sum, index) => sum + index.relativeExpectedTime,
      0,
    );

    // Wait until Git Extension has loaded to report progress
    // so we don't appear stuck at 0% while waiting
    await this.ide.getRepoName(workspaceDirs[0]);
    yield {
      progress,
      desc: "Starting indexing...",
      status: "loading",
    };

    for (const directory of workspaceDirs) {
      const files = await walkDir(directory, this.ide);
      const stats = await this.ide.getLastModified(files);
      const branch = await this.ide.getBranch(directory);
      const repoName = await this.ide.getRepoName(directory);
      let completedRelativeExpectedTime = 0;

      for (const codebaseIndex of indexesToBuild) {
        // TODO: IndexTag type should use repoName rather than directory
        const tag: IndexTag = {
          directory,
          branch,
          artifactId: codebaseIndex.artifactId,
        };

        // A fresh copy of the stats is handed over for every index.
        const [results, lastUpdated, markComplete] =
          await getComputeDeleteAddRemove(
            tag,
            { ...stats },
            (filepath) => this.ide.readFile(filepath),
            repoName,
          );

        try {
          for await (const {
            progress: indexProgress,
            desc,
          } of codebaseIndex.update(tag, results, markComplete, repoName)) {
            // Handle pausing in this loop because it's the only one really taking time
            if (!abortSignal.aborted && this.pauseToken.paused) {
              yield {
                progress,
                desc: "Paused",
                status: "paused",
              };
              // Poll every 100 ms. Also leave the wait when the caller aborts,
              // so a cancel issued while paused is honoured without un-pausing.
              while (this.pauseToken.paused && !abortSignal.aborted) {
                await new Promise((resolve) => setTimeout(resolve, 100));
              }
            }

            if (abortSignal.aborted) {
              yield {
                progress: 1,
                desc: "Indexing cancelled",
                status: "disabled",
              };
              return;
            }

            // Finished directories, plus the weighted share of the current
            // directory that is done, over the number of directories.
            // indexProgress is clamped so a value above 1 cannot overshoot.
            progress =
              (completedDirs +
                (completedRelativeExpectedTime +
                  Math.min(1.0, indexProgress) *
                    codebaseIndex.relativeExpectedTime) /
                  totalRelativeExpectedTime) /
              workspaceDirs.length;
            yield {
              progress,
              desc,
              status: "indexing",
            };
          }

          // NOTE(review): the result of markComplete is not awaited here;
          // if it is asynchronous its failures bypass the catch below — confirm.
          lastUpdated.forEach((entry) => {
            markComplete([entry], IndexResultType.UpdateLastUpdated);
          });

          completedRelativeExpectedTime += codebaseIndex.relativeExpectedTime;
          yield {
            progress:
              (completedDirs +
                completedRelativeExpectedTime / totalRelativeExpectedTime) /
              workspaceDirs.length,
            desc: "Completed indexing " + codebaseIndex.artifactId,
            status: "indexing",
          };
        } catch (e: unknown) {
          let errMsg = `${e}`;

          // Anything can be thrown, so only use `.message` when it really is
          // a string; otherwise fall back to the stringified value.
          const rawMessage: unknown = (
            e as { message?: unknown } | null | undefined
          )?.message;
          const searchable = typeof rawMessage === "string" ? rawMessage : errMsg;

          const errorRegex =
            /Invalid argument error: Values length (\d+) is less than the length \((\d+)\) multiplied by the value size \(\d+\)/;
          const match = searchable.match(errorRegex);
          if (match) {
            const [, valuesLength, expectedLength] = match;
            errMsg = `Generated embedding had length ${valuesLength} but was expected to be ${expectedLength}. This may be solved by deleting ~/.continue/index and refreshing the window to re-index.`;
          }

          // Log before yielding: if the consumer stops iterating at the
          // "failed" update, code after the yield never runs.
          console.warn(
            `Error updating the ${codebaseIndex.artifactId} index: ${e}`,
          );
          yield {
            progress: 0,
            desc: errMsg,
            status: "failed",
          };
          return;
        }
      }

      completedDirs++;
      progress = completedDirs / workspaceDirs.length;
      // NOTE(review): this "done" update is emitted once per directory, so with
      // several workspace directories it appears before the later ones have
      // been indexed — confirm consumers expect that.
      yield {
        progress,
        desc: "Indexing Complete",
        status: "done",
      };
    }
  }
}