🚧 ChunkCodebaseIndex
This commit is contained in:
parent
11cf98d255
commit
1d289383c6
|
@ -1,2 +1,3 @@
|
|||
**/*.run.xml
|
||||
archive/**/*
|
||||
archive/**/*
|
||||
extensions/vscode/models/**/*
|
|
@ -6,7 +6,7 @@ import { MAX_CHUNK_SIZE } from "../llm/constants";
|
|||
import { getBasename } from "../util";
|
||||
import { getLanceDbPath } from "../util/paths";
|
||||
import { chunkDocument } from "./chunk/chunk";
|
||||
import { DatabaseConnection, SqliteDb } from "./refreshIndex";
|
||||
import { DatabaseConnection, SqliteDb, tagToString } from "./refreshIndex";
|
||||
import {
|
||||
CodebaseIndex,
|
||||
IndexResultType,
|
||||
|
@ -15,10 +15,6 @@ import {
|
|||
RefreshIndexResults,
|
||||
} from "./types";
|
||||
|
||||
/**
 * Serializes an index tag into a single string key.
 *
 * @param tag The tag whose `directory`, `branch` and `artifactId` are combined.
 * @returns The three fields joined with `::`, in that order.
 */
export function tagToString(tag: IndexTag): string {
  const { directory, branch, artifactId } = tag;
  return `${directory}::${branch}::${artifactId}`;
}
|
||||
|
||||
// LanceDB converts to lowercase, so names must all be lowercase
|
||||
interface LanceDbRow {
|
||||
uuid: string;
|
||||
|
|
|
@ -0,0 +1,127 @@
|
|||
import { IndexingProgressUpdate } from "../..";
|
||||
import { MAX_CHUNK_SIZE } from "../../llm/constants";
|
||||
import { getBasename } from "../../util";
|
||||
import { DatabaseConnection, SqliteDb, tagToString } from "../refreshIndex";
|
||||
import {
|
||||
CodebaseIndex,
|
||||
IndexResultType,
|
||||
IndexTag,
|
||||
MarkCompleteCallback,
|
||||
RefreshIndexResults,
|
||||
} from "../types";
|
||||
import { chunkDocument } from "./chunk";
|
||||
|
||||
/**
 * Codebase index that stores file chunks in SQLite.
 *
 * Chunks are kept in the `chunks` table; the `chunk_tags` table records which
 * tag strings (as produced by `tagToString`) each chunk is associated with.
 */
export class ChunkCodebaseIndex implements CodebaseIndex {
  artifactId: string = "chunks";

  readFile: (filepath: string) => Promise<string>;

  /**
   * @param readFile Callback used to load the contents of a file by path.
   */
  constructor(readFile: (filepath: string) => Promise<string>) {
    this.readFile = readFile;
  }

  /** Creates the `chunks` and `chunk_tags` tables when they do not exist yet. */
  private async _createTables(db: DatabaseConnection) {
    await db.exec(`CREATE TABLE IF NOT EXISTS chunks (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      cacheKey TEXT NOT NULL,
      path TEXT NOT NULL,
      idx INTEGER NOT NULL,
      startLine INTEGER NOT NULL,
      endLine INTEGER NOT NULL,
      content TEXT NOT NULL
    )`);

    await db.exec(`CREATE TABLE IF NOT EXISTS chunk_tags (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      tag TEXT NOT NULL,
      chunkId INTEGER NOT NULL,
      FOREIGN KEY (chunkId) REFERENCES chunks (id)
    )`);
  }

  /**
   * Applies one refresh result to the chunk tables.
   *
   * Handles the four result lists in order: `compute` (chunk the file and
   * insert its chunks with this tag), `addTag` (attach this tag to existing
   * chunks), `removeTag` (detach this tag from the item's chunks) and `del`
   * (remove the item's chunks and all of their tags). `markComplete` is
   * called once per processed item.
   *
   * @param tag Tag being refreshed; stored in `chunk_tags` via `tagToString`.
   * @param results Items to compute, tag, untag and delete.
   * @param markComplete Callback invoked after each item has been handled.
   * @yields A progress update after each computed file.
   */
  async *update(
    tag: IndexTag,
    results: RefreshIndexResults,
    markComplete: MarkCompleteCallback
  ): AsyncGenerator<IndexingProgressUpdate, any, unknown> {
    const db = await SqliteDb.get();
    await this._createTables(db);
    const tagString = tagToString(tag);

    // Compute chunks for new files
    const contents = await Promise.all(
      results.compute.map(({ path }) => this.readFile(path))
    );

    // The loop header is the only place `i` advances, so every file in
    // `results.compute` is processed exactly once.
    for (let i = 0; i < results.compute.length; i++) {
      const item = results.compute[i];

      // Insert chunks
      for await (const chunk of chunkDocument(
        item.path,
        contents[i],
        MAX_CHUNK_SIZE,
        item.cacheKey
      )) {
        const { lastID } = await db.run(
          `INSERT INTO chunks (cacheKey, path, idx, startLine, endLine, content) VALUES (?, ?, ?, ?, ?, ?)`,
          [
            chunk.digest,
            chunk.filepath,
            chunk.index,
            chunk.startLine,
            chunk.endLine,
            chunk.content,
          ]
        );

        await db.run(`INSERT INTO chunk_tags (chunkId, tag) VALUES (?, ?)`, [
          lastID,
          tagString,
        ]);
      }

      yield {
        progress: i / results.compute.length,
        desc: `Chunking ${getBasename(item.path)}`,
      };
      markComplete([item], IndexResultType.Compute);
    }

    // Add tag
    for (const item of results.addTag) {
      const chunksWithPath = await db.all(
        `SELECT * FROM chunks WHERE cacheKey = ?`,
        [item.cacheKey]
      );

      for (const chunk of chunksWithPath) {
        await db.run(`INSERT INTO chunk_tags (chunkId, tag) VALUES (?, ?)`, [
          chunk.id,
          tagString,
        ]);
      }

      markComplete([item], IndexResultType.AddTag);
    }

    // Remove tag: only detach this tag from the chunks belonging to the item,
    // not from every chunk that carries the tag.
    for (const item of results.removeTag) {
      await db.run(
        `DELETE FROM chunk_tags WHERE tag = ? AND chunkId IN (SELECT id FROM chunks WHERE cacheKey = ?)`,
        [tagString, item.cacheKey]
      );
      markComplete([item], IndexResultType.RemoveTag);
    }

    // Delete
    for (const item of results.del) {
      // Remove the tag rows first, while the chunk ids can still be looked up
      // by cacheKey (a DELETE does not report the ids of the rows it removed).
      await db.run(
        `DELETE FROM chunk_tags WHERE chunkId IN (SELECT id FROM chunks WHERE cacheKey = ?)`,
        [item.cacheKey]
      );

      await db.run(`DELETE FROM chunks WHERE cacheKey = ?`, [item.cacheKey]);

      markComplete([item], IndexResultType.Delete);
    }
  }
}
|
|
@ -15,6 +15,10 @@ import {
|
|||
|
||||
/** Alias for the `Database` wrapper type parameterized with the `sqlite3.Database` driver. */
export type DatabaseConnection = Database<sqlite3.Database>;
|
||||
|
||||
/**
 * Serializes an index tag into a single string key.
 *
 * @param tag The tag whose `directory`, `branch` and `artifactId` are combined.
 * @returns The three fields joined with `::`, in that order.
 */
export function tagToString(tag: IndexTag): string {
  const { directory, branch, artifactId } = tag;
  return `${directory}::${branch}::${artifactId}`;
}
|
||||
|
||||
export class SqliteDb {
|
||||
static db: DatabaseConnection | null = null;
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import { LanceDbIndex } from "core/indexing/LanceDbIndex";
|
||||
import { ChunkCodebaseIndex } from "core/indexing/chunk/ChunkCodebaseIndex";
|
||||
import { getComputeDeleteAddRemove } from "core/indexing/refreshIndex";
|
||||
import { CodebaseIndex, IndexTag, LastModifiedMap } from "core/indexing/types";
|
||||
import * as vscode from "vscode";
|
||||
|
@ -23,11 +24,13 @@ const vscodeGetStats = async (
|
|||
};
|
||||
|
||||
/**
 * Builds the list of codebase indexes to refresh.
 *
 * @returns A `LanceDbIndex` and a `ChunkCodebaseIndex`, both reading files
 *          through a freshly created `VsCodeIde`.
 */
async function getIndexesToBuild(): Promise<CodebaseIndex[]> {
  const ide = new VsCodeIde();
  const config = await configHandler.loadConfig(ide);

  // Wrap readFile in a closure so it is always invoked on `ide` instead of
  // being handed out as an unbound method reference.
  const readFile = (filepath: string) => ide.readFile(filepath);

  const indexes = [
    new LanceDbIndex(config.embeddingsProvider, readFile),
    new ChunkCodebaseIndex(readFile),
  ];

  return indexes;
}
|
||||
|
@ -66,7 +69,7 @@ export async function vsCodeIndexCodebase(workspaceDirs: string[]) {
|
|||
(filepath) => ideProtocolClient.readFile(filepath)
|
||||
);
|
||||
|
||||
// console.log("RESULTS: ", results);
|
||||
console.log("RESULTS: ", codebaseIndex.artifactId, results);
|
||||
|
||||
for await (let { progress, desc } of codebaseIndex.update(
|
||||
tag,
|
||||
|
|
Loading…
Reference in New Issue