🚧 constructing query for fts

This commit is contained in:
Nate Sesti 2024-01-28 22:39:49 -08:00
parent aae4c0c5e0
commit b71b2b05aa
10 changed files with 58 additions and 15 deletions

View File

@ -4,7 +4,6 @@ import {
ContextProviderDescription,
ContextProviderExtras,
} from "../..";
import { retrieveContextItemsFromEmbeddings } from "../retrieval";
class CodebaseContextProvider extends BaseContextProvider {
static description: ContextProviderDescription = {
@ -18,6 +17,7 @@ class CodebaseContextProvider extends BaseContextProvider {
query: string,
extras: ContextProviderExtras
): Promise<ContextItem[]> {
const { retrieveContextItemsFromEmbeddings } = await import("../retrieval");
return retrieveContextItemsFromEmbeddings(extras, this.options, undefined);
}
async load(): Promise<void> {}

View File

@ -7,7 +7,6 @@ import {
LoadSubmenuItemsArgs,
} from "../..";
import { getBasename, getLastNPathParts } from "../../util";
import { retrieveContextItemsFromEmbeddings } from "../retrieval";
class FolderContextProvider extends BaseContextProvider {
static description: ContextProviderDescription = {
@ -21,6 +20,7 @@ class FolderContextProvider extends BaseContextProvider {
query: string,
extras: ContextProviderExtras
): Promise<ContextItem[]> {
const { retrieveContextItemsFromEmbeddings } = await import("../retrieval");
return retrieveContextItemsFromEmbeddings(extras, this.options, query);
}
async loadSubmenuItems(

View File

@ -5,6 +5,9 @@ import {
ILLM,
ModelProvider,
} from "..";
import { FullTextSearchCodebaseIndex } from "../indexing/FullTextSearch";
import { ChunkCodebaseIndex } from "../indexing/chunk/ChunkCodebaseIndex";
import { IndexTag } from "../indexing/types";
import { getBasename } from "../util";
const RERANK_PROMPT = (
@ -146,6 +149,26 @@ export async function retrieveContextItemsFromEmbeddings(
filterDirectory
);
const ftsIndex = new FullTextSearchCodebaseIndex();
const workspaceDirs = await extras.ide.getWorkspaceDirs();
const branches = await Promise.all(
workspaceDirs.map((dir) => extras.ide.getBranch(dir))
);
const tags: IndexTag[] = workspaceDirs.map((directory, i) => ({
directory,
branch: branches[i],
artifactId: ChunkCodebaseIndex.artifactId,
}));
let ftsResults = await ftsIndex.retrieve(
tags,
extras.fullInput.trim().split(" ").join(" OR "),
nRetrieve,
filterDirectory
);
console.log("SIM RESULTS: ", results);
console.log("FTS RESULTS: ", ftsResults);
// Re-ranking
if (useReranking) {
results = await rerank(results, extras.llm, extras.fullInput, nFinal);

View File

@ -94,6 +94,10 @@ class FileSystemIde implements IDE {
return Promise.resolve();
}
/**
 * Returns the branch for `dir`.
 *
 * This implementation does not inspect `dir`; it always resolves to an
 * empty string.
 */
getBranch(dir: string): Promise<string> {
  const branchName = "";
  return Promise.resolve(branchName);
}
async verticalDiffUpdate(
filepath: string,
startLine: number,

View File

@ -112,6 +112,10 @@ export class ExtensionIde implements IDE {
return r("sendChunkForFile", { chunk, embedding, tags });
}
/**
 * Sends a "getBranch" request carrying `dir` through `r` and returns
 * whatever that request yields.
 */
async getBranch(dir: string): Promise<string> {
  const payload = { dir };
  return r("getBranch", payload);
}
retrieveChunks(
text: string,
n: number,

1
core/index.d.ts vendored
View File

@ -316,6 +316,7 @@ export interface IDE {
getSearchResults(query: string): Promise<string>;
subprocess(command: string): Promise<[string, string]>;
getProblems(filepath?: string | undefined): Promise<Problem[]>;
getBranch(dir: string): Promise<string>;
// Embeddings
/**

View File

@ -14,7 +14,8 @@ export class FullTextSearchCodebaseIndex implements CodebaseIndex {
private async _createTables(db: DatabaseConnection) {
await db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS fts USING fts5(
path,
content
content,
tokenize = 'trigram'
)`);
await db.exec(`CREATE TABLE IF NOT EXISTS fts_metadata (
@ -50,7 +51,7 @@ export class FullTextSearchCodebaseIndex implements CodebaseIndex {
[item.path, chunk.content]
);
await db.run(
`INSERT INTO fts_metadata (id, path, cacheKey, chunkId) VALUES (?, ?, ?)`,
`INSERT INTO fts_metadata (id, path, cacheKey, chunkId) VALUES (?, ?, ?, ?)`,
[lastID, item.path, item.cacheKey, chunk.id]
);
}
@ -94,16 +95,16 @@ export class FullTextSearchCodebaseIndex implements CodebaseIndex {
const tagStrings = tags.map(tagToString);
const results = await db.all(
`SELECT fts_metadata.chunkId
FROM fts
JOIN fts_metadata ON fts.rowid = fts_metadata.id
JOIN chunk_tags ON fts_metadata.chunkId = chunk_tags.chunkId
WHERE fts MATCH ? AND filtered_chunk_tags.tag IN (${tagStrings
.map(() => "?")
.join(",")})
ORDER BY rank
LIMIT ?`,
[text, ...tagStrings, n]
`SELECT fts_metadata.chunkId, fts_metadata.path, fts.content, rank
FROM fts
JOIN fts_metadata ON fts.rowid = fts_metadata.id
JOIN chunk_tags ON fts_metadata.chunkId = chunk_tags.chunkId
WHERE fts MATCH '${text}' AND chunk_tags.tag IN (${tagStrings
.map(() => "?")
.join(",")})
ORDER BY rank
LIMIT ?`,
[...tagStrings, n]
);
const chunks = await db.all(

View File

@ -12,7 +12,8 @@ import {
import { chunkDocument } from "./chunk";
export class ChunkCodebaseIndex implements CodebaseIndex {
artifactId: string = "chunks";
static artifactId: string = "chunks";
artifactId: string = ChunkCodebaseIndex.artifactId;
readFile: (filepath: string) => Promise<string>;
constructor(readFile: (filepath: string) => Promise<string>) {

View File

@ -428,6 +428,11 @@ export function getSidebarContent(
respond(await ide.getProblems(data.message.filepath));
break;
}
case "getBranch": {
  // Reply with the branch the IDE reports for the directory named in the message.
  respond(await ide.getBranch(data.message.dir));
  break;
}
case "getOpenFiles": {
respond(await ide.getOpenFiles());
break;

View File

@ -416,6 +416,10 @@ class VsCodeIde implements IDE {
});
}
/**
 * Converts `dir` into a file URI and delegates the branch lookup to
 * `ideProtocolClient.getBranch`.
 */
async getBranch(dir: string): Promise<string> {
  const dirUri = vscode.Uri.file(dir);
  return ideProtocolClient.getBranch(dirUri);
}
async getFilesToEmbed(
providerId: string
): Promise<[string, string, string][]> {