Drafted max depth changes

This commit is contained in:
Justin Milner 2024-05-12 18:16:06 -04:00
parent e3dc72be22
commit 895d91c0e1
7 changed files with 61 additions and 9 deletions

View File

@ -128,10 +128,12 @@ export class Core {
// Context providers
on("context/addDocs", async (msg) => {
const { startUrl, title, maxDepth } = msg.data;
for await (const _ of indexDocs(
msg.data.title,
new URL(msg.data.url),
new URL(msg.data.rootUrl),
new TransformersJsEmbeddingsProvider(),
msg.data.maxDepth
)) {
}
});

View File

@ -129,7 +129,7 @@ export type PageData = {
html: string;
};
export async function* crawlPage(url: URL): AsyncGenerator<PageData> {
export async function* crawlPage(url: URL, maxDepth?: number): AsyncGenerator<PageData> {
const { baseUrl, basePath } = splitUrl(url);
let paths: string[] = [basePath];
@ -172,3 +172,39 @@ export async function* crawlPage(url: URL): AsyncGenerator<PageData> {
);
}
}
/**
 * Breadth-first crawl starting at `url`, following discovered links up to
 * `maxDepth` hops away from the start page.
 *
 * Pages are fetched in batches so that a bounded number of requests are in
 * flight at once. Every page whose fetched HTML is non-empty is yielded; links
 * found on a page are only queued when that page is strictly shallower than
 * `maxDepth`, so no page deeper than `maxDepth` is ever fetched.
 *
 * @param url - Start page; split into base URL and base path via `splitUrl`.
 * @param maxDepth - Maximum link distance from the start page (start page is
 *   depth 0). Defaults to 3. A negative value yields nothing.
 * @returns An async generator of `PageData`. Note that `url` on every yielded
 *   item is the start URL passed in, while `path` identifies the page.
 */
export async function* crawlPage2(url: URL, maxDepth: number = 3): AsyncGenerator<PageData> {
  // Upper bound on the number of pages fetched concurrently.
  const batchSize = 50;

  const { baseUrl, basePath } = splitUrl(url);
  const queue: { path: string; depth: number }[] = [{ path: basePath, depth: 0 }];
  // Every path that has ever been queued; gives O(1) de-duplication instead of
  // rescanning the whole queue for each discovered link.
  const seen = new Set<string>([basePath]);

  let index = 0;
  while (index < queue.length) {
    const batch = queue.slice(index, index + batchSize);
    const results = await Promise.all(
      batch.map(async ({ path, depth }) => {
        const { html, links } = await getLinksFromUrl(baseUrl, path);
        return { html, links, path, depth };
      }),
    );

    for (const { html, links, path, depth } of results) {
      // Queued entries never exceed maxDepth, so the depth guard only matters
      // for the start page when maxDepth is negative (or NaN).
      if (html !== "" && depth <= maxDepth) {
        yield {
          url: url.toString(),
          path,
          html,
        };
      }

      // Only expand pages that still have depth budget left.
      if (depth < maxDepth) {
        for (const link of links) {
          if (!seen.has(link)) {
            seen.add(link);
            queue.push({ path: link, depth: depth + 1 });
          }
        }
      }
    }

    // Advance by the number of entries actually processed: the queue may have
    // grown while this batch was handled, and those new entries come next.
    index += batch.length;
  }
}

View File

@ -8,6 +8,7 @@ export async function* indexDocs(
title: string,
baseUrl: URL,
embeddingsProvider: EmbeddingsProvider,
maxDepth?: number
): AsyncGenerator<IndexingProgressUpdate> {
if (await hasDoc(baseUrl.toString())) {
yield {
@ -26,7 +27,7 @@ export async function* indexDocs(
const articles: Article[] = [];
for await (const page of crawlPage(baseUrl)) {
for await (const page of crawlPage(baseUrl, maxDepth)) {
const article = pageToArticle(page);
if (!article) continue;

View File

@ -1,7 +1,8 @@
export interface SiteIndexingConfig {
startUrl: string;
title: string;
rootUrl: string;
title: string;
maxDepth?: number;
}
const configs: SiteIndexingConfig[] = [

View File

@ -12,6 +12,7 @@ import {
SessionInfo,
} from ".";
import { AutocompleteInput } from "./autocomplete/completionProvider";
import { SiteIndexingConfig } from "./indexing/docs/preIndexedDocs";
import { IdeProtocol } from "./web/webviewProtocol";
export type ProtocolGeneratorType<T> = AsyncGenerator<{
@ -62,7 +63,7 @@ export type Protocol = {
{ title: string },
Promise<ContextSubmenuItem[]>,
];
"context/addDocs": [{ title: string; url: string }, void];
"context/addDocs": [SiteIndexingConfig, void];
"autocomplete/complete": [AutocompleteInput, Promise<string[]>];
"autocomplete/cancel": [undefined, void];
"autocomplete/accept": [{ completionId: string }, void];

View File

@ -555,7 +555,7 @@ export class VsCodeWebviewProtocol {
}
});
this.on("context/addDocs", (msg) => {
const { url, title } = msg.data;
const { startUrl, title, maxDepth } = msg.data;
const embeddingsProvider = new TransformersJsEmbeddingsProvider();
vscode.window.withProgress(
{
@ -566,8 +566,9 @@ export class VsCodeWebviewProtocol {
async (progress) => {
for await (const update of indexDocs(
title,
new URL(url),
new URL(startUrl),
embeddingsProvider,
maxDepth
)) {
progress.report({
increment: update.progress,

View File

@ -15,9 +15,13 @@ const GridDiv = styled.div`
`;
function AddDocsDialog() {
const defaultMaxDepth = 4
const [docsUrl, setDocsUrl] = React.useState("");
const [docsTitle, setDocsTitle] = React.useState("");
const [urlValid, setUrlValid] = React.useState(false);
const [maxDepth, setMaxDepth] = React.useState(defaultMaxDepth);
const [maxDepthValid, setMaxDepthValid] = React.useState(false) // ToDo
const dispatch = useDispatch();
const { addItem } = useContext(SubmenuContextProvidersContext);
@ -60,14 +64,20 @@ function AddDocsDialog() {
value={docsTitle}
onChange={(e) => setDocsTitle(e.target.value)}
/>
<Input
type="text"
placeholder={`Max Depth (Default=${defaultMaxDepth})`}
value={maxDepth}
onChange={(e) => setMaxDepth(Number(e.target.value))}
/>
<Button
disabled={!docsUrl || !urlValid}
className="ml-auto"
onClick={() => {
postToIde("context/addDocs", { url: docsUrl, title: docsTitle });
postToIde("context/addDocs", { startUrl: docsUrl, rootUrl: docsUrl, title: docsTitle, maxDepth:maxDepth });
setDocsTitle("");
setDocsUrl("");
setMaxDepth(defaultMaxDepth)
dispatch(setShowDialog(false));
addItem("docs", {
id: docsUrl,