Drafted max depth changes
This commit is contained in:
parent
e3dc72be22
commit
895d91c0e1
|
@ -128,10 +128,12 @@ export class Core {
|
|||
|
||||
// Context providers
|
||||
on("context/addDocs", async (msg) => {
|
||||
const { startUrl, title, maxDepth } = msg.data;
|
||||
for await (const _ of indexDocs(
|
||||
msg.data.title,
|
||||
new URL(msg.data.url),
|
||||
new URL(msg.data.rootUrl),
|
||||
new TransformersJsEmbeddingsProvider(),
|
||||
msg.data.maxDepth
|
||||
)) {
|
||||
}
|
||||
});
|
||||
|
|
|
@ -129,7 +129,7 @@ export type PageData = {
|
|||
html: string;
|
||||
};
|
||||
|
||||
export async function* crawlPage(url: URL): AsyncGenerator<PageData> {
|
||||
export async function* crawlPage(url: URL, maxDepth?: number): AsyncGenerator<PageData> {
|
||||
const { baseUrl, basePath } = splitUrl(url);
|
||||
let paths: string[] = [basePath];
|
||||
|
||||
|
@ -172,3 +172,39 @@ export async function* crawlPage(url: URL): AsyncGenerator<PageData> {
|
|||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Breadth-first crawl starting at `url`, following discovered links up to
 * `maxDepth` hops away from the start page.
 *
 * The start page has depth 0; a link found on a page of depth `d` has depth
 * `d + 1`. Pages are fetched in batches of up to 50 concurrent
 * `getLinksFromUrl` calls, and every fetched page with non-empty HTML is
 * yielded in queue order.
 *
 * @param url - Start page; it is split into a base URL and a path with
 *   `splitUrl`. Every yielded item reports this same URL (stringified) in its
 *   `url` field, with `path` identifying the individual page.
 * @param maxDepth - Largest link depth that will be fetched and yielded.
 *   Defaults to 3. A negative or NaN value yields nothing.
 * @returns An async generator of `PageData` for each crawled page.
 */
export async function* crawlPage2(url: URL, maxDepth: number = 3): AsyncGenerator<PageData> {
  // No page (not even the start page at depth 0) can satisfy a negative or
  // NaN limit, so avoid fetching anything at all.
  if (!(maxDepth >= 0)) {
    return;
  }

  const { baseUrl, basePath } = splitUrl(url);
  const queue: { path: string; depth: number }[] = [{ path: basePath, depth: 0 }];
  // Paths that have ever been queued; gives O(1) duplicate checks instead of
  // rescanning the whole queue for every discovered link.
  const seen = new Set<string>([basePath]);
  const batchSize = 50;

  let index = 0;
  while (index < queue.length) {
    const batch = queue.slice(index, index + batchSize);

    // Fetch the whole batch concurrently, keeping each page's path and depth
    // alongside its result.
    const results = await Promise.all(
      batch.map(async ({ path, depth }) => {
        const { html, links } = await getLinksFromUrl(baseUrl, path);
        return { html, links, path, depth };
      }),
    );

    for (const { html, links, path, depth } of results) {
      // Everything in the queue is already within the depth limit (see the
      // enqueue check below), so only empty pages need to be filtered out.
      if (html !== "") {
        yield {
          url: url.toString(),
          path,
          html,
        };
      }

      // Only enqueue links that are themselves within the depth limit, so no
      // page is fetched just to be discarded.
      const nextDepth = depth + 1;
      if (nextDepth <= maxDepth) {
        for (const link of links) {
          if (!seen.has(link)) {
            seen.add(link);
            queue.push({ path: link, depth: nextDepth });
          }
        }
      }
    }

    // Advance by what was actually processed: the final batch may be shorter
    // than batchSize while the queue has grown in the meantime.
    index += batch.length;
  }
}
|
||||
|
|
|
@ -8,6 +8,7 @@ export async function* indexDocs(
|
|||
title: string,
|
||||
baseUrl: URL,
|
||||
embeddingsProvider: EmbeddingsProvider,
|
||||
maxDepth?: number
|
||||
): AsyncGenerator<IndexingProgressUpdate> {
|
||||
if (await hasDoc(baseUrl.toString())) {
|
||||
yield {
|
||||
|
@ -26,7 +27,7 @@ export async function* indexDocs(
|
|||
|
||||
const articles: Article[] = [];
|
||||
|
||||
for await (const page of crawlPage(baseUrl)) {
|
||||
for await (const page of crawlPage(baseUrl, maxDepth)) {
|
||||
const article = pageToArticle(page);
|
||||
if (!article) continue;
|
||||
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
export interface SiteIndexingConfig {
|
||||
startUrl: string;
|
||||
title: string;
|
||||
rootUrl: string;
|
||||
title: string;
|
||||
maxDepth?: number;
|
||||
}
|
||||
|
||||
const configs: SiteIndexingConfig[] = [
|
||||
|
|
|
@ -12,6 +12,7 @@ import {
|
|||
SessionInfo,
|
||||
} from ".";
|
||||
import { AutocompleteInput } from "./autocomplete/completionProvider";
|
||||
import { SiteIndexingConfig } from "./indexing/docs/preIndexedDocs";
|
||||
import { IdeProtocol } from "./web/webviewProtocol";
|
||||
|
||||
export type ProtocolGeneratorType<T> = AsyncGenerator<{
|
||||
|
@ -62,7 +63,7 @@ export type Protocol = {
|
|||
{ title: string },
|
||||
Promise<ContextSubmenuItem[]>,
|
||||
];
|
||||
"context/addDocs": [{ title: string; url: string }, void];
|
||||
"context/addDocs": [SiteIndexingConfig, void];
|
||||
"autocomplete/complete": [AutocompleteInput, Promise<string[]>];
|
||||
"autocomplete/cancel": [undefined, void];
|
||||
"autocomplete/accept": [{ completionId: string }, void];
|
||||
|
|
|
@ -555,7 +555,7 @@ export class VsCodeWebviewProtocol {
|
|||
}
|
||||
});
|
||||
this.on("context/addDocs", (msg) => {
|
||||
const { url, title } = msg.data;
|
||||
const { startUrl, title, maxDepth } = msg.data;
|
||||
const embeddingsProvider = new TransformersJsEmbeddingsProvider();
|
||||
vscode.window.withProgress(
|
||||
{
|
||||
|
@ -566,8 +566,9 @@ export class VsCodeWebviewProtocol {
|
|||
async (progress) => {
|
||||
for await (const update of indexDocs(
|
||||
title,
|
||||
new URL(url),
|
||||
new URL(startUrl),
|
||||
embeddingsProvider,
|
||||
maxDepth
|
||||
)) {
|
||||
progress.report({
|
||||
increment: update.progress,
|
||||
|
|
|
@ -15,9 +15,13 @@ const GridDiv = styled.div`
|
|||
`;
|
||||
|
||||
function AddDocsDialog() {
|
||||
const defaultMaxDepth = 4
|
||||
const [docsUrl, setDocsUrl] = React.useState("");
|
||||
const [docsTitle, setDocsTitle] = React.useState("");
|
||||
const [urlValid, setUrlValid] = React.useState(false);
|
||||
const [maxDepth, setMaxDepth] = React.useState(defaultMaxDepth);
|
||||
const [maxDepthValid, setMaxDepthValid] = React.useState(false) // ToDo
|
||||
|
||||
const dispatch = useDispatch();
|
||||
|
||||
const { addItem } = useContext(SubmenuContextProvidersContext);
|
||||
|
@ -60,14 +64,20 @@ function AddDocsDialog() {
|
|||
value={docsTitle}
|
||||
onChange={(e) => setDocsTitle(e.target.value)}
|
||||
/>
|
||||
|
||||
<Input
|
||||
type="text"
|
||||
placeholder={`Max Depth (Default=${defaultMaxDepth})`}
|
||||
value={maxDepth}
|
||||
onChange={(e) => setMaxDepth(Number(e.target.value))}
|
||||
/>
|
||||
<Button
|
||||
disabled={!docsUrl || !urlValid}
|
||||
className="ml-auto"
|
||||
onClick={() => {
|
||||
postToIde("context/addDocs", { url: docsUrl, title: docsTitle });
|
||||
postToIde("context/addDocs", { startUrl: docsUrl, rootUrl: docsUrl, title: docsTitle, maxDepth:maxDepth });
|
||||
setDocsTitle("");
|
||||
setDocsUrl("");
|
||||
setMaxDepth(defaultMaxDepth)
|
||||
dispatch(setShowDialog(false));
|
||||
addItem("docs", {
|
||||
id: docsUrl,
|
||||
|
|
Loading…
Reference in New Issue