// continue/core/commands/slash/edit.ts
//
// 627 lines
// 20 KiB
// TypeScript

import {
filterCodeBlockLines,
filterEnglishLinesAtEnd,
filterEnglishLinesAtStart,
fixCodeLlamaFirstLineIndentation,
stopAtLines,
streamWithNewLines,
} from "../../autocomplete/lineStream.js";
import { streamLines } from "../../diff/util.js";
import { ContextItemWithId, ILLM, SlashCommand } from "../../index.js";
import { stripImages } from "../../llm/images.js";
import {
dedentAndGetCommonWhitespace,
getMarkdownLanguageTagForFile,
} from "../../util/index.js";
import {
contextItemToRangeInFileWithContents,
type RangeInFileWithContents,
} from "../util.js";
// Instruction preamble for the default (non-template) edit flow. It contains one worked
// example using the <file_prefix>/<code_to_edit>/<file_suffix>/<user_request> tags and ends
// with "Main task:"; compilePrompt starts from this text and appends the real tagged sections.
// getPromptParts also includes it when estimating the prompt's token count.
// NOTE(review): the doubled braces (`{{}}`, `{{key}}`) in the example look like leftover
// format-string escaping and appear to be sent to the model as-is — confirm this is intended.
const PROMPT = `Take the file prefix and suffix into account, but only rewrite the code_to_edit as specified in the user_request. The code you write in modified_code_to_edit will replace the code between the code_to_edit tags. Do NOT preface your answer or write anything other than code. The </modified_code_to_edit> tag should be written to indicate the end of the modified code section. Do not ever use nested tags.
Example:
<file_prefix>
class Database:
def __init__(self):
self._data = {{}}
def get(self, key):
return self._data[key]
</file_prefix>
<code_to_edit>
def set(self, key, value):
self._data[key] = value
</code_to_edit>
<file_suffix>
def clear_all():
self._data = {{}}
</file_suffix>
<user_request>
Raise an error if the key already exists.
</user_request>
<modified_code_to_edit>
def set(self, key, value):
if key in self._data:
raise KeyError(f"Key {{key}} already exists")
self._data[key] = value
</modified_code_to_edit>
Main task:
`;
/**
 * Splits a file into the prefix / selected contents / suffix that are fed to the edit prompt,
 * dropping surrounding lines when the estimated prompt would not fit the model's context.
 *
 * Lines are dropped from the end of the file first (never past `rif.range.end.line`), then
 * from the start of the file (never past `rif.range.start.line`).
 *
 * Leading and trailing whitespace of the selection is moved out of the selection and into the
 * prefix / suffix. NOTE: this mutates `rif.contents` in place; callers read the trimmed value
 * back from `rif` afterwards.
 *
 * @param rif - Range being edited together with its current text. `rif.contents` is mutated.
 * @param fullFileContents - Entire text of the file that contains the range.
 * @param model - Model used for token counting and for its `contextLength`.
 * @param input - The user's request text (only used for the token estimate here).
 * @param tokenLimit - Currently unused; it was the threshold for a "large range" warning
 *   that is disabled.
 * @returns `filePrefix`, `fileSuffix`, the trimmed selection as `contents`, and `maxTokens`
 *   (half of the model's context length, rounded down).
 */
export async function getPromptParts(
  rif: RangeInFileWithContents,
  fullFileContents: string,
  model: ILLM,
  input: string,
  tokenLimit: number | undefined,
) {
  const maxTokens = Math.floor(model.contextLength / 2);

  // Estimated size of the whole exchange: file + instructions + request, a fixed safety
  // buffer, and the room reserved for the model's answer.
  const BUFFER_FOR_FUNCTIONS = 400;
  let totalTokens =
    model.countTokens(fullFileContents + PROMPT + input) +
    BUFFER_FOR_FUNCTIONS +
    maxTokens;

  const fullFileContentsList = fullFileContents.split("\n");
  const maxStartLine = rif.range.start.line;
  const minEndLine = rif.range.end.line;
  // Inclusive indices of the first and last file lines still kept as context.
  let curStartLine = 0;
  let curEndLine = fullFileContentsList.length - 1;

  // Drop lines from the bottom of the file first.
  if (totalTokens > model.contextLength) {
    while (curEndLine > minEndLine) {
      totalTokens -= model.countTokens(fullFileContentsList[curEndLine]);
      curEndLine--;
      if (totalTokens < model.contextLength) {
        break;
      }
    }
  }

  // Still too large: drop lines from the top of the file.
  if (totalTokens > model.contextLength) {
    while (curStartLine < maxStartLine) {
      // Subtract the line being dropped *before* advancing, so the accounting matches the
      // line that actually leaves the prefix. `?? ""` guards a start line past end of file.
      totalTokens -= model.countTokens(
        fullFileContentsList[curStartLine] ?? "",
      );
      curStartLine++;
      if (totalTokens < model.contextLength) {
        break;
      }
    }
  }

  let filePrefix = fullFileContentsList
    .slice(curStartLine, maxStartLine)
    .join("\n");
  // `curEndLine` is inclusive while `slice`'s end is exclusive, hence the `+ 1`.
  let fileSuffix = fullFileContentsList
    .slice(minEndLine, curEndLine + 1)
    .join("\n");

  if (rif.contents.length > 0) {
    let lines = rif.contents.split(/\r?\n/);

    // Move a whitespace-only first line (without its newline) into the prefix.
    // An empty first line yields `null` and ends the loop.
    let firstLine = lines[0] || null;
    while (firstLine && firstLine.trim() === "") {
      filePrefix += firstLine;
      rif.contents = rif.contents.substring(firstLine.length);
      lines = rif.contents.split(/\r?\n/);
      firstLine = lines[0] || null;
    }

    // Same for a whitespace-only last line, which is moved to the front of the suffix.
    let lastLine = lines[lines.length - 1] || null;
    while (lastLine && lastLine.trim() === "") {
      fileSuffix = lastLine + fileSuffix;
      rif.contents = rif.contents.substring(
        0,
        rif.contents.length - lastLine.length,
      );
      lines = rif.contents.split(/\r?\n/);
      lastLine = lines[lines.length - 1] || null;
    }

    // Finally move any leading / trailing newline characters out of the selection.
    while (rif.contents.startsWith("\n")) {
      filePrefix += "\n";
      rif.contents = rif.contents.substring(1);
    }
    while (rif.contents.endsWith("\n")) {
      fileSuffix = `\n${fileSuffix}`;
      rif.contents = rif.contents.substring(0, rif.contents.length - 1);
    }
  }

  return { filePrefix, fileSuffix, contents: rif.contents, maxTokens };
}
/**
 * Builds the chat prompt for an edit.
 *
 * When the selection is blank, a dedicated "insert at cursor" prompt is returned (the regular
 * prompt tends to make the model repeat the whole file). Otherwise the shared PROMPT preamble
 * is followed by the tagged prefix (if non-blank), the code to edit, the tagged suffix
 * (if non-blank), the user request, and an opening <modified_code_to_edit> tag.
 *
 * @param filePrefix - File text before the selection.
 * @param contents - The selected code to rewrite.
 * @param fileSuffix - File text after the selection.
 * @param input - The user's request.
 * @returns The full prompt text.
 */
function compilePrompt(
  filePrefix: string,
  contents: string,
  fileSuffix: string,
  input: string,
): string {
  // Wraps `body` in an opening/closing tag pair, each tag on its own line.
  const tagged = (tag: string, body: string): string =>
    `<${tag}>\n${body}\n</${tag}>`;

  const nothingSelected = contents.trim() === "";
  if (nothingSelected) {
    return [
      tagged("file_prefix", filePrefix),
      "<insertion_code_here>",
      tagged("file_suffix", fileSuffix),
      tagged("user_request", input),
      "Please output the code to be inserted at the cursor in order to fulfill the user_request. Do NOT preface your answer or write anything other than code. You should not write any tags, just the code. Make sure to correctly indent the code:",
    ].join("\n");
  }

  const sections: string[] = [PROMPT];
  if (filePrefix.trim() !== "") {
    sections.push(tagged("file_prefix", filePrefix));
  }
  sections.push(tagged("code_to_edit", contents));
  if (fileSuffix.trim() !== "") {
    sections.push(tagged("file_suffix", fileSuffix));
  }
  // The trailing empty entry makes the prompt end with a newline after the opening tag.
  sections.push(tagged("user_request", input), "<modified_code_to_edit>", "");
  return sections.join("\n");
}
/**
 * Reports whether a generated line contains one of the markers that signal the end of the
 * model's code output.
 *
 * @param line - A single generated line.
 * @returns `true` if the line contains any end marker.
 */
function isEndLine(line: string) {
  const endMarkers = [
    "</modified_code_to_edit>",
    "</code_to_edit>",
    "[/CODE]",
  ];
  return endMarkers.some((marker) => line.includes(marker));
}
/**
 * Reports whether a generated line is prompt scaffolding (a markdown code fence or one of the
 * prompt's tags) and should therefore be left out of the resulting code.
 *
 * @param line - A single generated line.
 * @param isFirstLine - Accepted for call-site compatibility; not consulted.
 * @returns `true` if the line contains any scaffolding marker.
 */
function lineToBeIgnored(line: string, isFirstLine = false): boolean {
  const scaffoldingMarkers = [
    "```",
    "<modified_code_to_edit>",
    "<file_prefix>",
    "</file_prefix>",
    "<file_suffix>",
    "</file_suffix>",
    "<user_request>",
    "</user_request>",
    "<code_to_edit>",
  ];
  for (const marker of scaffoldingMarkers) {
    if (line.includes(marker)) {
      return true;
    }
  }
  return false;
}
/**
 * The `/edit` slash command.
 *
 * Picks the code context item to edit, builds a prompt from the surrounding file, streams the
 * model's rewrite line by line, filters out prompt scaffolding and repeated prefix/suffix
 * lines, and pushes the in-progress result to the IDE as a diff after every chunk. When
 * `params.recap` is set, a short explanation of the change is streamed back afterwards.
 */
const EditSlashCommand: SlashCommand = {
  name: "edit",
  description: "Edit selected code",
  run: async function* ({ ide, llm, input, history, contextItems, params }) {
    // Prefer the code item explicitly flagged as being edited; otherwise take any code item.
    let contextItemToEdit = contextItems.find(
      (item: ContextItemWithId) =>
        item.editing && item.id.providerTitle === "code",
    );
    if (!contextItemToEdit) {
      contextItemToEdit = contextItems.find(
        (item: ContextItemWithId) => item.id.providerTitle === "code",
      );
    }
    if (!contextItemToEdit) {
      yield "Please highlight the code you want to edit, then press `cmd/ctrl+shift+L` to add it to chat";
      return;
    }

    // Strip unnecessary parts of the input (the fact that you have to do this is suboptimal, should be refactored away)
    let content = history[history.length - 1].content;
    if (typeof content !== "string") {
      content.forEach((part) => {
        if (part.text?.startsWith("/edit")) {
          part.text = part.text.replace("/edit", "").trimStart();
        }
      });
    } else if (input?.startsWith("/edit")) {
      content = input.replace("/edit", "").trimStart();
    } else if (input?.startsWith("/comment")) {
      content = input.replace("/comment", "").trimStart();
    }

    // Remove the fenced copy of the selected code from the request text.
    let userInput = stripImages(content).replace(
      `\`\`\`${contextItemToEdit.name}\n${contextItemToEdit.content}\n\`\`\`\n`,
      "",
    );
    // If the replace above fails to find a match, the code will still be present in
    // userInput. Fall back to the raw input, but only when there is one: overwriting with an
    // empty/undefined input would discard the user's request entirely.
    if (userInput.includes("```") && input) {
      userInput = input;
    }

    const rif: RangeInFileWithContents =
      contextItemToRangeInFileWithContents(contextItemToEdit);

    // Save first so the text read back from disk matches what is in the editor.
    await ide.saveFile(rif.filepath);
    const fullFileContents = await ide.readFile(rif.filepath);

    // NOTE: getPromptParts also trims rif.contents in place; later reads of rif.contents
    // see the trimmed selection.
    let { filePrefix, contents, fileSuffix, maxTokens } = await getPromptParts(
      rif,
      fullFileContents,
      llm,
      userInput,
      params?.tokenLimit,
    );

    // The model sees the selection dedented; the common indent is re-added to every
    // generated line below.
    const [dedentedContents, commonWhitespace] =
      dedentAndGetCommonWhitespace(contents);
    contents = dedentedContents;

    const prompt = compilePrompt(filePrefix, contents, fileSuffix, userInput);

    // Untruncated file text around the selection, used to assemble the diff preview.
    const fullFileContentsLines = fullFileContents.split("\n");
    const fullPrefixLines = fullFileContentsLines.slice(
      0,
      Math.max(0, rif.range.start.line - 1),
    );
    const fullSuffixLines = fullFileContentsLines.slice(rif.range.end.line);

    // Original selection lines not yet judged to be rewritten; shown below the generated
    // text in the preview while streaming.
    let linesToDisplay: string[] = [];

    /**
     * Sends the current state of the edit to the IDE as a diff of the whole file.
     * `lines` are the generated lines so far; `final` suppresses the not-yet-rewritten tail.
     */
    async function sendDiffUpdate(lines: string[], final = false) {
      const completion = lines.join("\n");

      // Don't do this at the very end, just show the inserted code
      if (final) {
        linesToDisplay = [];
      }
      // Only recalculate at every new-line, because this is sort of expensive
      else if (completion.endsWith("\n")) {
        const contentsLines = rif.contents.split("\n");
        let rewrittenLines = 0;
        for (const line of lines) {
          for (let i = rewrittenLines; i < contentsLines.length; i++) {
            if (
              // difflib.SequenceMatcher(
              //     null, line, contentsLines[i]
              // ).ratio()
              // > 0.7
              line.trim() === contentsLines[i].trim() && // Temp replacement for difflib (TODO)
              contentsLines[i].trim() !== ""
            ) {
              rewrittenLines = i + 1;
              break;
            }
          }
        }
        linesToDisplay = contentsLines.slice(rewrittenLines);
      }

      const newFileContents = `${fullPrefixLines.join("\n")}\n${completion}\n${
        linesToDisplay.length > 0 ? `${linesToDisplay.join("\n")}\n` : ""
      }${fullSuffixLines.join("\n")}`;

      const stepIndex = history.length - 1;

      await ide.showDiff(rif.filepath, newFileContents, stepIndex);
    }

    // Important state variables
    // -------------------------
    const originalLines = rif.contents === "" ? [] : rif.contents.split("\n");
    // In the actual file, taking into account block offset
    let currentLineInFile = rif.range.start.line;
    let currentBlockLines: string[] = [];
    let originalLinesBelowPreviousBlocks = originalLines;
    // The start of the current block in file, taking into account block offset
    let currentBlockStart = -1;
    let offsetFromBlocks = 0;

    // Don't end the block until you've matched N simultaneous lines
    // This helps avoid many tiny blocks
    const LINES_TO_MATCH_BEFORE_ENDING_BLOCK = 2;
    // If a line has been matched at the end of the block, this is its index within originalLinesBelowPreviousBlocks
    // Except we are keeping track of multiple potentialities, so it's a list
    // We always check the lines following each of these leads, but if multiple make it out at the end, we use the first one
    // This is a tuple of (index_of_last_matched_line, number_of_lines_matched)
    let indicesOfLastMatchedLines: [number, number][] = [];

    /**
     * Block-tracking bookkeeping for one generated line.
     * NOTE(review): within this function it is only invoked for the trailing unfinished
     * line after streaming ends, not for each streamed line — confirm that is intended.
     */
    async function handleGeneratedLine(line: string) {
      if (currentBlockLines.length === 0) {
        // Set this as the start of the next block
        currentBlockStart =
          rif.range.start.line +
          originalLines.length -
          originalLinesBelowPreviousBlocks.length +
          offsetFromBlocks;
        if (
          originalLinesBelowPreviousBlocks.length > 0 &&
          line === originalLinesBelowPreviousBlocks[0]
        ) {
          // Line is equal to the next line in file, move past this line
          originalLinesBelowPreviousBlocks =
            originalLinesBelowPreviousBlocks.slice(1);
          return;
        }
      }

      // In a block, and have already matched at least one line
      // Check if the next line matches, for each of the candidates
      const matchesFound: [number, number][] = [];
      let firstValidMatch: [number, number] | null = null;
      for (const [
        lastMatchedIndex,
        matchedCount,
      ] of indicesOfLastMatchedLines) {
        if (
          lastMatchedIndex + 1 < originalLinesBelowPreviousBlocks.length &&
          line === originalLinesBelowPreviousBlocks[lastMatchedIndex + 1]
        ) {
          matchesFound.push([lastMatchedIndex + 1, matchedCount + 1]);
          if (
            firstValidMatch === null &&
            matchedCount + 1 >= LINES_TO_MATCH_BEFORE_ENDING_BLOCK
          ) {
            firstValidMatch = [lastMatchedIndex + 1, matchedCount + 1];
          }
        }
      }
      indicesOfLastMatchedLines = matchesFound;

      if (firstValidMatch !== null) {
        // We've matched the required number of lines, insert suggestion!

        // We added some lines to the block that were matched (including maybe some blank lines)
        // So here we will strip all matching lines from the end of currentBlockLines
        const linesStripped: string[] = [];
        let indexOfLastLineInBlock: number = firstValidMatch[0];
        while (
          currentBlockLines.length > 0 &&
          currentBlockLines[currentBlockLines.length - 1] ===
            originalLinesBelowPreviousBlocks[indexOfLastLineInBlock - 1]
        ) {
          linesStripped.push(currentBlockLines.pop() as string);
          indexOfLastLineInBlock -= 1;
        }

        // Reset current block / update variables
        currentLineInFile += 1;
        offsetFromBlocks += currentBlockLines.length;
        originalLinesBelowPreviousBlocks =
          originalLinesBelowPreviousBlocks.slice(indexOfLastLineInBlock + 1);
        currentBlockLines = [];
        currentBlockStart = -1;
        indicesOfLastMatchedLines = [];

        return;
      }

      // Always look for new matching candidates
      const newMatches: [number, number][] = [];
      for (let i = 0; i < originalLinesBelowPreviousBlocks.length; i++) {
        const ogLine = originalLinesBelowPreviousBlocks[i];
        // TODO: It's a bit sus to be disqualifying empty lines.
        // What you ideally do is find ALL matches, and then throw them out as you check the following lines
        if (ogLine === line) {
          // and og_line.trim() !== "":
          newMatches.push([i, 1]);
        }
      }
      indicesOfLastMatchedLines = indicesOfLastMatchedLines.concat(newMatches);

      // Make sure they are sorted by index
      indicesOfLastMatchedLines = indicesOfLastMatchedLines.sort(
        (a, b) => a[0] - b[0],
      );

      currentBlockLines.push(line);
    }

    // `messages` aliases `history`, so this replaces the last history entry in place with
    // the compiled prompt.
    let messages = history;
    messages[messages.length - 1] = { role: "user", content: prompt };

    let linesOfPrefixCopied = 0;
    const lines: string[] = [];
    let unfinishedLine = "";
    let completionLinesCovered = 0;
    let repeatingFileSuffix = false;
    const lineBelowHighlightedRange = fileSuffix.trim().split("\n")[0];
    // Loop-invariant: how many prefix lines the model could be echoing back.
    const filePrefixLineCount = filePrefix.split("\n").length;

    // Use custom templates defined by the model
    const template = llm.promptTemplates?.edit;
    let generator: AsyncGenerator<string>;
    if (template) {
      const rendered = llm.renderPromptTemplate(
        template,
        // typeof template === 'string' ? template : template.prompt,
        messages.slice(0, messages.length - 1),
        {
          codeToEdit: rif.contents,
          userInput,
          filePrefix: filePrefix,
          fileSuffix: fileSuffix,

          // Some built-in templates use these instead of the above
          prefix: filePrefix,
          suffix: fileSuffix,

          language: getMarkdownLanguageTagForFile(rif.filepath),
          systemMessage: llm.systemMessage ?? "",
          // "contextItems": (await sdk.getContextItemChatMessages()).map(x => x.content || "").join("\n\n"),
        },
      );
      if (typeof rendered === "string") {
        messages = [
          {
            role: "user",
            content: rendered,
          },
        ];
      } else {
        messages = rendered;
      }

      // NOTE(review): `rendered` is passed to streamComplete even when it is not a string
      // (the branch above handles a non-string result) — confirm this case cannot occur or
      // route it through streamChat.
      const completion = llm.streamComplete(rendered as string, {
        maxTokens: Math.min(maxTokens, Math.floor(llm.contextLength / 2), 4096),
        raw: true,
      });
      let lineStream = streamLines(completion);

      lineStream = filterEnglishLinesAtStart(lineStream);

      lineStream = filterEnglishLinesAtEnd(filterCodeBlockLines(lineStream));
      lineStream = stopAtLines(lineStream, () => {});

      generator = streamWithNewLines(
        fixCodeLlamaFirstLineIndentation(lineStream),
      );
    } else {
      // No custom template: chat with the compiled prompt and stream the text content.
      async function* gen() {
        for await (const chunk of llm.streamChat(messages, {
          temperature: 0.5, // TODO
          maxTokens: Math.min(
            maxTokens,
            Math.floor(llm.contextLength / 2),
            4096,
          ),
        })) {
          yield stripImages(chunk.content);
        }
      }

      generator = gen();
    }

    for await (const chunk of generator) {
      // Stop early if it is repeating the fileSuffix or the step was deleted
      if (repeatingFileSuffix) {
        break;
      }

      // Allow stopping breakpoints
      yield undefined;

      // Accumulate lines: the first piece completes the previous partial line, and the last
      // piece (if the chunk does not end in a newline) becomes the new partial line.
      const chunkLines = chunk.split("\n");
      chunkLines[0] = unfinishedLine + chunkLines[0];
      if (chunk.endsWith("\n")) {
        unfinishedLine = "";
        chunkLines.pop(); // because this will be an empty string
      } else {
        unfinishedLine = chunkLines.pop() ?? "";
      }

      // Deal with newly accumulated lines
      for (let i = 0; i < chunkLines.length; i++) {
        // Trailing whitespace doesn't matter
        chunkLines[i] = chunkLines[i].trimEnd();
        chunkLines[i] = commonWhitespace + chunkLines[i];

        // Lines that should signify the end of generation
        if (isEndLine(chunkLines[i])) {
          break;
        }
        // Lines that should be ignored, like the <> tags
        if (lineToBeIgnored(chunkLines[i], completionLinesCovered === 0)) {
          continue;
        }
        // Check if we are currently just copying the prefix
        if (
          (linesOfPrefixCopied > 0 || completionLinesCovered === 0) &&
          linesOfPrefixCopied < filePrefixLineCount &&
          chunkLines[i] === fullPrefixLines[linesOfPrefixCopied]
        ) {
          // This is a sketchy way of stopping it from repeating the filePrefix. Is a bug if output happens to have a matching line
          linesOfPrefixCopied += 1;
          continue;
        }

        // Because really short lines might be expected to be repeated, this is only a !heuristic!
        // Stop when it starts copying the fileSuffix
        if (
          chunkLines[i].trim() === lineBelowHighlightedRange.trim() &&
          chunkLines[i].trim().length > 4 &&
          !(
            originalLinesBelowPreviousBlocks.length > 0 &&
            chunkLines[i].trim() === originalLinesBelowPreviousBlocks[0].trim()
          )
        ) {
          repeatingFileSuffix = true;
          break;
        }

        lines.push(chunkLines[i]);
        completionLinesCovered += 1;
        currentLineInFile += 1;
      }

      // Preview including the partial line, unless it looks like the start of a tag.
      await sendDiffUpdate(
        lines.concat([
          unfinishedLine?.startsWith("<")
            ? commonWhitespace
            : commonWhitespace + unfinishedLine,
        ]),
      );
    }

    // Add the unfinished line
    if (
      unfinishedLine !== "" &&
      !lineToBeIgnored(unfinishedLine, completionLinesCovered === 0) &&
      !isEndLine(unfinishedLine)
    ) {
      unfinishedLine = commonWhitespace + unfinishedLine;
      lines.push(unfinishedLine);
      await handleGeneratedLine(unfinishedLine);

      completionLinesCovered += 1;
      currentLineInFile += 1;
    }

    await sendDiffUpdate(lines, true);

    // Optionally stream a short explanation of what changed.
    if (params?.recap) {
      const prompt = `This is the code before editing:
\`\`\`
${contents}
\`\`\`

This is the code after editing:

\`\`\`
${lines.join("\n")}
\`\`\`

Please briefly explain the changes made to the code above. Give no more than 2-3 sentences, and use markdown bullet points:`;

      for await (const update of llm.streamComplete(prompt)) {
        yield update;
      }
    }
  },
};
export default EditSlashCommand;
export default EditSlashCommand;