Treesitter-refactoring (#249608)

* wip

* Refactoring TokenizationTextModelPart

* treeSitterTokensPart1

* fix casing

* fix casing 2

* wip models

* progress

* Take care of some of the boring refactor changes (#249429)

* Implements TreeSitterLibraryService

* progress

* more progress

* Removes befores

* Take care of some of the boring refactor changes

* Settable observable

* Get things running

* Fix tree not updated on file events (#249544)

* Fix tree not updated on file events

* Update on Theme change

* Implements visibleLineRanges

* File renames and initial tree handling

* Fixes enter indentation bug

* Get tree sitter tokenization tests passing again

* Get colorize tests running again, minus injected regex test

* Fix monaco editor checks

* More standalone editor fixes

* Fix hygiene

* Fix TestTextModel

* Fix more tests

---------

Co-authored-by: Henning Dieterichs <hdieterichs@microsoft.com>
Alex Ross 2025-05-23 16:30:38 +02:00 committed by GitHub
parent 6de48686ef
commit ba42f2d6e7
47 changed files with 2832 additions and 6771 deletions


@@ -930,6 +930,86 @@ export class ResourceQueue implements IDisposable {
}
}
export type Task<T = void> = () => (Promise<T> | T);
/**
* Processes tasks in the order they were scheduled.
*/
export class TaskQueue {
private _runningTask: Task<any> | undefined = undefined;
private _pendingTasks: { task: Task<any>; deferred: DeferredPromise<any>; setUndefinedWhenCleared: boolean }[] = [];
/**
* Waits for the current and pending tasks to finish, then runs and awaits the given task.
* If the task is skipped because of clearPending, the promise is rejected with a CancellationError.
*/
public schedule<T>(task: Task<T>): Promise<T> {
const deferred = new DeferredPromise<T>();
this._pendingTasks.push({ task, deferred, setUndefinedWhenCleared: false });
this._runIfNotRunning();
return deferred.p;
}
/**
* Waits for the current and pending tasks to finish, then runs and awaits the given task.
* If the task is skipped because of clearPending, the promise is resolved with undefined.
*/
public scheduleSkipIfCleared<T>(task: Task<T>): Promise<T | undefined> {
const deferred = new DeferredPromise<T>();
this._pendingTasks.push({ task, deferred, setUndefinedWhenCleared: true });
this._runIfNotRunning();
return deferred.p;
}
private _runIfNotRunning(): void {
if (this._runningTask === undefined) {
this._processQueue();
}
}
private async _processQueue(): Promise<void> {
if (this._pendingTasks.length === 0) {
return;
}
const next = this._pendingTasks.shift();
if (!next) {
return;
}
if (this._runningTask) {
throw new BugIndicatingError();
}
this._runningTask = next.task;
try {
const result = await next.task();
next.deferred.complete(result);
} catch (e) {
next.deferred.error(e);
} finally {
this._runningTask = undefined;
this._processQueue();
}
}
/**
* Clears all pending tasks. Does not cancel the currently running task.
*/
public clearPending(): void {
const tasks = this._pendingTasks;
this._pendingTasks = [];
for (const task of tasks) {
if (task.setUndefinedWhenCleared) {
task.deferred.complete(undefined);
} else {
task.deferred.error(new CancellationError());
}
}
}
}
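
For reference, a minimal usage sketch of the new TaskQueue (not part of the diff; it assumes an async context, and the import path is only indicative of where the class lives per this hunk):

import { TaskQueue } from '../../../base/common/async.js'; // path indicative

const queue = new TaskQueue();

// The first task starts immediately; tasks run one at a time, in scheduling order.
const first = queue.schedule(async () => 'first');

// Queued behind `first`. Because it was scheduled via scheduleSkipIfCleared,
// clearing the queue resolves it with undefined instead of rejecting.
const second = queue.scheduleSkipIfCleared(async () => 'second');

// Drops every task that has not started yet; `first` keeps running, and a
// still-pending schedule()d task would reject with a CancellationError.
queue.clearPending();

console.log(await first);  // 'first'
console.log(await second); // undefined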
export class TimeoutTimer implements IDisposable {
private _token: Timeout | undefined;
private _isDisposed = false;


@@ -69,6 +69,14 @@ export class ObservablePromise<T> {
throw error;
});
}
public readonly resolvedValue = derived(this, reader => {
const result = this.promiseResult.read(reader);
if (!result) {
return undefined;
}
return result.getDataOrThrow();
});
}
export class PromiseResult<T> {

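A hedged sketch of how the new resolvedValue observable can be consumed; the ObservablePromise constructor shape, the import path, and the helper names are assumptions, not asserted by this hunk:

import { ObservablePromise, autorun } from '../common/observable.js'; // path indicative

const op = new ObservablePromise(loadSettings()); // loadSettings(): Promise<Settings> is hypothetical

autorun(reader => {
	// undefined while the promise is pending; once it settles, resolvedValue
	// yields the value or rethrows the rejection reason via getDataOrThrow().
	const settings = op.resolvedValue.read(reader);
	if (settings !== undefined) {
		applySettings(settings); // hypothetical consumer
	}
});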

@@ -46,7 +46,7 @@ export class LineRange {
}
/**
* @param lineRanges An array of sorted line ranges.
* @param lineRanges An array of arrays of sorted line ranges.
*/
public static joinMany(lineRanges: readonly (readonly LineRange[])[]): readonly LineRange[] {
if (lineRanges.length === 0) {

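To illustrate the corrected contract of joinMany, a small hypothetical example; the merged result reflects my reading of it as a sorted union of all input ranges:

import { LineRange } from '../core/ranges/lineRange.js'; // path indicative

// Each inner array is one caller's sorted line ranges; the second constructor
// argument is the exclusive end line.
const joined = LineRange.joinMany([
	[new LineRange(1, 10), new LineRange(30, 40)],
	[new LineRange(5, 20)],
]);
// Expected shape: ranges covering lines [1, 20) and [30, 40).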

@@ -26,11 +26,6 @@ import { ContiguousMultilineTokens } from './tokens/contiguousMultilineTokens.js
import { localize } from '../../nls.js';
import { ExtensionIdentifier } from '../../platform/extensions/common/extensions.js';
import { IMarkerData } from '../../platform/markers/common/markers.js';
import { IModelTokensChangedEvent } from './textModelEvents.js';
import { ITextModel } from './model.js';
import { TokenUpdate } from './model/tokenStore.js';
import { ITextModelTreeSitter } from './services/treeSitterParserService.js';
import type * as Parser from '@vscode/tree-sitter-wasm';
/**
* @internal
@@ -101,24 +96,6 @@ export interface QueryCapture {
encodedLanguageId: number;
}
/**
* An intermediate interface for scaffolding the new tree sitter tokenization support. Not final.
* @internal
*/
export interface ITreeSitterTokenizationSupport {
/**
* exposed for testing
*/
getTokensInRange(textModel: ITextModel, range: Range, rangeStartOffset: number, rangeEndOffset: number): TokenUpdate[] | undefined;
tokenizeEncoded(lineNumber: number, textModel: model.ITextModel): void;
captureAtPosition(lineNumber: number, column: number, textModel: model.ITextModel): QueryCapture[];
captureAtRangeTree(range: Range, tree: Parser.Tree, textModelTreeSitter: ITextModelTreeSitter): QueryCapture[];
onDidChangeTokens: Event<{ textModel: model.ITextModel; changes: IModelTokensChangedEvent }>;
onDidChangeBackgroundTokenization: Event<{ textModel: model.ITextModel }>;
tokenizeEncodedInstrumented(lineNumber: number, textModel: model.ITextModel): { result: Uint32Array; captureTime: number; metadataTime: number } | undefined;
guessTokensForLinesContent(lineNumber: number, textModel: model.ITextModel, lines: string[]): Uint32Array[] | undefined;
}
/**
* @internal
*/
@@ -2418,11 +2395,6 @@ export interface ITokenizationRegistry<TSupport> {
*/
export const TokenizationRegistry: ITokenizationRegistry<ITokenizationSupport> = new TokenizationRegistryImpl();
/**
* @internal
*/
export const TreeSitterTokenizationRegistry: ITokenizationRegistry<ITreeSitterTokenizationSupport> = new TokenizationRegistryImpl();
/**
* @internal
*/


@@ -37,8 +37,8 @@ import { IntervalNode, IntervalTree, recomputeMaxEnd } from './intervalTree.js';
import { PieceTreeTextBuffer } from './pieceTreeTextBuffer/pieceTreeTextBuffer.js';
import { PieceTreeTextBufferBuilder } from './pieceTreeTextBuffer/pieceTreeTextBufferBuilder.js';
import { SearchParams, TextModelSearch } from './textModelSearch.js';
import { TokenizationTextModelPart } from './tokenizationTextModelPart.js';
import { AttachedViews } from './tokens.js';
import { TokenizationTextModelPart } from './tokens/tokenizationTextModelPart.js';
import { AttachedViews } from './tokens/abstractSyntaxTokenBackend.js';
import { IBracketPairsTextModelPart } from '../textModelBracketPairs.js';
import { IModelContentChangedEvent, IModelDecorationsChangedEvent, IModelOptionsChangedEvent, InternalModelContentChangeEvent, LineInjectedText, ModelInjectedTextChangedEvent, ModelRawChange, ModelRawContentChangedEvent, ModelRawEOLChanged, ModelRawFlush, ModelRawLineChanged, ModelRawLinesDeleted, ModelRawLinesInserted, ModelLineHeightChangedEvent, ModelLineHeightChanged } from '../textModelEvents.js';
import { IGuidesTextModelPart } from '../textModelGuides.js';


@@ -1,675 +0,0 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { CharCode } from '../../../base/common/charCode.js';
import { BugIndicatingError, onUnexpectedError } from '../../../base/common/errors.js';
import { Emitter, Event } from '../../../base/common/event.js';
import { DisposableMap, DisposableStore, MutableDisposable } from '../../../base/common/lifecycle.js';
import { countEOL } from '../core/misc/eolCounter.js';
import { LineRange } from '../core/ranges/lineRange.js';
import { IPosition, Position } from '../core/position.js';
import { Range } from '../core/range.js';
import { IWordAtPosition, getWordAtText } from '../core/wordHelper.js';
import { StandardTokenType } from '../encodedTokenAttributes.js';
import { IBackgroundTokenizationStore, IBackgroundTokenizer, ILanguageIdCodec, IState, ITokenizationSupport, TokenizationRegistry, TreeSitterTokenizationRegistry } from '../languages.js';
import { ILanguageService } from '../languages/language.js';
import { ILanguageConfigurationService, LanguageConfigurationServiceChangeEvent, ResolvedLanguageConfiguration } from '../languages/languageConfigurationRegistry.js';
import { IAttachedView } from '../model.js';
import { BracketPairsTextModelPart } from './bracketPairsTextModelPart/bracketPairsImpl.js';
import { TextModel } from './textModel.js';
import { TextModelPart } from './textModelPart.js';
import { DefaultBackgroundTokenizer, TokenizerWithStateStoreAndTextModel, TrackingTokenizationStateStore } from './textModelTokens.js';
import { AbstractTokens, AttachedViewHandler, AttachedViews } from './tokens.js';
import { TreeSitterTokens } from './treeSitterTokens.js';
import { IModelContentChangedEvent, IModelLanguageChangedEvent, IModelLanguageConfigurationChangedEvent, IModelTokensChangedEvent } from '../textModelEvents.js';
import { BackgroundTokenizationState, ITokenizationTextModelPart } from '../tokenizationTextModelPart.js';
import { ContiguousMultilineTokens } from '../tokens/contiguousMultilineTokens.js';
import { ContiguousMultilineTokensBuilder } from '../tokens/contiguousMultilineTokensBuilder.js';
import { ContiguousTokensStore } from '../tokens/contiguousTokensStore.js';
import { LineTokens } from '../tokens/lineTokens.js';
import { SparseMultilineTokens } from '../tokens/sparseMultilineTokens.js';
import { SparseTokensStore } from '../tokens/sparseTokensStore.js';
import { IInstantiationService } from '../../../platform/instantiation/common/instantiation.js';
export class TokenizationTextModelPart extends TextModelPart implements ITokenizationTextModelPart {
private readonly _semanticTokens: SparseTokensStore;
private readonly _onDidChangeLanguage: Emitter<IModelLanguageChangedEvent>;
public readonly onDidChangeLanguage: Event<IModelLanguageChangedEvent>;
private readonly _onDidChangeLanguageConfiguration: Emitter<IModelLanguageConfigurationChangedEvent>;
public readonly onDidChangeLanguageConfiguration: Event<IModelLanguageConfigurationChangedEvent>;
private readonly _onDidChangeTokens: Emitter<IModelTokensChangedEvent>;
public readonly onDidChangeTokens: Event<IModelTokensChangedEvent>;
private _tokens!: AbstractTokens;
private readonly _tokensDisposables: DisposableStore;
constructor(
private readonly _textModel: TextModel,
private readonly _bracketPairsTextModelPart: BracketPairsTextModelPart,
private _languageId: string,
private readonly _attachedViews: AttachedViews,
@ILanguageService private readonly _languageService: ILanguageService,
@ILanguageConfigurationService private readonly _languageConfigurationService: ILanguageConfigurationService,
@IInstantiationService private readonly _instantiationService: IInstantiationService
) {
super();
this._semanticTokens = new SparseTokensStore(this._languageService.languageIdCodec);
this._onDidChangeLanguage = this._register(new Emitter<IModelLanguageChangedEvent>());
this.onDidChangeLanguage = this._onDidChangeLanguage.event;
this._onDidChangeLanguageConfiguration = this._register(new Emitter<IModelLanguageConfigurationChangedEvent>());
this.onDidChangeLanguageConfiguration = this._onDidChangeLanguageConfiguration.event;
this._onDidChangeTokens = this._register(new Emitter<IModelTokensChangedEvent>());
this.onDidChangeTokens = this._onDidChangeTokens.event;
this._tokensDisposables = this._register(new DisposableStore());
// We just look at registry changes to determine whether to use tree sitter.
// This means that removing a language from the setting will not cause a switch to textmate and will require a reload.
// Adding a language to the setting will not need a reload, however.
this._register(Event.filter(TreeSitterTokenizationRegistry.onDidChange, (e) => e.changedLanguages.includes(this._languageId))(() => {
this.createPreferredTokenProvider();
}));
this.createPreferredTokenProvider();
}
private createGrammarTokens() {
return this._register(new GrammarTokens(this._languageService.languageIdCodec, this._textModel, () => this._languageId, this._attachedViews));
}
private createTreeSitterTokens(): AbstractTokens {
return this._register(this._instantiationService.createInstance(TreeSitterTokens, this._languageService.languageIdCodec, this._textModel, () => this._languageId));
}
private createTokens(useTreeSitter: boolean): void {
const needsReset = this._tokens !== undefined;
this._tokens?.dispose();
this._tokens = useTreeSitter ? this.createTreeSitterTokens() : this.createGrammarTokens();
this._tokensDisposables.clear();
this._tokensDisposables.add(this._tokens.onDidChangeTokens(e => {
this._emitModelTokensChangedEvent(e);
}));
this._tokensDisposables.add(this._tokens.onDidChangeBackgroundTokenizationState(e => {
this._bracketPairsTextModelPart.handleDidChangeBackgroundTokenizationState();
}));
if (needsReset) {
// We need to reset the tokenization, as the new token provider otherwise won't have a chance to provide tokens until some action happens in the editor.
this._tokens.resetTokenization();
}
}
private createPreferredTokenProvider() {
if (TreeSitterTokenizationRegistry.get(this._languageId)) {
if (!(this._tokens instanceof TreeSitterTokens)) {
this.createTokens(true);
}
} else {
if (!(this._tokens instanceof GrammarTokens)) {
this.createTokens(false);
}
}
}
_hasListeners(): boolean {
return (this._onDidChangeLanguage.hasListeners()
|| this._onDidChangeLanguageConfiguration.hasListeners()
|| this._onDidChangeTokens.hasListeners());
}
public handleLanguageConfigurationServiceChange(e: LanguageConfigurationServiceChangeEvent): void {
if (e.affects(this._languageId)) {
this._onDidChangeLanguageConfiguration.fire({});
}
}
public handleDidChangeContent(e: IModelContentChangedEvent): void {
if (e.isFlush) {
this._semanticTokens.flush();
} else if (!e.isEolChange) { // We don't have to do anything on an EOL change
for (const c of e.changes) {
const [eolCount, firstLineLength, lastLineLength] = countEOL(c.text);
this._semanticTokens.acceptEdit(
c.range,
eolCount,
firstLineLength,
lastLineLength,
c.text.length > 0 ? c.text.charCodeAt(0) : CharCode.Null
);
}
}
this._tokens.handleDidChangeContent(e);
}
public handleDidChangeAttached(): void {
this._tokens.handleDidChangeAttached();
}
/**
* Includes grammar and semantic tokens.
*/
public getLineTokens(lineNumber: number): LineTokens {
this.validateLineNumber(lineNumber);
const syntacticTokens = this._tokens.getLineTokens(lineNumber);
return this._semanticTokens.addSparseTokens(lineNumber, syntacticTokens);
}
private _emitModelTokensChangedEvent(e: IModelTokensChangedEvent): void {
if (!this._textModel._isDisposing()) {
this._bracketPairsTextModelPart.handleDidChangeTokens(e);
this._onDidChangeTokens.fire(e);
}
}
// #region Grammar Tokens
private validateLineNumber(lineNumber: number): void {
if (lineNumber < 1 || lineNumber > this._textModel.getLineCount()) {
throw new BugIndicatingError('Illegal value for lineNumber');
}
}
public get hasTokens(): boolean {
return this._tokens.hasTokens;
}
public resetTokenization() {
this._tokens.resetTokenization();
}
public get backgroundTokenizationState() {
return this._tokens.backgroundTokenizationState;
}
public forceTokenization(lineNumber: number): void {
this.validateLineNumber(lineNumber);
this._tokens.forceTokenization(lineNumber);
}
public hasAccurateTokensForLine(lineNumber: number): boolean {
this.validateLineNumber(lineNumber);
return this._tokens.hasAccurateTokensForLine(lineNumber);
}
public isCheapToTokenize(lineNumber: number): boolean {
this.validateLineNumber(lineNumber);
return this._tokens.isCheapToTokenize(lineNumber);
}
public tokenizeIfCheap(lineNumber: number): void {
this.validateLineNumber(lineNumber);
this._tokens.tokenizeIfCheap(lineNumber);
}
public getTokenTypeIfInsertingCharacter(lineNumber: number, column: number, character: string): StandardTokenType {
return this._tokens.getTokenTypeIfInsertingCharacter(lineNumber, column, character);
}
public tokenizeLinesAt(lineNumber: number, lines: string[]): LineTokens[] | null {
return this._tokens.tokenizeLinesAt(lineNumber, lines);
}
// #endregion
// #region Semantic Tokens
public setSemanticTokens(tokens: SparseMultilineTokens[] | null, isComplete: boolean): void {
this._semanticTokens.set(tokens, isComplete, this._textModel);
this._emitModelTokensChangedEvent({
semanticTokensApplied: tokens !== null,
ranges: [{ fromLineNumber: 1, toLineNumber: this._textModel.getLineCount() }],
});
}
public hasCompleteSemanticTokens(): boolean {
return this._semanticTokens.isComplete();
}
public hasSomeSemanticTokens(): boolean {
return !this._semanticTokens.isEmpty();
}
public setPartialSemanticTokens(range: Range, tokens: SparseMultilineTokens[]): void {
if (this.hasCompleteSemanticTokens()) {
return;
}
const changedRange = this._textModel.validateRange(
this._semanticTokens.setPartial(range, tokens)
);
this._emitModelTokensChangedEvent({
semanticTokensApplied: true,
ranges: [
{
fromLineNumber: changedRange.startLineNumber,
toLineNumber: changedRange.endLineNumber,
},
],
});
}
// #endregion
// #region Utility Methods
public getWordAtPosition(_position: IPosition): IWordAtPosition | null {
this.assertNotDisposed();
const position = this._textModel.validatePosition(_position);
const lineContent = this._textModel.getLineContent(position.lineNumber);
const lineTokens = this.getLineTokens(position.lineNumber);
const tokenIndex = lineTokens.findTokenIndexAtOffset(position.column - 1);
// (1). First try checking right biased word
const [rbStartOffset, rbEndOffset] = TokenizationTextModelPart._findLanguageBoundaries(lineTokens, tokenIndex);
const rightBiasedWord = getWordAtText(
position.column,
this.getLanguageConfiguration(lineTokens.getLanguageId(tokenIndex)).getWordDefinition(),
lineContent.substring(rbStartOffset, rbEndOffset),
rbStartOffset
);
// Make sure the result touches the original passed in position
if (
rightBiasedWord &&
rightBiasedWord.startColumn <= _position.column &&
_position.column <= rightBiasedWord.endColumn
) {
return rightBiasedWord;
}
// (2). Else, if we were at a language boundary, check the left biased word
if (tokenIndex > 0 && rbStartOffset === position.column - 1) {
// edge case, where `position` sits between two tokens belonging to two different languages
const [lbStartOffset, lbEndOffset] = TokenizationTextModelPart._findLanguageBoundaries(
lineTokens,
tokenIndex - 1
);
const leftBiasedWord = getWordAtText(
position.column,
this.getLanguageConfiguration(lineTokens.getLanguageId(tokenIndex - 1)).getWordDefinition(),
lineContent.substring(lbStartOffset, lbEndOffset),
lbStartOffset
);
// Make sure the result touches the original passed in position
if (
leftBiasedWord &&
leftBiasedWord.startColumn <= _position.column &&
_position.column <= leftBiasedWord.endColumn
) {
return leftBiasedWord;
}
}
return null;
}
private getLanguageConfiguration(languageId: string): ResolvedLanguageConfiguration {
return this._languageConfigurationService.getLanguageConfiguration(languageId);
}
private static _findLanguageBoundaries(lineTokens: LineTokens, tokenIndex: number): [number, number] {
const languageId = lineTokens.getLanguageId(tokenIndex);
// go left until a different language is hit
let startOffset = 0;
for (let i = tokenIndex; i >= 0 && lineTokens.getLanguageId(i) === languageId; i--) {
startOffset = lineTokens.getStartOffset(i);
}
// go right until a different language is hit
let endOffset = lineTokens.getLineContent().length;
for (
let i = tokenIndex, tokenCount = lineTokens.getCount();
i < tokenCount && lineTokens.getLanguageId(i) === languageId;
i++
) {
endOffset = lineTokens.getEndOffset(i);
}
return [startOffset, endOffset];
}
public getWordUntilPosition(position: IPosition): IWordAtPosition {
const wordAtPosition = this.getWordAtPosition(position);
if (!wordAtPosition) {
return { word: '', startColumn: position.column, endColumn: position.column, };
}
return {
word: wordAtPosition.word.substr(0, position.column - wordAtPosition.startColumn),
startColumn: wordAtPosition.startColumn,
endColumn: position.column,
};
}
// #endregion
// #region Language Id handling
public getLanguageId(): string {
return this._languageId;
}
public getLanguageIdAtPosition(lineNumber: number, column: number): string {
const position = this._textModel.validatePosition(new Position(lineNumber, column));
const lineTokens = this.getLineTokens(position.lineNumber);
return lineTokens.getLanguageId(lineTokens.findTokenIndexAtOffset(position.column - 1));
}
public setLanguageId(languageId: string, source: string = 'api'): void {
if (this._languageId === languageId) {
// There's nothing to do
return;
}
const e: IModelLanguageChangedEvent = {
oldLanguage: this._languageId,
newLanguage: languageId,
source
};
this._languageId = languageId;
this._bracketPairsTextModelPart.handleDidChangeLanguage(e);
this._tokens.resetTokenization();
this.createPreferredTokenProvider();
this._onDidChangeLanguage.fire(e);
this._onDidChangeLanguageConfiguration.fire({});
}
// #endregion
}
class GrammarTokens extends AbstractTokens {
private _tokenizer: TokenizerWithStateStoreAndTextModel | null = null;
protected _backgroundTokenizationState: BackgroundTokenizationState = BackgroundTokenizationState.InProgress;
protected readonly _onDidChangeBackgroundTokenizationState: Emitter<void> = this._register(new Emitter<void>());
public readonly onDidChangeBackgroundTokenizationState: Event<void> = this._onDidChangeBackgroundTokenizationState.event;
private _defaultBackgroundTokenizer: DefaultBackgroundTokenizer | null = null;
private readonly _backgroundTokenizer = this._register(new MutableDisposable<IBackgroundTokenizer>());
private readonly _tokens = new ContiguousTokensStore(this._languageIdCodec);
private _debugBackgroundTokens: ContiguousTokensStore | undefined;
private _debugBackgroundStates: TrackingTokenizationStateStore<IState> | undefined;
private readonly _debugBackgroundTokenizer = this._register(new MutableDisposable<IBackgroundTokenizer>());
private readonly _attachedViewStates = this._register(new DisposableMap<IAttachedView, AttachedViewHandler>());
constructor(
languageIdCodec: ILanguageIdCodec,
textModel: TextModel,
getLanguageId: () => string,
attachedViews: AttachedViews,
) {
super(languageIdCodec, textModel, getLanguageId);
this._register(TokenizationRegistry.onDidChange((e) => {
const languageId = this.getLanguageId();
if (e.changedLanguages.indexOf(languageId) === -1) {
return;
}
this.resetTokenization();
}));
this.resetTokenization();
this._register(attachedViews.onDidChangeVisibleRanges(({ view, state }) => {
if (state) {
let existing = this._attachedViewStates.get(view);
if (!existing) {
existing = new AttachedViewHandler(() => this.refreshRanges(existing!.lineRanges));
this._attachedViewStates.set(view, existing);
}
existing.handleStateChange(state);
} else {
this._attachedViewStates.deleteAndDispose(view);
}
}));
}
public resetTokenization(fireTokenChangeEvent: boolean = true): void {
this._tokens.flush();
this._debugBackgroundTokens?.flush();
if (this._debugBackgroundStates) {
this._debugBackgroundStates = new TrackingTokenizationStateStore(this._textModel.getLineCount());
}
if (fireTokenChangeEvent) {
this._onDidChangeTokens.fire({
semanticTokensApplied: false,
ranges: [
{
fromLineNumber: 1,
toLineNumber: this._textModel.getLineCount(),
},
],
});
}
const initializeTokenization = (): [ITokenizationSupport, IState] | [null, null] => {
if (this._textModel.isTooLargeForTokenization()) {
return [null, null];
}
const tokenizationSupport = TokenizationRegistry.get(this.getLanguageId());
if (!tokenizationSupport) {
return [null, null];
}
let initialState: IState;
try {
initialState = tokenizationSupport.getInitialState();
} catch (e) {
onUnexpectedError(e);
return [null, null];
}
return [tokenizationSupport, initialState];
};
const [tokenizationSupport, initialState] = initializeTokenization();
if (tokenizationSupport && initialState) {
this._tokenizer = new TokenizerWithStateStoreAndTextModel(this._textModel.getLineCount(), tokenizationSupport, this._textModel, this._languageIdCodec);
} else {
this._tokenizer = null;
}
this._backgroundTokenizer.clear();
this._defaultBackgroundTokenizer = null;
if (this._tokenizer) {
const b: IBackgroundTokenizationStore = {
setTokens: (tokens) => {
this.setTokens(tokens);
},
backgroundTokenizationFinished: () => {
if (this._backgroundTokenizationState === BackgroundTokenizationState.Completed) {
// We already did a full tokenization and don't go back to progressing.
return;
}
const newState = BackgroundTokenizationState.Completed;
this._backgroundTokenizationState = newState;
this._onDidChangeBackgroundTokenizationState.fire();
},
setEndState: (lineNumber, state) => {
if (!this._tokenizer) { return; }
const firstInvalidEndStateLineNumber = this._tokenizer.store.getFirstInvalidEndStateLineNumber();
// Don't accept states for definitely valid states, the renderer is ahead of the worker!
if (firstInvalidEndStateLineNumber !== null && lineNumber >= firstInvalidEndStateLineNumber) {
this._tokenizer?.store.setEndState(lineNumber, state);
}
},
};
if (tokenizationSupport && tokenizationSupport.createBackgroundTokenizer && !tokenizationSupport.backgroundTokenizerShouldOnlyVerifyTokens) {
this._backgroundTokenizer.value = tokenizationSupport.createBackgroundTokenizer(this._textModel, b);
}
if (!this._backgroundTokenizer.value && !this._textModel.isTooLargeForTokenization()) {
this._backgroundTokenizer.value = this._defaultBackgroundTokenizer =
new DefaultBackgroundTokenizer(this._tokenizer, b);
this._defaultBackgroundTokenizer.handleChanges();
}
if (tokenizationSupport?.backgroundTokenizerShouldOnlyVerifyTokens && tokenizationSupport.createBackgroundTokenizer) {
this._debugBackgroundTokens = new ContiguousTokensStore(this._languageIdCodec);
this._debugBackgroundStates = new TrackingTokenizationStateStore(this._textModel.getLineCount());
this._debugBackgroundTokenizer.clear();
this._debugBackgroundTokenizer.value = tokenizationSupport.createBackgroundTokenizer(this._textModel, {
setTokens: (tokens) => {
this._debugBackgroundTokens?.setMultilineTokens(tokens, this._textModel);
},
backgroundTokenizationFinished() {
// NO OP
},
setEndState: (lineNumber, state) => {
this._debugBackgroundStates?.setEndState(lineNumber, state);
},
});
} else {
this._debugBackgroundTokens = undefined;
this._debugBackgroundStates = undefined;
this._debugBackgroundTokenizer.value = undefined;
}
}
this.refreshAllVisibleLineTokens();
}
public handleDidChangeAttached() {
this._defaultBackgroundTokenizer?.handleChanges();
}
public handleDidChangeContent(e: IModelContentChangedEvent): void {
if (e.isFlush) {
// Don't fire the event, as the view might not have got the text change event yet
this.resetTokenization(false);
} else if (!e.isEolChange) { // We don't have to do anything on an EOL change
for (const c of e.changes) {
const [eolCount, firstLineLength] = countEOL(c.text);
this._tokens.acceptEdit(c.range, eolCount, firstLineLength);
this._debugBackgroundTokens?.acceptEdit(c.range, eolCount, firstLineLength);
}
this._debugBackgroundStates?.acceptChanges(e.changes);
if (this._tokenizer) {
this._tokenizer.store.acceptChanges(e.changes);
}
this._defaultBackgroundTokenizer?.handleChanges();
}
}
private setTokens(tokens: ContiguousMultilineTokens[]): { changes: { fromLineNumber: number; toLineNumber: number }[] } {
const { changes } = this._tokens.setMultilineTokens(tokens, this._textModel);
if (changes.length > 0) {
this._onDidChangeTokens.fire({ semanticTokensApplied: false, ranges: changes, });
}
return { changes: changes };
}
private refreshAllVisibleLineTokens(): void {
const ranges = LineRange.joinMany([...this._attachedViewStates].map(([_, s]) => s.lineRanges));
this.refreshRanges(ranges);
}
private refreshRanges(ranges: readonly LineRange[]): void {
for (const range of ranges) {
this.refreshRange(range.startLineNumber, range.endLineNumberExclusive - 1);
}
}
private refreshRange(startLineNumber: number, endLineNumber: number): void {
if (!this._tokenizer) {
return;
}
startLineNumber = Math.max(1, Math.min(this._textModel.getLineCount(), startLineNumber));
endLineNumber = Math.min(this._textModel.getLineCount(), endLineNumber);
const builder = new ContiguousMultilineTokensBuilder();
const { heuristicTokens } = this._tokenizer.tokenizeHeuristically(builder, startLineNumber, endLineNumber);
const changedTokens = this.setTokens(builder.finalize());
if (heuristicTokens) {
// We overrode tokens with heuristically computed ones.
// Because old states might get reused (thus stopping invalidation),
// we have to explicitly request the tokens for the changed ranges again.
for (const c of changedTokens.changes) {
this._backgroundTokenizer.value?.requestTokens(c.fromLineNumber, c.toLineNumber + 1);
}
}
this._defaultBackgroundTokenizer?.checkFinished();
}
public forceTokenization(lineNumber: number): void {
const builder = new ContiguousMultilineTokensBuilder();
this._tokenizer?.updateTokensUntilLine(builder, lineNumber);
this.setTokens(builder.finalize());
this._defaultBackgroundTokenizer?.checkFinished();
}
public hasAccurateTokensForLine(lineNumber: number): boolean {
if (!this._tokenizer) {
return true;
}
return this._tokenizer.hasAccurateTokensForLine(lineNumber);
}
public isCheapToTokenize(lineNumber: number): boolean {
if (!this._tokenizer) {
return true;
}
return this._tokenizer.isCheapToTokenize(lineNumber);
}
public getLineTokens(lineNumber: number): LineTokens {
const lineText = this._textModel.getLineContent(lineNumber);
const result = this._tokens.getTokens(
this._textModel.getLanguageId(),
lineNumber - 1,
lineText
);
if (this._debugBackgroundTokens && this._debugBackgroundStates && this._tokenizer) {
if (this._debugBackgroundStates.getFirstInvalidEndStateLineNumberOrMax() > lineNumber && this._tokenizer.store.getFirstInvalidEndStateLineNumberOrMax() > lineNumber) {
const backgroundResult = this._debugBackgroundTokens.getTokens(
this._textModel.getLanguageId(),
lineNumber - 1,
lineText
);
if (!result.equals(backgroundResult) && this._debugBackgroundTokenizer.value?.reportMismatchingTokens) {
this._debugBackgroundTokenizer.value.reportMismatchingTokens(lineNumber);
}
}
}
return result;
}
public getTokenTypeIfInsertingCharacter(lineNumber: number, column: number, character: string): StandardTokenType {
if (!this._tokenizer) {
return StandardTokenType.Other;
}
const position = this._textModel.validatePosition(new Position(lineNumber, column));
this.forceTokenization(position.lineNumber);
return this._tokenizer.getTokenTypeIfInsertingCharacter(position, character);
}
public tokenizeLinesAt(lineNumber: number, lines: string[]): LineTokens[] | null {
if (!this._tokenizer) {
return null;
}
this.forceTokenization(lineNumber);
return this._tokenizer.tokenizeLinesAt(lineNumber, lines);
}
public get hasTokens(): boolean {
return this._tokens.hasTokens;
}
}


@@ -3,56 +3,100 @@
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { equals } from '../../../base/common/arrays.js';
import { RunOnceScheduler } from '../../../base/common/async.js';
import { Emitter, Event } from '../../../base/common/event.js';
import { Disposable } from '../../../base/common/lifecycle.js';
import { LineRange } from '../core/ranges/lineRange.js';
import { StandardTokenType } from '../encodedTokenAttributes.js';
import { ILanguageIdCodec } from '../languages.js';
import { IAttachedView } from '../model.js';
import { TextModel } from './textModel.js';
import { IModelContentChangedEvent, IModelTokensChangedEvent } from '../textModelEvents.js';
import { BackgroundTokenizationState } from '../tokenizationTextModelPart.js';
import { LineTokens } from '../tokens/lineTokens.js';
import { equals } from '../../../../base/common/arrays.js';
import { RunOnceScheduler } from '../../../../base/common/async.js';
import { Emitter, Event } from '../../../../base/common/event.js';
import { Disposable } from '../../../../base/common/lifecycle.js';
import { LineRange } from '../../core/ranges/lineRange.js';
import { StandardTokenType } from '../../encodedTokenAttributes.js';
import { ILanguageIdCodec } from '../../languages.js';
import { IAttachedView } from '../../model.js';
import { TextModel } from '../textModel.js';
import { IModelContentChangedEvent, IModelTokensChangedEvent } from '../../textModelEvents.js';
import { BackgroundTokenizationState } from '../../tokenizationTextModelPart.js';
import { LineTokens } from '../../tokens/lineTokens.js';
import { derivedOpts, IObservable, ISettableObservable, observableSignal, observableValueOpts } from '../../../../base/common/observable.js';
import { equalsIfDefined, itemEquals, itemsEquals } from '../../../../base/common/equals.js';
/**
* @internal
*/
export class AttachedViews {
private readonly _onDidChangeVisibleRanges = new Emitter<{ view: IAttachedView; state: IAttachedViewState | undefined }>();
private readonly _onDidChangeVisibleRanges = new Emitter<{ view: IAttachedView; state: AttachedViewState | undefined }>();
public readonly onDidChangeVisibleRanges = this._onDidChangeVisibleRanges.event;
private readonly _views = new Set<AttachedViewImpl>();
private readonly _viewsChanged = observableSignal(this);
public readonly visibleLineRanges: IObservable<readonly LineRange[]>;
constructor() {
this.visibleLineRanges = derivedOpts({
owner: this,
equalsFn: itemsEquals(itemEquals())
}, reader => {
this._viewsChanged.read(reader);
const ranges = LineRange.joinMany(
[...this._views].map(view => view.state.read(reader)?.visibleLineRanges ?? [])
);
return ranges;
});
}
public attachView(): IAttachedView {
const view = new AttachedViewImpl((state) => {
this._onDidChangeVisibleRanges.fire({ view, state });
});
this._views.add(view);
this._viewsChanged.trigger(undefined);
return view;
}
public detachView(view: IAttachedView): void {
this._views.delete(view as AttachedViewImpl);
this._onDidChangeVisibleRanges.fire({ view, state: undefined });
this._viewsChanged.trigger(undefined);
}
}
/**
* @internal
*/
export interface IAttachedViewState {
readonly visibleLineRanges: readonly LineRange[];
readonly stabilized: boolean;
export class AttachedViewState {
constructor(
readonly visibleLineRanges: readonly LineRange[],
readonly stabilized: boolean,
) { }
public equals(other: AttachedViewState): boolean {
if (this === other) {
return true;
}
if (!equals(this.visibleLineRanges, other.visibleLineRanges, (a, b) => a.equals(b))) {
return false;
}
if (this.stabilized !== other.stabilized) {
return false;
}
return true;
}
}
class AttachedViewImpl implements IAttachedView {
constructor(private readonly handleStateChange: (state: IAttachedViewState) => void) { }
private readonly _state: ISettableObservable<AttachedViewState | undefined>;
public get state(): IObservable<AttachedViewState | undefined> { return this._state; }
constructor(
private readonly handleStateChange: (state: AttachedViewState) => void
) {
this._state = observableValueOpts<AttachedViewState | undefined>({ owner: this, equalsFn: equalsIfDefined((a, b) => a.equals(b)) }, undefined);
}
setVisibleLines(visibleLines: { startLineNumber: number; endLineNumber: number }[], stabilized: boolean): void {
const visibleLineRanges = visibleLines.map((line) => new LineRange(line.startLineNumber, line.endLineNumber + 1));
this.handleStateChange({ visibleLineRanges, stabilized });
const state = new AttachedViewState(visibleLineRanges, stabilized);
this._state.set(state, undefined, undefined);
this.handleStateChange(state);
}
}
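
A hedged end-to-end sketch, not part of the diff, of how a view's visible lines now flow into the visibleLineRanges observable (autorun comes from the same observable utilities imported above; the import path is indicative):

import { autorun } from '../../../../base/common/observable.js'; // path indicative

const attachedViews = new AttachedViews();
const view = attachedViews.attachView();

// visibleLineRanges is the joined set of ranges reported by every attached view.
autorun(reader => {
	const ranges = attachedViews.visibleLineRanges.read(reader);
	console.log(ranges.length);
});

// Report lines 1-20 as visible; `stabilized` stays false while the user is still scrolling.
view.setVisibleLines([{ startLineNumber: 1, endLineNumber: 20 }], false);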
@@ -76,7 +120,7 @@ export class AttachedViewHandler extends Disposable {
this._refreshTokens();
}
public handleStateChange(state: IAttachedViewState): void {
public handleStateChange(state: AttachedViewState): void {
this._lineRanges = state.visibleLineRanges;
if (state.stabilized) {
this.runner.cancel();
@@ -87,7 +131,7 @@
}
}
export abstract class AbstractTokens extends Disposable {
export abstract class AbstractSyntaxTokenBackend extends Disposable {
protected abstract _backgroundTokenizationState: BackgroundTokenizationState;
public get backgroundTokenizationState(): BackgroundTokenizationState {
return this._backgroundTokenizationState;
@@ -104,12 +148,11 @@ export abstract class AbstractTokens extends Disposable {
constructor(
protected readonly _languageIdCodec: ILanguageIdCodec,
protected readonly _textModel: TextModel,
protected getLanguageId: () => string,
) {
super();
}
public abstract resetTokenization(fireTokenChangeEvent?: boolean): void;
public abstract todo_resetTokenization(fireTokenChangeEvent?: boolean): void;
public abstract handleDidChangeAttached(): void;


@@ -0,0 +1,377 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { CharCode } from '../../../../base/common/charCode.js';
import { BugIndicatingError } from '../../../../base/common/errors.js';
import { Emitter, Event } from '../../../../base/common/event.js';
import { countEOL } from '../../core/misc/eolCounter.js';
import { IPosition, Position } from '../../core/position.js';
import { Range } from '../../core/range.js';
import { IWordAtPosition, getWordAtText } from '../../core/wordHelper.js';
import { StandardTokenType } from '../../encodedTokenAttributes.js';
import { ILanguageService } from '../../languages/language.js';
import { ILanguageConfigurationService, LanguageConfigurationServiceChangeEvent, ResolvedLanguageConfiguration } from '../../languages/languageConfigurationRegistry.js';
import { BracketPairsTextModelPart } from '../bracketPairsTextModelPart/bracketPairsImpl.js';
import { TextModel } from '../textModel.js';
import { TextModelPart } from '../textModelPart.js';
import { AbstractSyntaxTokenBackend, AttachedViews } from './abstractSyntaxTokenBackend.js';
import { TreeSitterSyntaxTokenBackend } from './treeSitter/treeSitterSyntaxTokenBackend.js';
import { IModelContentChangedEvent, IModelLanguageChangedEvent, IModelLanguageConfigurationChangedEvent, IModelTokensChangedEvent } from '../../textModelEvents.js';
import { ITokenizationTextModelPart } from '../../tokenizationTextModelPart.js';
import { LineTokens } from '../../tokens/lineTokens.js';
import { SparseMultilineTokens } from '../../tokens/sparseMultilineTokens.js';
import { SparseTokensStore } from '../../tokens/sparseTokensStore.js';
import { IInstantiationService } from '../../../../platform/instantiation/common/instantiation.js';
import { TokenizerSyntaxTokenBackend } from './tokenizerSyntaxTokenBackend.js';
import { ITreeSitterLibraryService } from '../../services/treeSitter/treeSitterLibraryService.js';
import { derived, IObservable, ISettableObservable, observableValue } from '../../../../base/common/observable.js';
export class TokenizationTextModelPart extends TextModelPart implements ITokenizationTextModelPart {
private readonly _semanticTokens: SparseTokensStore;
private readonly _onDidChangeLanguage: Emitter<IModelLanguageChangedEvent>;
public readonly onDidChangeLanguage: Event<IModelLanguageChangedEvent>;
private readonly _onDidChangeLanguageConfiguration: Emitter<IModelLanguageConfigurationChangedEvent>;
public readonly onDidChangeLanguageConfiguration: Event<IModelLanguageConfigurationChangedEvent>;
private readonly _onDidChangeTokens: Emitter<IModelTokensChangedEvent>;
public readonly onDidChangeTokens: Event<IModelTokensChangedEvent>;
public readonly tokens: IObservable<AbstractSyntaxTokenBackend>;
private readonly _useTreeSitter: IObservable<boolean>;
private readonly _languageIdObs: ISettableObservable<string>;
constructor(
private readonly _textModel: TextModel,
private readonly _bracketPairsTextModelPart: BracketPairsTextModelPart,
private _languageId: string,
private readonly _attachedViews: AttachedViews,
@ILanguageService private readonly _languageService: ILanguageService,
@ILanguageConfigurationService private readonly _languageConfigurationService: ILanguageConfigurationService,
@IInstantiationService private readonly _instantiationService: IInstantiationService,
@ITreeSitterLibraryService private readonly _treeSitterLibraryService: ITreeSitterLibraryService,
) {
super();
this._languageIdObs = observableValue(this, this._languageId);
this._useTreeSitter = derived(this, reader => {
const languageId = this._languageIdObs.read(reader);
return this._treeSitterLibraryService.supportsLanguage(languageId, reader);
});
this.tokens = derived(this, reader => {
let tokens: AbstractSyntaxTokenBackend;
if (this._useTreeSitter.read(reader)) {
tokens = reader.store.add(this._instantiationService.createInstance(
TreeSitterSyntaxTokenBackend,
this._languageIdObs,
this._languageService.languageIdCodec,
this._textModel,
this._attachedViews.visibleLineRanges
));
} else {
tokens = reader.store.add(new TokenizerSyntaxTokenBackend(this._languageService.languageIdCodec, this._textModel, () => this._languageId, this._attachedViews));
}
reader.store.add(tokens.onDidChangeTokens(e => {
this._emitModelTokensChangedEvent(e);
}));
reader.store.add(tokens.onDidChangeBackgroundTokenizationState(e => {
this._bracketPairsTextModelPart.handleDidChangeBackgroundTokenizationState();
}));
return tokens;
});
let hadTokens = false;
this.tokens.recomputeInitiallyAndOnChange(this._store, value => {
if (hadTokens) {
// We need to reset the tokenization, as the new token provider otherwise won't have a chance to provide tokens until some action happens in the editor.
// TODO@hediet: Look into why this is needed.
value.todo_resetTokenization();
}
hadTokens = true;
});
this._semanticTokens = new SparseTokensStore(this._languageService.languageIdCodec);
this._onDidChangeLanguage = this._register(new Emitter<IModelLanguageChangedEvent>());
this.onDidChangeLanguage = this._onDidChangeLanguage.event;
this._onDidChangeLanguageConfiguration = this._register(new Emitter<IModelLanguageConfigurationChangedEvent>());
this.onDidChangeLanguageConfiguration = this._onDidChangeLanguageConfiguration.event;
this._onDidChangeTokens = this._register(new Emitter<IModelTokensChangedEvent>());
this.onDidChangeTokens = this._onDidChangeTokens.event;
}
_hasListeners(): boolean {
return (this._onDidChangeLanguage.hasListeners()
|| this._onDidChangeLanguageConfiguration.hasListeners()
|| this._onDidChangeTokens.hasListeners());
}
public handleLanguageConfigurationServiceChange(e: LanguageConfigurationServiceChangeEvent): void {
if (e.affects(this._languageId)) {
this._onDidChangeLanguageConfiguration.fire({});
}
}
public handleDidChangeContent(e: IModelContentChangedEvent): void {
if (e.isFlush) {
this._semanticTokens.flush();
} else if (!e.isEolChange) { // We don't have to do anything on an EOL change
for (const c of e.changes) {
const [eolCount, firstLineLength, lastLineLength] = countEOL(c.text);
this._semanticTokens.acceptEdit(
c.range,
eolCount,
firstLineLength,
lastLineLength,
c.text.length > 0 ? c.text.charCodeAt(0) : CharCode.Null
);
}
}
this.tokens.get().handleDidChangeContent(e);
}
public handleDidChangeAttached(): void {
this.tokens.get().handleDidChangeAttached();
}
/**
* Includes grammar and semantic tokens.
*/
public getLineTokens(lineNumber: number): LineTokens {
this.validateLineNumber(lineNumber);
const syntacticTokens = this.tokens.get().getLineTokens(lineNumber);
return this._semanticTokens.addSparseTokens(lineNumber, syntacticTokens);
}
private _emitModelTokensChangedEvent(e: IModelTokensChangedEvent): void {
if (!this._textModel._isDisposing()) {
this._bracketPairsTextModelPart.handleDidChangeTokens(e);
this._onDidChangeTokens.fire(e);
}
}
// #region Grammar Tokens
private validateLineNumber(lineNumber: number): void {
if (lineNumber < 1 || lineNumber > this._textModel.getLineCount()) {
throw new BugIndicatingError('Illegal value for lineNumber');
}
}
public get hasTokens(): boolean {
return this.tokens.get().hasTokens;
}
public resetTokenization() {
this.tokens.get().todo_resetTokenization();
}
public get backgroundTokenizationState() {
return this.tokens.get().backgroundTokenizationState;
}
public forceTokenization(lineNumber: number): void {
this.validateLineNumber(lineNumber);
this.tokens.get().forceTokenization(lineNumber);
}
public hasAccurateTokensForLine(lineNumber: number): boolean {
this.validateLineNumber(lineNumber);
return this.tokens.get().hasAccurateTokensForLine(lineNumber);
}
public isCheapToTokenize(lineNumber: number): boolean {
this.validateLineNumber(lineNumber);
return this.tokens.get().isCheapToTokenize(lineNumber);
}
public tokenizeIfCheap(lineNumber: number): void {
this.validateLineNumber(lineNumber);
this.tokens.get().tokenizeIfCheap(lineNumber);
}
public getTokenTypeIfInsertingCharacter(lineNumber: number, column: number, character: string): StandardTokenType {
return this.tokens.get().getTokenTypeIfInsertingCharacter(lineNumber, column, character);
}
public tokenizeLinesAt(lineNumber: number, lines: string[]): LineTokens[] | null {
return this.tokens.get().tokenizeLinesAt(lineNumber, lines);
}
// #endregion
// #region Semantic Tokens
public setSemanticTokens(tokens: SparseMultilineTokens[] | null, isComplete: boolean): void {
this._semanticTokens.set(tokens, isComplete, this._textModel);
this._emitModelTokensChangedEvent({
semanticTokensApplied: tokens !== null,
ranges: [{ fromLineNumber: 1, toLineNumber: this._textModel.getLineCount() }],
});
}
public hasCompleteSemanticTokens(): boolean {
return this._semanticTokens.isComplete();
}
public hasSomeSemanticTokens(): boolean {
return !this._semanticTokens.isEmpty();
}
public setPartialSemanticTokens(range: Range, tokens: SparseMultilineTokens[]): void {
if (this.hasCompleteSemanticTokens()) {
return;
}
const changedRange = this._textModel.validateRange(
this._semanticTokens.setPartial(range, tokens)
);
this._emitModelTokensChangedEvent({
semanticTokensApplied: true,
ranges: [
{
fromLineNumber: changedRange.startLineNumber,
toLineNumber: changedRange.endLineNumber,
},
],
});
}
// #endregion
// #region Utility Methods
public getWordAtPosition(_position: IPosition): IWordAtPosition | null {
this.assertNotDisposed();
const position = this._textModel.validatePosition(_position);
const lineContent = this._textModel.getLineContent(position.lineNumber);
const lineTokens = this.getLineTokens(position.lineNumber);
const tokenIndex = lineTokens.findTokenIndexAtOffset(position.column - 1);
// (1). First try checking right biased word
const [rbStartOffset, rbEndOffset] = TokenizationTextModelPart._findLanguageBoundaries(lineTokens, tokenIndex);
const rightBiasedWord = getWordAtText(
position.column,
this.getLanguageConfiguration(lineTokens.getLanguageId(tokenIndex)).getWordDefinition(),
lineContent.substring(rbStartOffset, rbEndOffset),
rbStartOffset
);
// Make sure the result touches the original passed in position
if (
rightBiasedWord &&
rightBiasedWord.startColumn <= _position.column &&
_position.column <= rightBiasedWord.endColumn
) {
return rightBiasedWord;
}
// (2). Else, if we were at a language boundary, check the left biased word
if (tokenIndex > 0 && rbStartOffset === position.column - 1) {
// edge case, where `position` sits between two tokens belonging to two different languages
const [lbStartOffset, lbEndOffset] = TokenizationTextModelPart._findLanguageBoundaries(
lineTokens,
tokenIndex - 1
);
const leftBiasedWord = getWordAtText(
position.column,
this.getLanguageConfiguration(lineTokens.getLanguageId(tokenIndex - 1)).getWordDefinition(),
lineContent.substring(lbStartOffset, lbEndOffset),
lbStartOffset
);
// Make sure the result touches the original passed in position
if (
leftBiasedWord &&
leftBiasedWord.startColumn <= _position.column &&
_position.column <= leftBiasedWord.endColumn
) {
return leftBiasedWord;
}
}
return null;
}
private getLanguageConfiguration(languageId: string): ResolvedLanguageConfiguration {
return this._languageConfigurationService.getLanguageConfiguration(languageId);
}
private static _findLanguageBoundaries(lineTokens: LineTokens, tokenIndex: number): [number, number] {
const languageId = lineTokens.getLanguageId(tokenIndex);
// go left until a different language is hit
let startOffset = 0;
for (let i = tokenIndex; i >= 0 && lineTokens.getLanguageId(i) === languageId; i--) {
startOffset = lineTokens.getStartOffset(i);
}
// go right until a different language is hit
let endOffset = lineTokens.getLineContent().length;
for (
let i = tokenIndex, tokenCount = lineTokens.getCount();
i < tokenCount && lineTokens.getLanguageId(i) === languageId;
i++
) {
endOffset = lineTokens.getEndOffset(i);
}
return [startOffset, endOffset];
}
public getWordUntilPosition(position: IPosition): IWordAtPosition {
const wordAtPosition = this.getWordAtPosition(position);
if (!wordAtPosition) {
return { word: '', startColumn: position.column, endColumn: position.column, };
}
return {
word: wordAtPosition.word.substr(0, position.column - wordAtPosition.startColumn),
startColumn: wordAtPosition.startColumn,
endColumn: position.column,
};
}
// #endregion
// #region Language Id handling
public getLanguageId(): string {
return this._languageId;
}
public getLanguageIdAtPosition(lineNumber: number, column: number): string {
const position = this._textModel.validatePosition(new Position(lineNumber, column));
const lineTokens = this.getLineTokens(position.lineNumber);
return lineTokens.getLanguageId(lineTokens.findTokenIndexAtOffset(position.column - 1));
}
public setLanguageId(languageId: string, source: string = 'api'): void {
if (this._languageId === languageId) {
// There's nothing to do
return;
}
const e: IModelLanguageChangedEvent = {
oldLanguage: this._languageId,
newLanguage: languageId,
source
};
this._languageId = languageId;
this._languageIdObs.set(languageId, undefined);
this._bracketPairsTextModelPart.handleDidChangeLanguage(e);
this._onDidChangeLanguage.fire(e);
this._onDidChangeLanguageConfiguration.fire({});
}
// #endregion
}
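
The constructor above leans on the reader.store pattern: whatever a derived adds to reader.store is disposed the next time the derived recomputes, so a language switch that flips _useTreeSitter swaps the token backend without leaking the old one. A stripped-down sketch of that pattern, with a hypothetical FakeBackend standing in for the two real backends and indicative import paths:

import { DisposableStore } from '../../../../base/common/lifecycle.js';
import { derived, observableValue } from '../../../../base/common/observable.js';

class FakeBackend { // hypothetical stand-in for TokenizerSyntaxTokenBackend / TreeSitterSyntaxTokenBackend
	constructor(readonly kind: string) { }
	todo_resetTokenization(): void { }
	dispose(): void { console.log(`disposed ${this.kind}`); }
}

const store = new DisposableStore();
const useTreeSitter = observableValue('useTreeSitter', false);

const backend = derived(reader => {
	// Added to reader.store, so the previous backend is disposed on every recompute.
	return reader.store.add(new FakeBackend(useTreeSitter.read(reader) ? 'tree-sitter' : 'textmate'));
});

// Keep the derived alive and reset tokenization whenever the backend is swapped.
backend.recomputeInitiallyAndOnChange(store, value => value.todo_resetTokenization());

useTreeSitter.set(true, undefined); // disposes the 'textmate' backend and creates the 'tree-sitter' one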


@@ -0,0 +1,313 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { onUnexpectedError } from '../../../../base/common/errors.js';
import { Emitter, Event } from '../../../../base/common/event.js';
import { MutableDisposable, DisposableMap } from '../../../../base/common/lifecycle.js';
import { countEOL } from '../../core/misc/eolCounter.js';
import { Position } from '../../core/position.js';
import { LineRange } from '../../core/ranges/lineRange.js';
import { StandardTokenType } from '../../encodedTokenAttributes.js';
import { IBackgroundTokenizer, IState, ILanguageIdCodec, TokenizationRegistry, ITokenizationSupport, IBackgroundTokenizationStore } from '../../languages.js';
import { IAttachedView } from '../../model.js';
import { IModelContentChangedEvent } from '../../textModelEvents.js';
import { BackgroundTokenizationState } from '../../tokenizationTextModelPart.js';
import { ContiguousMultilineTokens } from '../../tokens/contiguousMultilineTokens.js';
import { ContiguousMultilineTokensBuilder } from '../../tokens/contiguousMultilineTokensBuilder.js';
import { ContiguousTokensStore } from '../../tokens/contiguousTokensStore.js';
import { LineTokens } from '../../tokens/lineTokens.js';
import { TextModel } from '../textModel.js';
import { TokenizerWithStateStoreAndTextModel, DefaultBackgroundTokenizer, TrackingTokenizationStateStore } from '../textModelTokens.js';
import { AbstractSyntaxTokenBackend, AttachedViewHandler, AttachedViews } from './abstractSyntaxTokenBackend.js';
/** For TextMate */
export class TokenizerSyntaxTokenBackend extends AbstractSyntaxTokenBackend {
private _tokenizer: TokenizerWithStateStoreAndTextModel | null = null;
protected _backgroundTokenizationState: BackgroundTokenizationState = BackgroundTokenizationState.InProgress;
protected readonly _onDidChangeBackgroundTokenizationState: Emitter<void> = this._register(new Emitter<void>());
public readonly onDidChangeBackgroundTokenizationState: Event<void> = this._onDidChangeBackgroundTokenizationState.event;
private _defaultBackgroundTokenizer: DefaultBackgroundTokenizer | null = null;
private readonly _backgroundTokenizer = this._register(new MutableDisposable<IBackgroundTokenizer>());
private readonly _tokens = new ContiguousTokensStore(this._languageIdCodec);
private _debugBackgroundTokens: ContiguousTokensStore | undefined;
private _debugBackgroundStates: TrackingTokenizationStateStore<IState> | undefined;
private readonly _debugBackgroundTokenizer = this._register(new MutableDisposable<IBackgroundTokenizer>());
private readonly _attachedViewStates = this._register(new DisposableMap<IAttachedView, AttachedViewHandler>());
constructor(
languageIdCodec: ILanguageIdCodec,
textModel: TextModel,
private readonly getLanguageId: () => string,
attachedViews: AttachedViews,
) {
super(languageIdCodec, textModel);
this._register(TokenizationRegistry.onDidChange((e) => {
const languageId = this.getLanguageId();
if (e.changedLanguages.indexOf(languageId) === -1) {
return;
}
this.todo_resetTokenization();
}));
this.todo_resetTokenization();
this._register(attachedViews.onDidChangeVisibleRanges(({ view, state }) => {
if (state) {
let existing = this._attachedViewStates.get(view);
if (!existing) {
existing = new AttachedViewHandler(() => this.refreshRanges(existing!.lineRanges));
this._attachedViewStates.set(view, existing);
}
existing.handleStateChange(state);
} else {
this._attachedViewStates.deleteAndDispose(view);
}
}));
}
public todo_resetTokenization(fireTokenChangeEvent: boolean = true): void {
this._tokens.flush();
this._debugBackgroundTokens?.flush();
if (this._debugBackgroundStates) {
this._debugBackgroundStates = new TrackingTokenizationStateStore(this._textModel.getLineCount());
}
if (fireTokenChangeEvent) {
this._onDidChangeTokens.fire({
semanticTokensApplied: false,
ranges: [
{
fromLineNumber: 1,
toLineNumber: this._textModel.getLineCount(),
},
],
});
}
const initializeTokenization = (): [ITokenizationSupport, IState] | [null, null] => {
if (this._textModel.isTooLargeForTokenization()) {
return [null, null];
}
const tokenizationSupport = TokenizationRegistry.get(this.getLanguageId());
if (!tokenizationSupport) {
return [null, null];
}
let initialState: IState;
try {
initialState = tokenizationSupport.getInitialState();
} catch (e) {
onUnexpectedError(e);
return [null, null];
}
return [tokenizationSupport, initialState];
};
const [tokenizationSupport, initialState] = initializeTokenization();
if (tokenizationSupport && initialState) {
this._tokenizer = new TokenizerWithStateStoreAndTextModel(this._textModel.getLineCount(), tokenizationSupport, this._textModel, this._languageIdCodec);
} else {
this._tokenizer = null;
}
this._backgroundTokenizer.clear();
this._defaultBackgroundTokenizer = null;
if (this._tokenizer) {
const b: IBackgroundTokenizationStore = {
setTokens: (tokens) => {
this.setTokens(tokens);
},
backgroundTokenizationFinished: () => {
if (this._backgroundTokenizationState === BackgroundTokenizationState.Completed) {
// We already did a full tokenization and don't go back to progressing.
return;
}
const newState = BackgroundTokenizationState.Completed;
this._backgroundTokenizationState = newState;
this._onDidChangeBackgroundTokenizationState.fire();
},
setEndState: (lineNumber, state) => {
if (!this._tokenizer) { return; }
const firstInvalidEndStateLineNumber = this._tokenizer.store.getFirstInvalidEndStateLineNumber();
// Don't accept states for definitely valid states, the renderer is ahead of the worker!
if (firstInvalidEndStateLineNumber !== null && lineNumber >= firstInvalidEndStateLineNumber) {
this._tokenizer?.store.setEndState(lineNumber, state);
}
},
};
if (tokenizationSupport && tokenizationSupport.createBackgroundTokenizer && !tokenizationSupport.backgroundTokenizerShouldOnlyVerifyTokens) {
this._backgroundTokenizer.value = tokenizationSupport.createBackgroundTokenizer(this._textModel, b);
}
if (!this._backgroundTokenizer.value && !this._textModel.isTooLargeForTokenization()) {
this._backgroundTokenizer.value = this._defaultBackgroundTokenizer =
new DefaultBackgroundTokenizer(this._tokenizer, b);
this._defaultBackgroundTokenizer.handleChanges();
}
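// When the tokenization support only wants to verify tokens, keep a separate debug token and state store fed by a second background tokenizer so getLineTokens can compare the two results.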
if (tokenizationSupport?.backgroundTokenizerShouldOnlyVerifyTokens && tokenizationSupport.createBackgroundTokenizer) {
this._debugBackgroundTokens = new ContiguousTokensStore(this._languageIdCodec);
this._debugBackgroundStates = new TrackingTokenizationStateStore(this._textModel.getLineCount());
this._debugBackgroundTokenizer.clear();
this._debugBackgroundTokenizer.value = tokenizationSupport.createBackgroundTokenizer(this._textModel, {
setTokens: (tokens) => {
this._debugBackgroundTokens?.setMultilineTokens(tokens, this._textModel);
},
backgroundTokenizationFinished() {
// NO OP
},
setEndState: (lineNumber, state) => {
this._debugBackgroundStates?.setEndState(lineNumber, state);
},
});
} else {
this._debugBackgroundTokens = undefined;
this._debugBackgroundStates = undefined;
this._debugBackgroundTokenizer.value = undefined;
}
}
this.refreshAllVisibleLineTokens();
}
public handleDidChangeAttached() {
this._defaultBackgroundTokenizer?.handleChanges();
}
public handleDidChangeContent(e: IModelContentChangedEvent): void {
if (e.isFlush) {
// Don't fire the event, as the view might not have got the text change event yet
this.todo_resetTokenization(false);
} else if (!e.isEolChange) { // We don't have to do anything on an EOL change
for (const c of e.changes) {
const [eolCount, firstLineLength] = countEOL(c.text);
this._tokens.acceptEdit(c.range, eolCount, firstLineLength);
this._debugBackgroundTokens?.acceptEdit(c.range, eolCount, firstLineLength);
}
this._debugBackgroundStates?.acceptChanges(e.changes);
if (this._tokenizer) {
this._tokenizer.store.acceptChanges(e.changes);
}
this._defaultBackgroundTokenizer?.handleChanges();
}
}
private setTokens(tokens: ContiguousMultilineTokens[]): { changes: { fromLineNumber: number; toLineNumber: number }[] } {
const { changes } = this._tokens.setMultilineTokens(tokens, this._textModel);
if (changes.length > 0) {
this._onDidChangeTokens.fire({ semanticTokensApplied: false, ranges: changes, });
}
return { changes: changes };
}
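// Refreshes the tokens of every line range that is currently visible in any attached view.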
private refreshAllVisibleLineTokens(): void {
const ranges = LineRange.joinMany([...this._attachedViewStates].map(([_, s]) => s.lineRanges));
this.refreshRanges(ranges);
}
private refreshRanges(ranges: readonly LineRange[]): void {
for (const range of ranges) {
this.refreshRange(range.startLineNumber, range.endLineNumberExclusive - 1);
}
}
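// Re-tokenizes the given line range (clamped to the model), possibly using heuristics, and requests accurate tokens from the background tokenizer for any heuristically tokenized lines.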
private refreshRange(startLineNumber: number, endLineNumber: number): void {
if (!this._tokenizer) {
return;
}
startLineNumber = Math.max(1, Math.min(this._textModel.getLineCount(), startLineNumber));
endLineNumber = Math.min(this._textModel.getLineCount(), endLineNumber);
const builder = new ContiguousMultilineTokensBuilder();
const { heuristicTokens } = this._tokenizer.tokenizeHeuristically(builder, startLineNumber, endLineNumber);
const changedTokens = this.setTokens(builder.finalize());
if (heuristicTokens) {
// We overrode tokens with heuristically computed ones.
// Because old states might get reused (thus stopping invalidation),
// we have to explicitly request the tokens for the changed ranges again.
for (const c of changedTokens.changes) {
this._backgroundTokenizer.value?.requestTokens(c.fromLineNumber, c.toLineNumber + 1);
}
}
this._defaultBackgroundTokenizer?.checkFinished();
}
public forceTokenization(lineNumber: number): void {
const builder = new ContiguousMultilineTokensBuilder();
this._tokenizer?.updateTokensUntilLine(builder, lineNumber);
this.setTokens(builder.finalize());
this._defaultBackgroundTokenizer?.checkFinished();
}
public hasAccurateTokensForLine(lineNumber: number): boolean {
if (!this._tokenizer) {
return true;
}
return this._tokenizer.hasAccurateTokensForLine(lineNumber);
}
public isCheapToTokenize(lineNumber: number): boolean {
if (!this._tokenizer) {
return true;
}
return this._tokenizer.isCheapToTokenize(lineNumber);
}
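// Returns the stored tokens for the line; when a verifying background tokenizer is active and both state stores are valid beyond this line, its tokens are compared and mismatches are reported.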
public getLineTokens(lineNumber: number): LineTokens {
const lineText = this._textModel.getLineContent(lineNumber);
const result = this._tokens.getTokens(
this._textModel.getLanguageId(),
lineNumber - 1,
lineText
);
if (this._debugBackgroundTokens && this._debugBackgroundStates && this._tokenizer) {
if (this._debugBackgroundStates.getFirstInvalidEndStateLineNumberOrMax() > lineNumber && this._tokenizer.store.getFirstInvalidEndStateLineNumberOrMax() > lineNumber) {
const backgroundResult = this._debugBackgroundTokens.getTokens(
this._textModel.getLanguageId(),
lineNumber - 1,
lineText
);
if (!result.equals(backgroundResult) && this._debugBackgroundTokenizer.value?.reportMismatchingTokens) {
this._debugBackgroundTokenizer.value.reportMismatchingTokens(lineNumber);
}
}
}
return result;
}
public getTokenTypeIfInsertingCharacter(lineNumber: number, column: number, character: string): StandardTokenType {
if (!this._tokenizer) {
return StandardTokenType.Other;
}
const position = this._textModel.validatePosition(new Position(lineNumber, column));
this.forceTokenization(position.lineNumber);
return this._tokenizer.getTokenTypeIfInsertingCharacter(position, character);
}
public tokenizeLinesAt(lineNumber: number, lines: string[]): LineTokens[] | null {
if (!this._tokenizer) {
return null;
}
this.forceTokenization(lineNumber);
return this._tokenizer.tokenizeLinesAt(lineNumber, lines);
}
public get hasTokens(): boolean {
return this._tokens.hasTokens;
}
}

View File

@ -2,9 +2,9 @@
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type * as Parser from '@vscode/tree-sitter-wasm';
import type * as TreeSitter from '@vscode/tree-sitter-wasm';
export function gotoNextSibling(newCursor: Parser.TreeCursor, oldCursor: Parser.TreeCursor) {
export function gotoNextSibling(newCursor: TreeSitter.TreeCursor, oldCursor: TreeSitter.TreeCursor) {
const n = newCursor.gotoNextSibling();
const o = oldCursor.gotoNextSibling();
if (n !== o) {
@ -13,7 +13,7 @@ export function gotoNextSibling(newCursor: Parser.TreeCursor, oldCursor: Parser.
return n && o;
}
export function gotoParent(newCursor: Parser.TreeCursor, oldCursor: Parser.TreeCursor) {
export function gotoParent(newCursor: TreeSitter.TreeCursor, oldCursor: TreeSitter.TreeCursor) {
const n = newCursor.gotoParent();
const o = oldCursor.gotoParent();
if (n !== o) {
@ -22,7 +22,7 @@ export function gotoParent(newCursor: Parser.TreeCursor, oldCursor: Parser.TreeC
return n && o;
}
export function gotoNthChild(newCursor: Parser.TreeCursor, oldCursor: Parser.TreeCursor, index: number) {
export function gotoNthChild(newCursor: TreeSitter.TreeCursor, oldCursor: TreeSitter.TreeCursor, index: number) {
const n = newCursor.gotoFirstChild();
const o = oldCursor.gotoFirstChild();
if (n !== o) {
@ -44,7 +44,7 @@ export function gotoNthChild(newCursor: Parser.TreeCursor, oldCursor: Parser.Tre
return n && o;
}
export function nextSiblingOrParentSibling(newCursor: Parser.TreeCursor, oldCursor: Parser.TreeCursor) {
export function nextSiblingOrParentSibling(newCursor: TreeSitter.TreeCursor, oldCursor: TreeSitter.TreeCursor) {
do {
if (newCursor.currentNode.nextSibling) {
return gotoNextSibling(newCursor, oldCursor);
@ -56,7 +56,7 @@ export function nextSiblingOrParentSibling(newCursor: Parser.TreeCursor, oldCurs
return false;
}
export function getClosestPreviousNodes(cursor: Parser.TreeCursor, tree: Parser.Tree): Parser.Node | undefined {
export function getClosestPreviousNodes(cursor: TreeSitter.TreeCursor, tree: TreeSitter.Tree): TreeSitter.Node | undefined {
// Go up parents until the end of the parent is before the start of the current.
const findPrev = tree.walk();
findPrev.resetTo(cursor);

View File

@ -3,8 +3,8 @@
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { IDisposable } from '../../../base/common/lifecycle.js';
import { ITextModel } from '../model.js';
import { IDisposable } from '../../../../../base/common/lifecycle.js';
import { ITextModel } from '../../../model.js';
class ListNode implements IDisposable {
parent?: ListNode;

View File

@ -0,0 +1,212 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type * as TreeSitter from '@vscode/tree-sitter-wasm';
import { Emitter, Event } from '../../../../../base/common/event.js';
import { toDisposable } from '../../../../../base/common/lifecycle.js';
import { StandardTokenType } from '../../../encodedTokenAttributes.js';
import { ILanguageIdCodec } from '../../../languages.js';
import { IModelContentChangedEvent } from '../../../textModelEvents.js';
import { BackgroundTokenizationState } from '../../../tokenizationTextModelPart.js';
import { LineTokens } from '../../../tokens/lineTokens.js';
import { TextModel } from '../../textModel.js';
import { AbstractSyntaxTokenBackend } from '../abstractSyntaxTokenBackend.js';
import { autorun, derived, IObservable, ObservablePromise } from '../../../../../base/common/observable.js';
import { TreeSitterTree } from './treeSitterTree.js';
import { IInstantiationService } from '../../../../../platform/instantiation/common/instantiation.js';
import { TreeSitterTokenizationImpl } from './treeSitterTokenizationImpl.js';
import { ITreeSitterLibraryService } from '../../../services/treeSitter/treeSitterLibraryService.js';
import { LineRange } from '../../../core/ranges/lineRange.js';
export class TreeSitterSyntaxTokenBackend extends AbstractSyntaxTokenBackend {
protected _backgroundTokenizationState: BackgroundTokenizationState = BackgroundTokenizationState.InProgress;
protected readonly _onDidChangeBackgroundTokenizationState: Emitter<void> = this._register(new Emitter<void>());
public readonly onDidChangeBackgroundTokenizationState: Event<void> = this._onDidChangeBackgroundTokenizationState.event;
private readonly _tree: IObservable<TreeSitterTree | undefined>;
private readonly _tokenizationImpl: IObservable<TreeSitterTokenizationImpl | undefined>;
constructor(
private readonly _languageIdObs: IObservable<string>,
languageIdCodec: ILanguageIdCodec,
textModel: TextModel,
visibleLineRanges: IObservable<readonly LineRange[]>,
@ITreeSitterLibraryService private readonly _treeSitterLibraryService: ITreeSitterLibraryService,
@IInstantiationService private readonly _instantiationService: IInstantiationService
) {
super(languageIdCodec, textModel);
const parserClassPromise = new ObservablePromise(this._treeSitterLibraryService.getParserClass());
const parserClassObs = derived(this, reader => {
const parser = parserClassPromise.promiseResult?.read(reader)?.getDataOrThrow();
return parser;
});
this._tree = derived(this, reader => {
const parserClass = parserClassObs.read(reader);
if (!parserClass) {
return undefined;
}
const currentLanguage = this._languageIdObs.read(reader);
const treeSitterLang = this._treeSitterLibraryService.getLanguage(currentLanguage, reader);
if (!treeSitterLang) {
return undefined;
}
const parser = new parserClass();
reader.store.add(toDisposable(() => {
parser.delete();
}));
parser.setLanguage(treeSitterLang);
const queries = this._treeSitterLibraryService.getInjectionQueries(currentLanguage, reader);
if (queries === undefined) {
return undefined;
}
return reader.store.add(this._instantiationService.createInstance(TreeSitterTree, currentLanguage, undefined, parser, parserClass, /*queries, */this._textModel));
});
this._tokenizationImpl = derived(this, reader => {
const treeModel = this._tree.read(reader);
if (!treeModel) {
return undefined;
}
const queries = this._treeSitterLibraryService.getHighlightingQueries(treeModel.languageId, reader);
if (!queries) {
return undefined;
}
return reader.store.add(this._instantiationService.createInstance(TreeSitterTokenizationImpl, treeModel, queries, this._languageIdCodec, visibleLineRanges));
});
this._register(autorun(reader => {
const tokModel = this._tokenizationImpl.read(reader);
if (!tokModel) {
return;
}
reader.store.add(tokModel.onDidChangeTokens((e) => {
this._onDidChangeTokens.fire(e.changes);
}));
reader.store.add(tokModel.onDidChangeBackgroundTokenization(e => {
this._backgroundTokenizationState = BackgroundTokenizationState.Completed;
this._onDidChangeBackgroundTokenizationState.fire();
}));
}));
}
get tree(): IObservable<TreeSitterTree | undefined> {
return this._tree;
}
get tokenizationImpl(): IObservable<TreeSitterTokenizationImpl | undefined> {
return this._tokenizationImpl;
}
public getLineTokens(lineNumber: number): LineTokens {
const model = this._tokenizationImpl.get();
if (!model) {
const content = this._textModel.getLineContent(lineNumber);
return LineTokens.createEmpty(content, this._languageIdCodec);
}
return model.getLineTokens(lineNumber);
}
public todo_resetTokenization(fireTokenChangeEvent: boolean = true): void {
if (fireTokenChangeEvent) {
this._onDidChangeTokens.fire({
semanticTokensApplied: false,
ranges: [
{
fromLineNumber: 1,
toLineNumber: this._textModel.getLineCount(),
},
],
});
}
}
public override handleDidChangeAttached(): void {
// TODO @alexr00 implement for background tokenization
}
public override handleDidChangeContent(e: IModelContentChangedEvent): void {
if (e.isFlush) {
// Don't fire the event, as the view might not have got the text change event yet
this.todo_resetTokenization(false);
} else {
const model = this._tokenizationImpl.get();
model?.handleContentChanged(e);
}
const treeModel = this._tree.get();
treeModel?.handleContentChange(e);
}
public override forceTokenization(lineNumber: number): void {
const model = this._tokenizationImpl.get();
if (!model) {
return;
}
if (!model.hasAccurateTokensForLine(lineNumber)) {
model.tokenizeEncoded(lineNumber);
}
}
public override hasAccurateTokensForLine(lineNumber: number): boolean {
const model = this._tokenizationImpl.get();
if (!model) {
return false;
}
return model.hasAccurateTokensForLine(lineNumber);
}
public override isCheapToTokenize(lineNumber: number): boolean {
// TODO @alexr00 determine what makes it cheap to tokenize?
return true;
}
public override getTokenTypeIfInsertingCharacter(lineNumber: number, column: number, character: string): StandardTokenType {
// TODO @alexr00 implement once we have custom parsing and don't just feed in the whole text model value
return StandardTokenType.Other;
}
public override tokenizeLinesAt(lineNumber: number, lines: string[]): LineTokens[] | null {
const model = this._tokenizationImpl.get();
if (!model) {
return null;
}
return model.tokenizeLinesAt(lineNumber, lines);
}
public override get hasTokens(): boolean {
const model = this._tokenizationImpl.get();
if (!model) {
return false;
}
return model.hasTokens();
}
}
export function rangesEqual(a: TreeSitter.Range, b: TreeSitter.Range) {
return (a.startPosition.row === b.startPosition.row)
&& (a.startPosition.column === b.startPosition.column)
&& (a.endPosition.row === b.endPosition.row)
&& (a.endPosition.column === b.endPosition.column)
&& (a.startIndex === b.startIndex)
&& (a.endIndex === b.endIndex);
}
export function rangesIntersect(a: TreeSitter.Range, b: TreeSitter.Range) {
return (a.startIndex <= b.startIndex && a.endIndex >= b.startIndex) ||
(b.startIndex <= a.startIndex && b.endIndex >= a.startIndex);
}

View File

@ -0,0 +1,815 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { Emitter, Event } from '../../../../../base/common/event.js';
import { Disposable } from '../../../../../base/common/lifecycle.js';
import { setTimeout0 } from '../../../../../base/common/platform.js';
import { StopWatch } from '../../../../../base/common/stopwatch.js';
import { LanguageId } from '../../../encodedTokenAttributes.js';
import { ILanguageIdCodec, QueryCapture } from '../../../languages.js';
import { IModelContentChangedEvent, IModelTokensChangedEvent } from '../../../textModelEvents.js';
import { findLikelyRelevantLines } from '../../textModelTokens.js';
import { TokenStore, TokenUpdate, TokenQuality } from './tokenStore.js';
import { TreeSitterTree, RangeChange, RangeWithOffsets } from './treeSitterTree.js';
import type * as TreeSitter from '@vscode/tree-sitter-wasm';
import { autorun, autorunHandleChanges, IObservable, recordChanges, runOnChange } from '../../../../../base/common/observable.js';
import { LineRange } from '../../../core/ranges/lineRange.js';
import { LineTokens } from '../../../tokens/lineTokens.js';
import { Position } from '../../../core/position.js';
import { Range } from '../../../core/range.js';
import { isDefined } from '../../../../../base/common/types.js';
import { ITreeSitterThemeService } from '../../../services/treeSitter/treeSitterThemeService.js';
import { BugIndicatingError } from '../../../../../base/common/errors.js';
export class TreeSitterTokenizationImpl extends Disposable {
private readonly _tokenStore: TokenStore;
private _accurateVersion: number;
private _guessVersion: number;
private readonly _onDidChangeTokens: Emitter<{ changes: IModelTokensChangedEvent }> = this._register(new Emitter());
public readonly onDidChangeTokens: Event<{ changes: IModelTokensChangedEvent }> = this._onDidChangeTokens.event;
private readonly _onDidCompleteBackgroundTokenization: Emitter<void> = this._register(new Emitter());
public readonly onDidChangeBackgroundTokenization: Event<void> = this._onDidCompleteBackgroundTokenization.event;
private _encodedLanguageId: LanguageId;
private get _textModel() {
return this._tree.textModel;
}
constructor(
private readonly _tree: TreeSitterTree,
private readonly _highlightingQueries: TreeSitter.Query,
private readonly _languageIdCodec: ILanguageIdCodec,
private readonly _visibleLineRanges: IObservable<readonly LineRange[]>,
@ITreeSitterThemeService private readonly _treeSitterThemeService: ITreeSitterThemeService,
) {
super();
this._encodedLanguageId = this._languageIdCodec.encodeLanguageId(this._tree.languageId);
this._register(runOnChange(this._treeSitterThemeService.onChange, () => {
this._updateTheme();
}));
this._tokenStore = this._register(new TokenStore(this._textModel));
this._accurateVersion = this._textModel.getVersionId();
this._guessVersion = this._textModel.getVersionId();
this._tokenStore.buildStore(this._createEmptyTokens(), TokenQuality.None);
this._register(autorun(reader => {
const visibleLineRanges = this._visibleLineRanges.read(reader);
this._parseAndTokenizeViewPort(visibleLineRanges);
}));
this._register(autorunHandleChanges({
owner: this,
changeTracker: recordChanges({ tree: this._tree.tree }),
}, (reader, ctx) => {
const changeEvent = ctx.changes.at(0)?.change;
if (ctx.changes.length > 1) {
throw new BugIndicatingError('The tree changed twice in one transaction. This is currently not supported and should not happen.');
}
if (!changeEvent) {
if (ctx.tree) {
this._firstTreeUpdate(this._tree.treeLastParsedVersion.read(reader));
}
} else {
if (this.hasTokens()) {
// Mark the range for refresh immediately
for (const range of changeEvent.ranges) {
this._markForRefresh(range.newRange);
}
}
// First time we see a tree we need to build a token store.
if (!this.hasTokens()) {
this._firstTreeUpdate(changeEvent.versionId);
} else {
this._handleTreeUpdate(changeEvent.ranges, changeEvent.versionId);
}
}
}));
}
public handleContentChanged(e: IModelContentChangedEvent): void {
this._guessVersion = e.versionId;
for (const change of e.changes) {
if (change.text.length > change.rangeLength) {
// If possible, use the token before the change as the starting point for the new token.
// This makes it more likely that the new text gets the correct color, since typing usually happens at the end of the token.
const offset = change.rangeOffset > 0 ? change.rangeOffset - 1 : change.rangeOffset;
const oldToken = this._tokenStore.getTokenAt(offset);
let newToken: TokenUpdate;
if (oldToken) {
// Insert. Just grow the token at this position to include the insert.
newToken = { startOffsetInclusive: oldToken.startOffsetInclusive, length: oldToken.length + change.text.length - change.rangeLength, token: oldToken.token };
// Also mark tokens that are in the range of the change as needing a refresh.
this._tokenStore.markForRefresh(offset, change.rangeOffset + (change.text.length > change.rangeLength ? change.text.length : change.rangeLength));
} else {
// The document got larger and the change is at the end of the document.
newToken = { startOffsetInclusive: offset, length: change.text.length, token: 0 };
}
this._tokenStore.update(oldToken?.length ?? 0, [newToken], TokenQuality.EditGuess);
} else if (change.text.length < change.rangeLength) {
// Delete. Delete the tokens at the corresponding range.
const deletedCharCount = change.rangeLength - change.text.length;
this._tokenStore.delete(deletedCharCount, change.rangeOffset);
}
}
}
public getLineTokens(lineNumber: number) {
const content = this._textModel.getLineContent(lineNumber);
const rawTokens = this.getTokens(lineNumber);
return new LineTokens(rawTokens, content, this._languageIdCodec);
}
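// Creates a single placeholder token that spans the whole document, used to seed the token store.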
private _createEmptyTokens() {
const emptyToken = this._emptyToken();
const modelEndOffset = this._textModel.getValueLength();
const emptyTokens: TokenUpdate[] = [this._emptyTokensForOffsetAndLength(0, modelEndOffset, emptyToken)];
return emptyTokens;
}
private _emptyToken() {
return this._treeSitterThemeService.findMetadata([], this._encodedLanguageId, false, undefined);
}
private _emptyTokensForOffsetAndLength(offset: number, length: number, emptyToken: number): TokenUpdate {
return { token: emptyToken, length: offset + length, startOffsetInclusive: 0 };
}
public hasAccurateTokensForLine(lineNumber: number): boolean {
return this.hasTokens(new Range(lineNumber, 1, lineNumber, this._textModel.getLineMaxColumn(lineNumber)));
}
public tokenizeLinesAt(lineNumber: number, lines: string[]): LineTokens[] | null {
const rawLineTokens = this._guessTokensForLinesContent(lineNumber, lines);
const lineTokens: LineTokens[] = [];
if (!rawLineTokens) {
return null;
}
for (let i = 0; i < rawLineTokens.length; i++) {
lineTokens.push(new LineTokens(rawLineTokens[i], lines[i], this._languageIdCodec));
}
return lineTokens;
}
private _rangeHasTokens(range: Range, minimumTokenQuality: TokenQuality): boolean {
return this._tokenStore.rangeHasTokens(this._textModel.getOffsetAt(range.getStartPosition()), this._textModel.getOffsetAt(range.getEndPosition()), minimumTokenQuality);
}
public hasTokens(accurateForRange?: Range): boolean {
if (!accurateForRange || (this._guessVersion === this._accurateVersion)) {
return true;
}
return !this._tokenStore.rangeNeedsRefresh(this._textModel.getOffsetAt(accurateForRange.getStartPosition()), this._textModel.getOffsetAt(accurateForRange.getEndPosition()));
}
public getTokens(line: number): Uint32Array {
const lineStartOffset = this._textModel.getOffsetAt({ lineNumber: line, column: 1 });
const lineEndOffset = this._textModel.getOffsetAt({ lineNumber: line, column: this._textModel.getLineLength(line) + 1 });
const lineTokens = this._tokenStore.getTokensInRange(lineStartOffset, lineEndOffset);
const result = new Uint32Array(lineTokens.length * 2);
for (let i = 0; i < lineTokens.length; i++) {
result[i * 2] = lineTokens[i].startOffsetInclusive - lineStartOffset + lineTokens[i].length;
result[i * 2 + 1] = lineTokens[i].token;
}
return result;
}
getTokensInRange(range: Range, rangeStartOffset: number, rangeEndOffset: number, captures?: QueryCapture[]): TokenUpdate[] | undefined {
const tokens = captures ? this._tokenizeCapturesWithMetadata(captures, rangeStartOffset, rangeEndOffset) : this._tokenize(range, rangeStartOffset, rangeEndOffset);
if (tokens?.endOffsetsAndMetadata) {
return this._rangeTokensAsUpdates(rangeStartOffset, tokens.endOffsetsAndMetadata);
}
return undefined;
}
private _updateTokensInStore(version: number, updates: { oldRangeLength?: number; newTokens: TokenUpdate[] }[], tokenQuality: TokenQuality): void {
this._accurateVersion = version;
for (const update of updates) {
const lastToken = update.newTokens.length > 0 ? update.newTokens[update.newTokens.length - 1] : undefined;
let oldRangeLength: number;
if (lastToken && (this._guessVersion >= version)) {
oldRangeLength = lastToken.startOffsetInclusive + lastToken.length - update.newTokens[0].startOffsetInclusive;
} else if (update.oldRangeLength) {
oldRangeLength = update.oldRangeLength;
} else {
oldRangeLength = 0;
}
this._tokenStore.update(oldRangeLength, update.newTokens, tokenQuality);
}
}
private _markForRefresh(range: Range): void {
this._tokenStore.markForRefresh(this._textModel.getOffsetAt(range.getStartPosition()), this._textModel.getOffsetAt(range.getEndPosition()));
}
private _getNeedsRefresh(): { range: Range; startOffset: number; endOffset: number }[] {
const needsRefreshOffsetRanges = this._tokenStore.getNeedsRefresh();
if (!needsRefreshOffsetRanges) {
return [];
}
return needsRefreshOffsetRanges.map(range => ({
range: Range.fromPositions(this._textModel.getPositionAt(range.startOffset), this._textModel.getPositionAt(range.endOffset)),
startOffset: range.startOffset,
endOffset: range.endOffset
}));
}
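// Parses and tokenizes only the visible ranges so the viewport gets colors before the full tree is available; the results are stored as viewport-quality guesses.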
private _parseAndTokenizeViewPort(lineRanges: readonly LineRange[]) {
const viewportRanges = lineRanges.map(r => r.toInclusiveRange()).filter(isDefined);
for (const range of viewportRanges) {
const startOffsetOfRangeInDocument = this._textModel.getOffsetAt(range.getStartPosition());
const endOffsetOfRangeInDocument = this._textModel.getOffsetAt(range.getEndPosition());
const version = this._textModel.getVersionId();
if (this._rangeHasTokens(range, TokenQuality.ViewportGuess)) {
continue;
}
const content = this._textModel.getValueInRange(range);
const tokenUpdates = this._forceParseAndTokenizeContent(range, startOffsetOfRangeInDocument, endOffsetOfRangeInDocument, content, true);
if (!tokenUpdates || this._rangeHasTokens(range, TokenQuality.ViewportGuess)) {
continue;
}
if (tokenUpdates.length === 0) {
continue;
}
const lastToken = tokenUpdates[tokenUpdates.length - 1];
const oldRangeLength = lastToken.startOffsetInclusive + lastToken.length - tokenUpdates[0].startOffsetInclusive;
this._updateTokensInStore(version, [{ newTokens: tokenUpdates, oldRangeLength }], TokenQuality.ViewportGuess);
this._onDidChangeTokens.fire({ changes: { semanticTokensApplied: false, ranges: [{ fromLineNumber: range.startLineNumber, toLineNumber: range.endLineNumber }] } });
}
}
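// Tokenizes the given lines as if they started at lineNumber by parsing them in isolation, then splits the result back into one Uint32Array of tokens per line.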
private _guessTokensForLinesContent(lineNumber: number, lines: string[]): Uint32Array[] | undefined {
if (lines.length === 0) {
return undefined;
}
const lineContent = lines.join(this._textModel.getEOL());
const range = new Range(1, 1, lineNumber + lines.length, lines[lines.length - 1].length + 1);
const startOffset = this._textModel.getOffsetAt({ lineNumber, column: 1 });
const tokens = this._forceParseAndTokenizeContent(range, startOffset, startOffset + lineContent.length, lineContent, false);
if (!tokens) {
return undefined;
}
const tokensByLine: Uint32Array[] = new Array(lines.length);
let tokensIndex: number = 0;
let tokenStartOffset = 0;
let lineStartOffset = 0;
for (let i = 0; i < lines.length; i++) {
const tokensForLine: EndOffsetToken[] = [];
let moveToNextLine = false;
for (let j = tokensIndex; (!moveToNextLine && (j < tokens.length)); j++) {
const token = tokens[j];
const lineAdjustedEndOffset = token.endOffset - lineStartOffset;
const lineAdjustedStartOffset = tokenStartOffset - lineStartOffset;
if (lineAdjustedEndOffset <= lines[i].length) {
tokensForLine.push({ endOffset: lineAdjustedEndOffset, metadata: token.metadata });
tokensIndex++;
} else if (lineAdjustedStartOffset < lines[i].length) {
const partialToken: EndOffsetToken = { endOffset: lines[i].length, metadata: token.metadata };
tokensForLine.push(partialToken);
moveToNextLine = true;
} else {
moveToNextLine = true;
}
tokenStartOffset = token.endOffset;
}
tokensByLine[i] = this._endOffsetTokensToUint32Array(tokensForLine);
lineStartOffset += lines[i].length + this._textModel.getEOL().length;
}
return tokensByLine;
}
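// Synchronously parses the given content, prefixed with likely relevant preceding lines for context, and returns the tokens either as store updates or as raw end-offset tokens.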
private _forceParseAndTokenizeContent(range: Range, startOffsetOfRangeInDocument: number, endOffsetOfRangeInDocument: number, content: string, asUpdate: true): TokenUpdate[] | undefined;
private _forceParseAndTokenizeContent(range: Range, startOffsetOfRangeInDocument: number, endOffsetOfRangeInDocument: number, content: string, asUpdate: false): EndOffsetToken[] | undefined;
private _forceParseAndTokenizeContent(range: Range, startOffsetOfRangeInDocument: number, endOffsetOfRangeInDocument: number, content: string, asUpdate: boolean): EndOffsetToken[] | TokenUpdate[] | undefined {
const likelyRelevantLines = findLikelyRelevantLines(this._textModel, range.startLineNumber).likelyRelevantLines;
const likelyRelevantPrefix = likelyRelevantLines.join(this._textModel.getEOL());
const tree = this._tree.createParsedTreeSync(`${likelyRelevantPrefix}${content}`);
if (!tree) {
return;
}
const treeRange = new Range(1, 1, range.endLineNumber - range.startLineNumber + 1 + likelyRelevantLines.length, range.endColumn);
const captures = this.captureAtRange(treeRange);
const tokens = this._tokenizeCapturesWithMetadata(captures, likelyRelevantPrefix.length, endOffsetOfRangeInDocument - startOffsetOfRangeInDocument + likelyRelevantPrefix.length);
tree.delete();
if (!tokens) {
return;
}
if (asUpdate) {
return this._rangeTokensAsUpdates(startOffsetOfRangeInDocument, tokens.endOffsetsAndMetadata, likelyRelevantPrefix.length);
} else {
return tokens.endOffsetsAndMetadata;
}
}
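// On the first parse of a tree, start by tokenizing only the currently visible line ranges.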
private _firstTreeUpdate(versionId: number) {
return this._setViewPortTokens(versionId);
}
private _setViewPortTokens(versionId: number) {
const rangeChanges = this._visibleLineRanges.get().map<RangeChange | undefined>(lineRange => {
const range = lineRange.toInclusiveRange();
if (!range) { return undefined; }
const newRangeStartOffset = this._textModel.getOffsetAt(range.getStartPosition());
const newRangeEndOffset = this._textModel.getOffsetAt(range.getEndPosition());
return {
newRange: range,
newRangeEndOffset,
newRangeStartOffset,
};
}).filter(isDefined);
return this._handleTreeUpdate(rangeChanges, versionId);
}
/**
* Do not await in this method; doing so will cause a race
*/
private _handleTreeUpdate(ranges: RangeChange[], versionId: number) {
const rangeChanges: RangeWithOffsets[] = [];
const chunkSize = 1000;
for (let i = 0; i < ranges.length; i++) {
const rangeLinesLength = ranges[i].newRange.endLineNumber - ranges[i].newRange.startLineNumber;
if (rangeLinesLength > chunkSize) {
// Split the range into chunks to avoid long operations
const fullRangeEndLineNumber = ranges[i].newRange.endLineNumber;
let chunkLineStart = ranges[i].newRange.startLineNumber;
let chunkColumnStart = ranges[i].newRange.startColumn;
let chunkLineEnd = chunkLineStart + chunkSize;
do {
const chunkStartingPosition = new Position(chunkLineStart, chunkColumnStart);
const chunkEndColumn = ((chunkLineEnd === ranges[i].newRange.endLineNumber) ? ranges[i].newRange.endColumn : this._textModel.getLineMaxColumn(chunkLineEnd));
const chunkEndPosition = new Position(chunkLineEnd, chunkEndColumn);
const chunkRange = Range.fromPositions(chunkStartingPosition, chunkEndPosition);
rangeChanges.push({
range: chunkRange,
startOffset: this._textModel.getOffsetAt(chunkRange.getStartPosition()),
endOffset: this._textModel.getOffsetAt(chunkRange.getEndPosition())
});
chunkLineStart = chunkLineEnd + 1;
chunkColumnStart = 1;
if (chunkLineEnd < fullRangeEndLineNumber && chunkLineEnd + chunkSize > fullRangeEndLineNumber) {
chunkLineEnd = fullRangeEndLineNumber;
} else {
chunkLineEnd = chunkLineEnd + chunkSize;
}
} while (chunkLineEnd <= fullRangeEndLineNumber);
} else {
// Check that the previous range doesn't overlap with this one
if ((i === 0) || (rangeChanges[i - 1].endOffset < ranges[i].newRangeStartOffset)) {
rangeChanges.push({
range: ranges[i].newRange,
startOffset: ranges[i].newRangeStartOffset,
endOffset: ranges[i].newRangeEndOffset
});
} else if (rangeChanges[i - 1].endOffset < ranges[i].newRangeEndOffset) {
// clip the range to the previous range
const startPosition = this._textModel.getPositionAt(rangeChanges[i - 1].endOffset + 1);
const range = new Range(startPosition.lineNumber, startPosition.column, ranges[i].newRange.endLineNumber, ranges[i].newRange.endColumn);
rangeChanges.push({
range,
startOffset: rangeChanges[i - 1].endOffset + 1,
endOffset: ranges[i].newRangeEndOffset
});
}
}
}
// Get the captures immediately while the text model is correct
const captures = rangeChanges.map(range => this._getCaptures(range.range));
// Don't block
return this._updateTreeForRanges(rangeChanges, versionId, captures).then(() => {
if (!this._textModel.isDisposed() && (this._tree.treeLastParsedVersion.get() === this._textModel.getVersionId())) {
this._refreshNeedsRefresh(versionId);
}
});
}
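// Applies the pre-computed captures range by range, firing token change events and yielding to the event loop in between; stops early if the model version has changed.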
private async _updateTreeForRanges(rangeChanges: RangeWithOffsets[], versionId: number, captures: QueryCapture[][]) {
let tokenUpdate: { newTokens: TokenUpdate[] } | undefined;
for (let i = 0; i < rangeChanges.length; i++) {
if (!this._textModel.isDisposed() && versionId !== this._textModel.getVersionId()) {
// Our captures have become invalid and we need to re-capture
break;
}
const capture = captures[i];
const range = rangeChanges[i];
const updates = this.getTokensInRange(range.range, range.startOffset, range.endOffset, capture);
if (updates) {
tokenUpdate = { newTokens: updates };
} else {
tokenUpdate = { newTokens: [] };
}
this._updateTokensInStore(versionId, [tokenUpdate], TokenQuality.Accurate);
this._onDidChangeTokens.fire({
changes: {
semanticTokensApplied: false,
ranges: [{ fromLineNumber: range.range.getStartPosition().lineNumber, toLineNumber: range.range.getEndPosition().lineNumber }]
}
});
await new Promise<void>(resolve => setTimeout0(resolve));
}
this._onDidCompleteBackgroundTokenization.fire();
}
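// Re-runs the tree update for any ranges that are still marked as needing a refresh.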
private _refreshNeedsRefresh(versionId: number) {
const rangesToRefresh = this._getNeedsRefresh();
if (rangesToRefresh.length === 0) {
return;
}
const rangeChanges: RangeChange[] = new Array(rangesToRefresh.length);
for (let i = 0; i < rangesToRefresh.length; i++) {
const range = rangesToRefresh[i];
rangeChanges[i] = {
newRange: range.range,
newRangeStartOffset: range.startOffset,
newRangeEndOffset: range.endOffset
};
}
this._handleTreeUpdate(rangeChanges, versionId);
}
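// Converts end-offset tokens into TokenUpdate entries anchored at the given document offset, skipping tokens that end before startingOffsetInArray (e.g. a context prefix).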
private _rangeTokensAsUpdates(rangeOffset: number, endOffsetToken: EndOffsetToken[], startingOffsetInArray?: number) {
const updates: TokenUpdate[] = [];
let lastEnd = 0;
for (const token of endOffsetToken) {
if (token.endOffset <= lastEnd || (startingOffsetInArray && (token.endOffset < startingOffsetInArray))) {
continue;
}
let tokenUpdate: TokenUpdate;
if (startingOffsetInArray && (lastEnd < startingOffsetInArray)) {
tokenUpdate = { startOffsetInclusive: rangeOffset + startingOffsetInArray, length: token.endOffset - startingOffsetInArray, token: token.metadata };
} else {
tokenUpdate = { startOffsetInclusive: rangeOffset + lastEnd, length: token.endOffset - lastEnd, token: token.metadata };
}
updates.push(tokenUpdate);
lastEnd = token.endOffset;
}
return updates;
}
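// When the theme changes, mark the whole model for refresh and re-tokenize the visible ranges right away.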
private _updateTheme() {
const modelRange = this._textModel.getFullModelRange();
this._markForRefresh(modelRange);
this._parseAndTokenizeViewPort(this._visibleLineRanges.get());
}
// Was used for inspect editor tokens command
captureAtPosition(lineNumber: number, column: number): QueryCapture[] {
const captures = this.captureAtRangeWithInjections(new Range(lineNumber, column, lineNumber, column + 1));
return captures;
}
// Was used for the colorization tests
captureAtRangeTree(range: Range): QueryCapture[] {
const captures = this.captureAtRangeWithInjections(range);
return captures;
}
private captureAtRange(range: Range): QueryCapture[] {
const tree = this._tree.tree.get();
if (!tree) {
return [];
}
// Tree sitter row is 0 based, column is 0 based
return this._highlightingQueries.captures(tree.rootNode, { startPosition: { row: range.startLineNumber - 1, column: range.startColumn - 1 }, endPosition: { row: range.endLineNumber - 1, column: range.endColumn - 1 } }).map(capture => (
{
name: capture.name,
text: capture.node.text,
node: {
startIndex: capture.node.startIndex,
endIndex: capture.node.endIndex,
startPosition: {
lineNumber: capture.node.startPosition.row + 1,
column: capture.node.startPosition.column + 1
},
endPosition: {
lineNumber: capture.node.endPosition.row + 1,
column: capture.node.endPosition.column + 1
}
},
encodedLanguageId: this._encodedLanguageId
}
));
}
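// Runs the highlighting query for the range and splices in captures from injected languages (injection support is currently stubbed out and contributes no extra captures).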
private captureAtRangeWithInjections(range: Range): QueryCapture[] {
const captures: QueryCapture[] = this.captureAtRange(range);
for (let i = 0; i < captures.length; i++) {
const capture = captures[i];
const capStartLine = capture.node.startPosition.lineNumber;
const capEndLine = capture.node.endPosition.lineNumber;
const capStartColumn = capture.node.startPosition.column;
const capEndColumn = capture.node.endPosition.column;
const startLine = ((capStartLine > range.startLineNumber) && (capStartLine < range.endLineNumber)) ? capStartLine : range.startLineNumber;
const endLine = ((capEndLine > range.startLineNumber) && (capEndLine < range.endLineNumber)) ? capEndLine : range.endLineNumber;
const startColumn = (capStartLine === range.startLineNumber) ? (capStartColumn < range.startColumn ? range.startColumn : capStartColumn) : (capStartLine < range.startLineNumber ? range.startColumn : capStartColumn);
const endColumn = (capEndLine === range.endLineNumber) ? (capEndColumn > range.endColumn ? range.endColumn : capEndColumn) : (capEndLine > range.endLineNumber ? range.endColumn : capEndColumn);
const injectionRange = new Range(startLine, startColumn, endLine, endColumn);
const injection = this._getInjectionCaptures(capture, injectionRange);
if (injection && injection.length > 0) {
captures.splice(i + 1, 0, ...injection);
i += injection.length;
}
}
return captures;
}
/**
* Tokenizes the given line and stores the resulting tokens if the text model version is unchanged.
* In the encoded token array, each token takes 2 elements: the first element is the offset of the end of the token *in the line, not in the document*, and the second element is the metadata.
*/
public tokenizeEncoded(lineNumber: number) {
const tokens = this._tokenizeEncoded(lineNumber);
if (!tokens) {
return undefined;
}
const updates = this._rangeTokensAsUpdates(this._textModel.getOffsetAt({ lineNumber, column: 1 }), tokens.result);
if (tokens.versionId === this._textModel.getVersionId()) {
this._updateTokensInStore(tokens.versionId, [{ newTokens: updates, oldRangeLength: this._textModel.getLineLength(lineNumber) }], TokenQuality.Accurate);
}
}
public tokenizeEncodedInstrumented(lineNumber: number): { result: Uint32Array; captureTime: number; metadataTime: number } | undefined {
const tokens = this._tokenizeEncoded(lineNumber);
if (!tokens) {
return undefined;
}
return { result: this._endOffsetTokensToUint32Array(tokens.result), captureTime: tokens.captureTime, metadataTime: tokens.metadataTime };
}
private _getCaptures(range: Range): QueryCapture[] {
const captures = this.captureAtRangeWithInjections(range);
return captures;
}
private _tokenize(range: Range, rangeStartOffset: number, rangeEndOffset: number): { endOffsetsAndMetadata: { endOffset: number; metadata: number }[]; versionId: number; captureTime: number; metadataTime: number } | undefined {
const captures = this._getCaptures(range);
const result = this._tokenizeCapturesWithMetadata(captures, rangeStartOffset, rangeEndOffset);
if (!result) {
return undefined;
}
return { ...result, versionId: this._tree.treeLastParsedVersion.get() };
}
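// Turns the raw captures into an ordered list of end offsets with scopes that covers the whole range, filling uncaptured gaps with the base scope and merging captures that span the same characters.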
private _createTokensFromCaptures(captures: QueryCapture[], rangeStartOffset: number, rangeEndOffset: number): { endOffsets: EndOffsetAndScopes[]; captureTime: number } | undefined {
const tree = this._tree.tree.get();
const stopwatch = StopWatch.create();
const rangeLength = rangeEndOffset - rangeStartOffset;
const encodedLanguageId = this._languageIdCodec.encodeLanguageId(this._tree.languageId);
const baseScope: string = TREESITTER_BASE_SCOPES[this._tree.languageId] || 'source';
if (captures.length === 0) {
if (tree) {
stopwatch.stop();
const endOffsetsAndMetadata = [{ endOffset: rangeLength, scopes: [], encodedLanguageId }];
return { endOffsets: endOffsetsAndMetadata, captureTime: stopwatch.elapsed() };
}
return undefined;
}
const endOffsetsAndScopes: EndOffsetAndScopes[] = Array(captures.length);
endOffsetsAndScopes.fill({ endOffset: 0, scopes: [baseScope], encodedLanguageId });
let tokenIndex = 0;
const increaseSizeOfTokensByOneToken = () => {
endOffsetsAndScopes.push({ endOffset: 0, scopes: [baseScope], encodedLanguageId });
};
const brackets = (capture: QueryCapture, startOffset: number): number[] | undefined => {
return (capture.name.includes('punctuation') && capture.text) ? Array.from(capture.text.matchAll(BRACKETS)).map(match => startOffset + match.index) : undefined;
};
const addCurrentTokenToArray = (capture: QueryCapture, startOffset: number, endOffset: number, position?: number) => {
if (position !== undefined) {
const oldScopes = endOffsetsAndScopes[position].scopes;
let oldBracket = endOffsetsAndScopes[position].bracket;
// Check that the previous token ends at the same point that the current token starts
const prevEndOffset = position > 0 ? endOffsetsAndScopes[position - 1].endOffset : 0;
if (prevEndOffset !== startOffset) {
let preInsertBracket: number[] | undefined = undefined;
if (oldBracket && oldBracket.length > 0) {
preInsertBracket = [];
const postInsertBracket: number[] = [];
for (let i = 0; i < oldBracket.length; i++) {
const bracket = oldBracket[i];
if (bracket < startOffset) {
preInsertBracket.push(bracket);
} else if (bracket > endOffset) {
postInsertBracket.push(bracket);
}
}
if (preInsertBracket.length === 0) {
preInsertBracket = undefined;
}
if (postInsertBracket.length === 0) {
oldBracket = undefined;
} else {
oldBracket = postInsertBracket;
}
}
// Insert a token with the old scopes to cover the gap before the current token
endOffsetsAndScopes.splice(position, 0, { endOffset: startOffset, scopes: [...oldScopes], bracket: preInsertBracket, encodedLanguageId: capture.encodedLanguageId });
position++;
increaseSizeOfTokensByOneToken();
tokenIndex++;
}
endOffsetsAndScopes.splice(position, 0, { endOffset: endOffset, scopes: [...oldScopes, capture.name], bracket: brackets(capture, startOffset), encodedLanguageId: capture.encodedLanguageId });
endOffsetsAndScopes[tokenIndex].bracket = oldBracket;
} else {
endOffsetsAndScopes[tokenIndex] = { endOffset: endOffset, scopes: [baseScope, capture.name], bracket: brackets(capture, startOffset), encodedLanguageId: capture.encodedLanguageId };
}
tokenIndex++;
};
for (let captureIndex = 0; captureIndex < captures.length; captureIndex++) {
const capture = captures[captureIndex];
const tokenEndIndex = capture.node.endIndex < rangeEndOffset ? ((capture.node.endIndex < rangeStartOffset) ? rangeStartOffset : capture.node.endIndex) : rangeEndOffset;
const tokenStartIndex = capture.node.startIndex < rangeStartOffset ? rangeStartOffset : capture.node.startIndex;
const endOffset = tokenEndIndex - rangeStartOffset;
// Not every character will get captured, so we need to make sure that our current capture doesn't bleed toward the start of the line and cover characters that it doesn't apply to.
// We do this by creating a new token in the array if the previous token ends before the current token starts.
let previousEndOffset: number;
const currentTokenLength = tokenEndIndex - tokenStartIndex;
if (captureIndex > 0) {
previousEndOffset = endOffsetsAndScopes[(tokenIndex - 1)].endOffset;
} else {
previousEndOffset = tokenStartIndex - rangeStartOffset - 1;
}
const startOffset = endOffset - currentTokenLength;
if ((previousEndOffset >= 0) && (previousEndOffset < startOffset)) {
// Add an empty token to cover the space where there were no captures
endOffsetsAndScopes[tokenIndex] = { endOffset: startOffset, scopes: [baseScope], encodedLanguageId: this._encodedLanguageId };
tokenIndex++;
increaseSizeOfTokensByOneToken();
}
if (currentTokenLength < 0) {
// This happens when we have a token "gap" right at the end of the capture range. The last capture isn't used because its start index isn't included in the range.
continue;
}
if (previousEndOffset >= endOffset) {
// walk back through the tokens until we find the one that contains the current token
let withinTokenIndex = tokenIndex - 1;
let previousTokenEndOffset = endOffsetsAndScopes[withinTokenIndex].endOffset;
let previousTokenStartOffset = ((withinTokenIndex >= 2) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);
do {
// Check that the current token doesn't just replace the last token
if ((previousTokenStartOffset + currentTokenLength) === previousTokenEndOffset) {
if (previousTokenStartOffset === startOffset) {
// The current token and the previous token span exactly the same characters; add the scopes to the previous token
endOffsetsAndScopes[withinTokenIndex].scopes.push(capture.name);
const oldBracket = endOffsetsAndScopes[withinTokenIndex].bracket;
endOffsetsAndScopes[withinTokenIndex].bracket = ((oldBracket && (oldBracket.length > 0)) ? oldBracket : brackets(capture, startOffset));
}
} else if (previousTokenStartOffset <= startOffset) {
addCurrentTokenToArray(capture, startOffset, endOffset, withinTokenIndex);
break;
}
withinTokenIndex--;
previousTokenStartOffset = ((withinTokenIndex >= 1) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);
previousTokenEndOffset = ((withinTokenIndex >= 0) ? endOffsetsAndScopes[withinTokenIndex].endOffset : 0);
} while (previousTokenEndOffset > startOffset);
} else {
// Just add the token to the array
addCurrentTokenToArray(capture, startOffset, endOffset);
}
}
// Account for uncaptured characters at the end of the line
if ((endOffsetsAndScopes[tokenIndex - 1].endOffset < rangeLength)) {
if (rangeLength - endOffsetsAndScopes[tokenIndex - 1].endOffset > 0) {
increaseSizeOfTokensByOneToken();
endOffsetsAndScopes[tokenIndex] = { endOffset: rangeLength, scopes: endOffsetsAndScopes[tokenIndex].scopes, encodedLanguageId: this._encodedLanguageId };
tokenIndex++;
}
}
for (let i = 0; i < endOffsetsAndScopes.length; i++) {
const token = endOffsetsAndScopes[i];
if (token.endOffset === 0 && i !== 0) {
endOffsetsAndScopes.splice(i, endOffsetsAndScopes.length - i);
break;
}
}
const captureTime = stopwatch.elapsed();
return { endOffsets: endOffsetsAndScopes as { endOffset: number; scopes: string[]; encodedLanguageId: LanguageId }[], captureTime };
}
private _getInjectionCaptures(parentCapture: QueryCapture, range: Range): QueryCapture[] {
/*
const injection = textModelTreeSitter.getInjection(parentCapture.node.startIndex, this._treeSitterModel.languageId);
if (!injection?.tree || injection.versionId !== textModelTreeSitter.parseResult?.versionId) {
return undefined;
}
const feature = TreeSitterTokenizationRegistry.get(injection.languageId);
if (!feature) {
return undefined;
}
return feature.tokSupport_captureAtRangeTree(range, injection.tree, textModelTreeSitter);*/
return [];
}
private _tokenizeCapturesWithMetadata(captures: QueryCapture[], rangeStartOffset: number, rangeEndOffset: number): { endOffsetsAndMetadata: EndOffsetToken[]; captureTime: number; metadataTime: number } | undefined {
const stopwatch = StopWatch.create();
const emptyTokens = this._createTokensFromCaptures(captures, rangeStartOffset, rangeEndOffset);
if (!emptyTokens) {
return undefined;
}
const endOffsetsAndScopes: EndOffsetWithMeta[] = emptyTokens.endOffsets;
for (let i = 0; i < endOffsetsAndScopes.length; i++) {
const token = endOffsetsAndScopes[i];
token.metadata = this._treeSitterThemeService.findMetadata(token.scopes, token.encodedLanguageId, !!token.bracket && (token.bracket.length > 0), undefined);
}
const metadataTime = stopwatch.elapsed();
return { endOffsetsAndMetadata: endOffsetsAndScopes as { endOffset: number; scopes: string[]; metadata: number }[], captureTime: emptyTokens.captureTime, metadataTime };
}
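// Tokenizes a single line by running the capture-based tokenizer over the line's offset range in the document.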
private _tokenizeEncoded(lineNumber: number): { result: EndOffsetToken[]; captureTime: number; metadataTime: number; versionId: number } | undefined {
const lineOffset = this._textModel.getOffsetAt({ lineNumber: lineNumber, column: 1 });
const maxLine = this._textModel.getLineCount();
const lineEndOffset = (lineNumber + 1 <= maxLine) ? this._textModel.getOffsetAt({ lineNumber: lineNumber + 1, column: 1 }) : this._textModel.getValueLength();
const lineLength = lineEndOffset - lineOffset;
const result = this._tokenize(new Range(lineNumber, 1, lineNumber, lineLength + 1), lineOffset, lineEndOffset);
if (!result) {
return undefined;
}
return { result: result.endOffsetsAndMetadata, captureTime: result.captureTime, metadataTime: result.metadataTime, versionId: result.versionId };
}
private _endOffsetTokensToUint32Array(endOffsetsAndMetadata: EndOffsetToken[]): Uint32Array {
const uint32Array = new Uint32Array(endOffsetsAndMetadata.length * 2);
for (let i = 0; i < endOffsetsAndMetadata.length; i++) {
uint32Array[i * 2] = endOffsetsAndMetadata[i].endOffset;
uint32Array[i * 2 + 1] = endOffsetsAndMetadata[i].metadata;
}
return uint32Array;
}
}
interface EndOffsetToken {
endOffset: number;
metadata: number;
}
interface EndOffsetAndScopes {
endOffset: number;
scopes: string[];
bracket?: number[];
encodedLanguageId: LanguageId;
}
interface EndOffsetWithMeta extends EndOffsetAndScopes {
metadata?: number;
}
export const TREESITTER_BASE_SCOPES: Record<string, string> = {
'css': 'source.css',
'typescript': 'source.ts',
'ini': 'source.ini',
'regex': 'source.regex',
};
const BRACKETS = /[\{\}\[\]\<\>\(\)]/g;

View File

@ -0,0 +1,447 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type * as TreeSitter from '@vscode/tree-sitter-wasm';
import { TaskQueue } from '../../../../../base/common/async.js';
import { Disposable, toDisposable } from '../../../../../base/common/lifecycle.js';
import { IObservable, observableValue, transaction, IObservableWithChange } from '../../../../../base/common/observable.js';
import { setTimeout0 } from '../../../../../base/common/platform.js';
import { ILogService } from '../../../../../platform/log/common/log.js';
import { ITelemetryService } from '../../../../../platform/telemetry/common/telemetry.js';
import { TextLength } from '../../../core/text/textLength.js';
import { IModelContentChangedEvent, IModelContentChange } from '../../../textModelEvents.js';
import { TextModel } from '../../textModel.js';
import { gotoParent, getClosestPreviousNodes, nextSiblingOrParentSibling, gotoNthChild } from './cursorUtils.js';
import { rangesIntersect, rangesEqual } from './treeSitterSyntaxTokenBackend.js';
import { Range } from '../../../core/range.js';
export class TreeSitterTree extends Disposable {
private readonly _tree = observableValue<TreeSitter.Tree | undefined, TreeParseUpdateEvent>(this, undefined);
public readonly tree: IObservableWithChange<TreeSitter.Tree | undefined, TreeParseUpdateEvent> = this._tree;
private readonly _treeLastParsedVersion = observableValue(this, -1);
public readonly treeLastParsedVersion: IObservable<number> = this._treeLastParsedVersion;
private _lastFullyParsed: TreeSitter.Tree | undefined;
private _lastFullyParsedWithEdits: TreeSitter.Tree | undefined;
private _onDidChangeContentQueue: TaskQueue = new TaskQueue();
constructor(
public readonly languageId: string,
private _ranges: TreeSitter.Range[] | undefined,
// readonly treeSitterLanguage: Language,
/** Must have the language set! */
private readonly _parser: TreeSitter.Parser,
private readonly _parserClass: typeof TreeSitter.Parser,
// private readonly _injectionQuery: TreeSitter.Query,
public readonly textModel: TextModel,
@ILogService private readonly _logService: ILogService,
@ITelemetryService private readonly _telemetryService: ITelemetryService
) {
super();
this._tree = observableValue(this, undefined);
this.tree = this._tree;
this._register(toDisposable(() => {
this._tree.get()?.delete();
this._lastFullyParsed?.delete();
this._lastFullyParsedWithEdits?.delete();
this._parser.delete();
}));
this.handleContentChange(undefined, this._ranges);
}
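// Applies the edits to the existing trees and schedules an asynchronous incremental re-parse; any previously pending re-parse is superseded by the latest change.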
public handleContentChange(e: IModelContentChangedEvent | undefined, ranges?: TreeSitter.Range[]): void {
const version = this.textModel.getVersionId();
let newRanges: TreeSitter.Range[] = [];
if (ranges) {
newRanges = this._setRanges(ranges);
}
if (e) {
this._applyEdits(e.changes);
}
this._onDidChangeContentQueue.clearPending();
this._onDidChangeContentQueue.schedule(async () => {
if (this._store.isDisposed) {
// No need to continue the queue if we are disposed
return;
}
const oldTree = this._lastFullyParsed;
let changedNodes: TreeSitter.Range[] | undefined;
if (this._lastFullyParsedWithEdits && this._lastFullyParsed) {
changedNodes = this._findChangedNodes(this._lastFullyParsedWithEdits, this._lastFullyParsed);
}
const completed = await this._parseAndUpdateTree(version);
if (completed) {
let ranges: RangeChange[] | undefined;
if (!changedNodes) {
if (this._ranges) {
ranges = this._ranges.map(r => ({ newRange: new Range(r.startPosition.row + 1, r.startPosition.column + 1, r.endPosition.row + 1, r.endPosition.column + 1), oldRangeLength: r.endIndex - r.startIndex, newRangeStartOffset: r.startIndex, newRangeEndOffset: r.endIndex }));
}
} else if (oldTree && changedNodes) {
ranges = this._findTreeChanges(completed, changedNodes, newRanges);
}
if (!ranges) {
ranges = [{ newRange: this.textModel.getFullModelRange(), newRangeStartOffset: 0, newRangeEndOffset: this.textModel.getValueLength() }];
}
const previousTree = this._tree.get();
transaction(tx => {
this._tree.set(completed, tx, { ranges, versionId: version });
this._treeLastParsedVersion.set(version, tx);
});
previousTree?.delete();
}
});
}
get ranges(): TreeSitter.Range[] | undefined {
return this._ranges;
}
public getInjectionTrees(startIndex: number, languageId: string): TreeSitterTree | undefined {
// TODO
return undefined;
}
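// Translates model content changes into tree-sitter edits and applies them to the current tree and to the edited copy of the last fully parsed tree.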
private _applyEdits(changes: IModelContentChange[]) {
for (const change of changes) {
const originalTextLength = TextLength.ofRange(Range.lift(change.range));
const newTextLength = TextLength.ofText(change.text);
const summedTextLengths = change.text.length === 0 ? newTextLength : originalTextLength.add(newTextLength);
const edit = {
startIndex: change.rangeOffset,
oldEndIndex: change.rangeOffset + change.rangeLength,
newEndIndex: change.rangeOffset + change.text.length,
startPosition: { row: change.range.startLineNumber - 1, column: change.range.startColumn - 1 },
oldEndPosition: { row: change.range.endLineNumber - 1, column: change.range.endColumn - 1 },
newEndPosition: { row: change.range.startLineNumber + summedTextLengths.lineCount - 1, column: summedTextLengths.lineCount ? summedTextLengths.columnCount : (change.range.endColumn + summedTextLengths.columnCount) }
};
this._tree.get()?.edit(edit);
this._lastFullyParsedWithEdits?.edit(edit);
}
}
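// Walks the edited tree and the previously parsed tree in lockstep to collect the ranges of nodes that actually changed.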
private _findChangedNodes(newTree: TreeSitter.Tree, oldTree: TreeSitter.Tree): TreeSitter.Range[] | undefined {
if ((this._ranges && this._ranges.every(range => range.startPosition.row !== newTree.rootNode.startPosition.row)) || newTree.rootNode.startPosition.row !== 0) {
return [];
}
const newCursor = newTree.walk();
const oldCursor = oldTree.walk();
const nodes: TreeSitter.Range[] = [];
let next = true;
do {
if (newCursor.currentNode.hasChanges) {
// Check if only one of the children has changes.
// If it's only one, then we go to that child.
// If more than one has changes, we need to go to each child
// If it's none, then we've found one of our ranges
const newChildren = newCursor.currentNode.children;
const indexChangedChildren: number[] = [];
const changedChildren = newChildren.filter((c, index) => {
if (c?.hasChanges || (oldCursor.currentNode.children.length <= index)) {
indexChangedChildren.push(index);
return true;
}
return false;
});
// If we have changes and we *had* an error, the whole node should be refreshed.
if ((changedChildren.length === 0) || (newCursor.currentNode.hasError !== oldCursor.currentNode.hasError)) {
// walk up again until we get to the first named node, since unnamed nodes can be too granular
while (newCursor.currentNode.parent && next && !newCursor.currentNode.isNamed) {
next = gotoParent(newCursor, oldCursor);
}
// Use the end position of the previous node and the start position of the current node
const newNode = newCursor.currentNode;
const closestPreviousNode = getClosestPreviousNodes(newCursor, newTree) ?? newNode;
nodes.push({
startIndex: closestPreviousNode.startIndex,
endIndex: newNode.endIndex,
startPosition: closestPreviousNode.startPosition,
endPosition: newNode.endPosition
});
next = nextSiblingOrParentSibling(newCursor, oldCursor);
} else if (changedChildren.length >= 1) {
next = gotoNthChild(newCursor, oldCursor, indexChangedChildren[0]);
}
} else {
next = nextSiblingOrParentSibling(newCursor, oldCursor);
}
} while (next);
newCursor.delete();
oldCursor.delete();
return nodes;
}
private _findTreeChanges(newTree: TreeSitter.Tree, changedNodes: TreeSitter.Range[], newRanges: TreeSitter.Range[]): RangeChange[] {
let newRangeIndex = 0;
const mergedChanges: RangeChange[] = [];
// Find the parent in the new tree of the changed node
for (let nodeIndex = 0; nodeIndex < changedNodes.length; nodeIndex++) {
const node = changedNodes[nodeIndex];
if (mergedChanges.length > 0) {
if ((node.startIndex >= mergedChanges[mergedChanges.length - 1].newRangeStartOffset) && (node.endIndex <= mergedChanges[mergedChanges.length - 1].newRangeEndOffset)) {
// This node is within the previous range, skip it
continue;
}
}
const cursor = newTree.walk();
const cursorContainersNode = () => cursor.startIndex < node.startIndex && cursor.endIndex > node.endIndex;
while (cursorContainersNode()) {
// See if we can go to a child
let child = cursor.gotoFirstChild();
let foundChild = false;
while (child) {
if (cursorContainersNode() && cursor.currentNode.isNamed) {
foundChild = true;
break;
} else {
child = cursor.gotoNextSibling();
}
}
if (!foundChild) {
cursor.gotoParent();
break;
}
if (cursor.currentNode.childCount === 0) {
break;
}
}
const startPosition = cursor.currentNode.startPosition;
const endPosition = cursor.currentNode.endPosition;
const startIndex = cursor.currentNode.startIndex;
const endIndex = cursor.currentNode.endIndex;
const newChange = { newRange: new Range(startPosition.row + 1, startPosition.column + 1, endPosition.row + 1, endPosition.column + 1), newRangeStartOffset: startIndex, newRangeEndOffset: endIndex };
if ((newRangeIndex < newRanges.length) && rangesIntersect(newRanges[newRangeIndex], { startIndex, endIndex, startPosition, endPosition })) {
// combine the new change with the range
if (newRanges[newRangeIndex].startIndex < newChange.newRangeStartOffset) {
newChange.newRange = newChange.newRange.setStartPosition(newRanges[newRangeIndex].startPosition.row + 1, newRanges[newRangeIndex].startPosition.column + 1);
newChange.newRangeStartOffset = newRanges[newRangeIndex].startIndex;
}
if (newRanges[newRangeIndex].endIndex > newChange.newRangeEndOffset) {
newChange.newRange = newChange.newRange.setEndPosition(newRanges[newRangeIndex].endPosition.row + 1, newRanges[newRangeIndex].endPosition.column + 1);
newChange.newRangeEndOffset = newRanges[newRangeIndex].endIndex;
}
newRangeIndex++;
} else if (newRangeIndex < newRanges.length && newRanges[newRangeIndex].endIndex < newChange.newRangeStartOffset) {
// add the full range to the merged changes
mergedChanges.push({
newRange: new Range(newRanges[newRangeIndex].startPosition.row + 1, newRanges[newRangeIndex].startPosition.column + 1, newRanges[newRangeIndex].endPosition.row + 1, newRanges[newRangeIndex].endPosition.column + 1),
newRangeStartOffset: newRanges[newRangeIndex].startIndex,
newRangeEndOffset: newRanges[newRangeIndex].endIndex
});
}
if ((mergedChanges.length > 0) && (mergedChanges[mergedChanges.length - 1].newRangeEndOffset >= newChange.newRangeStartOffset)) {
// Merge the changes
mergedChanges[mergedChanges.length - 1].newRange = Range.fromPositions(mergedChanges[mergedChanges.length - 1].newRange.getStartPosition(), newChange.newRange.getEndPosition());
mergedChanges[mergedChanges.length - 1].newRangeEndOffset = newChange.newRangeEndOffset;
} else {
mergedChanges.push(newChange);
}
}
return this._constrainRanges(mergedChanges);
}
private _constrainRanges(changes: RangeChange[]): RangeChange[] {
if (!this._ranges) {
return changes;
}
const constrainedChanges: RangeChange[] = [];
let changesIndex = 0;
let rangesIndex = 0;
while (changesIndex < changes.length && rangesIndex < this._ranges.length) {
const change = changes[changesIndex];
const range = this._ranges[rangesIndex];
if (change.newRangeEndOffset < range.startIndex) {
// Change is before the range, move to the next change
changesIndex++;
} else if (change.newRangeStartOffset > range.endIndex) {
// Change is after the range, move to the next range
rangesIndex++;
} else {
// Change is within the range, constrain it
const newRangeStartOffset = Math.max(change.newRangeStartOffset, range.startIndex);
const newRangeEndOffset = Math.min(change.newRangeEndOffset, range.endIndex);
const newRange = change.newRange.intersectRanges(new Range(range.startPosition.row + 1, range.startPosition.column + 1, range.endPosition.row + 1, range.endPosition.column + 1))!;
constrainedChanges.push({
newRange,
newRangeEndOffset,
newRangeStartOffset
});
// Remove the intersected range from the current change
if (newRangeEndOffset < change.newRangeEndOffset) {
change.newRange = Range.fromPositions(newRange.getEndPosition(), change.newRange.getEndPosition());
change.newRangeStartOffset = newRangeEndOffset + 1;
} else {
// Move to the next change
changesIndex++;
}
}
}
return constrainedChanges;
}
private async _parseAndUpdateTree(version: number): Promise<TreeSitter.Tree | undefined> {
const tree = await this._parse();
if (tree) {
this._lastFullyParsed?.delete();
this._lastFullyParsed = tree.copy();
this._lastFullyParsedWithEdits?.delete();
this._lastFullyParsedWithEdits = tree.copy();
return tree;
} else if (!this._tree.get()) {
// No tree means this is the initial parse and there were edits
// The parse function doesn't handle this well and we can end up with an incorrect tree, so we reset the parser
this._parser.reset();
}
return undefined;
}
private _parse(): Promise<TreeSitter.Tree | undefined> {
let parseType: TelemetryParseType = TelemetryParseType.Full;
if (this._tree.get()) {
parseType = TelemetryParseType.Incremental;
}
return this._parseAndYield(parseType);
}
private async _parseAndYield(parseType: TelemetryParseType): Promise<TreeSitter.Tree | undefined> {
let time: number = 0;
let passes: number = 0;
const inProgressVersion = this.textModel.getVersionId();
let newTree: TreeSitter.Tree | null | undefined;
const progressCallback = newTimeOutProgressCallback();
do {
const timer = performance.now();
newTree = this._parser.parse((index: number, position?: TreeSitter.Point) => this._parseCallback(index), this._tree.get(), { progressCallback, includedRanges: this._ranges });
time += performance.now() - timer;
passes++;
// So long as this isn't the initial parse, even if the model changes and edits are applied, the tree parsing will continue correctly after the await.
await new Promise<void>(resolve => setTimeout0(resolve));
} while (!this._store.isDisposed && !newTree && inProgressVersion === this.textModel.getVersionId());
this._sendParseTimeTelemetry(parseType, time, passes);
return (newTree && (inProgressVersion === this.textModel.getVersionId())) ? newTree : undefined;
}
private _parseCallback(index: number): string | undefined {
try {
return this.textModel.getTextBuffer().getNearestChunk(index);
} catch (e) {
this._logService.debug('Error getting chunk for tree-sitter parsing', e);
}
return undefined;
}
private _setRanges(newRanges: TreeSitter.Range[]): TreeSitter.Range[] {
const unKnownRanges: TreeSitter.Range[] = [];
// If we have existing ranges, find the parts of the new ranges that are not included in the existing ones
if (this._ranges) {
for (const newRange of newRanges) {
let isFullyIncluded = false;
for (let i = 0; i < this._ranges.length; i++) {
const existingRange = this._ranges[i];
if (rangesEqual(existingRange, newRange) || rangesIntersect(existingRange, newRange)) {
isFullyIncluded = true;
break;
}
}
if (!isFullyIncluded) {
unKnownRanges.push(newRange);
}
}
} else {
// No existing ranges, all new ranges are unknown
unKnownRanges.push(...newRanges);
}
this._ranges = newRanges;
return unKnownRanges;
}
private _sendParseTimeTelemetry(parseType: TelemetryParseType, time: number, passes: number): void {
this._logService.debug(`Tree parsing (${parseType}) took ${time} ms and ${passes} passes.`);
type ParseTimeClassification = {
owner: 'alexr00';
comment: 'Used to understand how long it takes to parse a tree-sitter tree';
languageId: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'The programming language ID.' };
time: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; isMeasurement: true; comment: 'The ms it took to parse' };
passes: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; isMeasurement: true; comment: 'The number of passes it took to parse' };
};
if (parseType === TelemetryParseType.Full) {
this._telemetryService.publicLog2<{ languageId: string; time: number; passes: number }, ParseTimeClassification>(`treeSitter.fullParse`, { languageId: this.languageId, time, passes });
} else {
this._telemetryService.publicLog2<{ languageId: string; time: number; passes: number }, ParseTimeClassification>(`treeSitter.incrementalParse`, { languageId: this.languageId, time, passes });
}
}
public createParsedTreeSync(src: string): TreeSitter.Tree | undefined {
const parser = new this._parserClass();
parser.setLanguage(this._parser.language!);
const tree = parser.parse(src);
parser.delete();
return tree ?? undefined;
}
}
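For illustration only, and not part of this change: a minimal sketch of how createParsedTreeSync above could be used for a one-off parse of a string. ParsedTreeFactory and the function name are hypothetical stand-ins; only the createParsedTreeSync shape is assumed.
// Hypothetical sketch: ParsedTreeFactory stands in for the class above.
type ParsedTreeFactory = { createParsedTreeSync(src: string): TreeSitter.Tree | undefined };
function rootNodeType(factory: ParsedTreeFactory, src: string): string | undefined {
    const parsed = factory.createParsedTreeSync(src);
    if (!parsed) {
        return undefined;
    }
    const type = parsed.rootNode.type; // e.g. 'program' for a TypeScript source file
    parsed.delete(); // wasm-backed trees must be freed explicitly
    return type;
}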
const enum TelemetryParseType {
Full = 'fullParse',
Incremental = 'incrementalParse'
}
export interface TreeParseUpdateEvent {
ranges: RangeChange[];
versionId: number;
}
export interface RangeWithOffsets {
range: Range;
startOffset: number;
endOffset: number;
}
export interface RangeChange {
newRange: Range;
newRangeStartOffset: number;
newRangeEndOffset: number;
}
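As an illustration of how the RangeChange fields relate (the concrete numbers are hypothetical, not taken from this change): for a model whose first line plus its line break occupies offsets 0 through 9, a change covering all of a 20-character second line could be described as follows.
// Hypothetical example: a RangeChange covering a 20-character second line.
const exampleChange: RangeChange = {
    newRange: new Range(2, 1, 2, 21), // 1-based editor positions
    newRangeStartOffset: 10,          // 0-based offset of line 2, column 1
    newRangeEndOffset: 30             // offset just past the last changed character
};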
function newTimeOutProgressCallback(): (state: TreeSitter.ParseState) => void {
let lastYieldTime: number = performance.now();
return function parseProgressCallback(_state: TreeSitter.ParseState) {
const now = performance.now();
if (now - lastYieldTime > 50) {
lastYieldTime = now;
return true;
}
return false;
};
}
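newTimeOutProgressCallback implements a simple time budget: tree-sitter invokes the callback periodically during a parse, and a truthy return asks it to pause so the loop in _parseAndYield can yield via setTimeout0. Below is a hedged variant with a configurable budget; the helper name is illustrative and the 50 ms figure above is kept as the default.
// Illustrative variant of the callback above; the return type mirrors the original,
// and a truthy return is treated as a request to pause the parse.
function newBudgetedProgressCallback(budgetMs: number = 50): (state: TreeSitter.ParseState) => void {
    let lastYieldTime: number = performance.now();
    return function (_state: TreeSitter.ParseState) {
        const now = performance.now();
        if (now - lastYieldTime > budgetMs) {
            lastYieldTime = now;
            return true; // request a pause; the caller resumes parsing after yielding
        }
        return false;
    };
}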

View File

@ -1,180 +0,0 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { Range } from '../core/range.js';
import { ITextModel } from '../model.js';
import { TokenQuality, TokenStore, TokenUpdate } from './tokenStore.js';
import { InstantiationType, registerSingleton } from '../../../platform/instantiation/common/extensions.js';
import { createDecorator } from '../../../platform/instantiation/common/instantiation.js';
import { DisposableStore, IDisposable } from '../../../base/common/lifecycle.js';
import { IModelContentChangedEvent } from '../textModelEvents.js';
export interface ITreeSitterTokenizationStoreService {
readonly _serviceBrand: undefined;
setTokens(model: ITextModel, tokens: TokenUpdate[], tokenQuality: TokenQuality): void;
handleContentChanged(model: ITextModel, e: IModelContentChangedEvent): void;
getTokens(model: ITextModel, line: number): Uint32Array | undefined;
updateTokens(model: ITextModel, version: number, updates: { oldRangeLength?: number; newTokens: TokenUpdate[] }[], tokenQuality: TokenQuality): void;
markForRefresh(model: ITextModel, range: Range): void;
getNeedsRefresh(model: ITextModel): { range: Range; startOffset: number; endOffset: number }[];
hasTokens(model: ITextModel, accurateForRange?: Range): boolean;
rangeHasTokens(model: ITextModel, range: Range, minimumTokenQuality: TokenQuality): boolean;
delete(model: ITextModel): void;
}
export const ITreeSitterTokenizationStoreService = createDecorator<ITreeSitterTokenizationStoreService>('treeSitterTokenizationStoreService');
export interface TokenInformation {
tokens: Uint32Array;
needsRefresh?: boolean;
}
class TreeSitterTokenizationStoreService implements ITreeSitterTokenizationStoreService, IDisposable {
readonly _serviceBrand: undefined;
private readonly tokens = new Map<ITextModel, { store: TokenStore; accurateVersion: number; guessVersion: number; readonly disposables: DisposableStore }>();
constructor() { }
setTokens(model: ITextModel, tokens: TokenUpdate[], tokenQuality: TokenQuality): void {
const disposables = new DisposableStore();
const store = disposables.add(new TokenStore(model));
this.tokens.set(model, { store: store, accurateVersion: model.getVersionId(), disposables, guessVersion: model.getVersionId() });
store.buildStore(tokens, tokenQuality);
disposables.add(model.onWillDispose(() => {
const storeInfo = this.tokens.get(model);
if (storeInfo) {
storeInfo.disposables.dispose();
this.tokens.delete(model);
}
}));
}
handleContentChanged(model: ITextModel, e: IModelContentChangedEvent): void {
const storeInfo = this.tokens.get(model);
if (!storeInfo) {
return;
}
storeInfo.guessVersion = e.versionId;
for (const change of e.changes) {
if (change.text.length > change.rangeLength) {
// If possible, use the token before the change as the starting point for the new token.
// This is more likely to let the new text be the correct color, as typing is usually at the end of the token.
const offset = change.rangeOffset > 0 ? change.rangeOffset - 1 : change.rangeOffset;
const oldToken = storeInfo.store.getTokenAt(offset);
let newToken: TokenUpdate;
if (oldToken) {
// Insert. Just grow the token at this position to include the insert.
newToken = { startOffsetInclusive: oldToken.startOffsetInclusive, length: oldToken.length + change.text.length - change.rangeLength, token: oldToken.token };
// Also mark tokens that are in the range of the change as needing a refresh.
storeInfo.store.markForRefresh(offset, change.rangeOffset + (change.text.length > change.rangeLength ? change.text.length : change.rangeLength));
} else {
// The document got larger and the change is at the end of the document.
newToken = { startOffsetInclusive: offset, length: change.text.length, token: 0 };
}
storeInfo.store.update(oldToken?.length ?? 0, [newToken], TokenQuality.EditGuess);
} else if (change.text.length < change.rangeLength) {
// Delete. Delete the tokens at the corresponding range.
const deletedCharCount = change.rangeLength - change.text.length;
storeInfo.store.delete(deletedCharCount, change.rangeOffset);
}
}
}
rangeHasTokens(model: ITextModel, range: Range, minimumTokenQuality: TokenQuality): boolean {
const tokens = this.tokens.get(model);
if (!tokens) {
return false;
}
return tokens.store.rangeHasTokens(model.getOffsetAt(range.getStartPosition()), model.getOffsetAt(range.getEndPosition()), minimumTokenQuality);
}
hasTokens(model: ITextModel, accurateForRange?: Range): boolean {
const tokens = this.tokens.get(model);
if (!tokens) {
return false;
}
if (!accurateForRange || (tokens.guessVersion === tokens.accurateVersion)) {
return true;
}
return !tokens.store.rangeNeedsRefresh(model.getOffsetAt(accurateForRange.getStartPosition()), model.getOffsetAt(accurateForRange.getEndPosition()));
}
getTokens(model: ITextModel, line: number): Uint32Array | undefined {
const tokens = this.tokens.get(model)?.store;
if (!tokens) {
return undefined;
}
const lineStartOffset = model.getOffsetAt({ lineNumber: line, column: 1 });
const lineTokens = tokens.getTokensInRange(lineStartOffset, model.getOffsetAt({ lineNumber: line, column: model.getLineLength(line) }) + 1);
const result = new Uint32Array(lineTokens.length * 2);
for (let i = 0; i < lineTokens.length; i++) {
result[i * 2] = lineTokens[i].startOffsetInclusive - lineStartOffset + lineTokens[i].length;
result[i * 2 + 1] = lineTokens[i].token;
}
return result;
}
updateTokens(model: ITextModel, version: number, updates: { oldRangeLength?: number; newTokens: TokenUpdate[] }[], tokenQuality: TokenQuality): void {
const existingTokens = this.tokens.get(model);
if (!existingTokens) {
return;
}
existingTokens.accurateVersion = version;
for (const update of updates) {
const lastToken = update.newTokens.length > 0 ? update.newTokens[update.newTokens.length - 1] : undefined;
let oldRangeLength: number;
if (lastToken && (existingTokens.guessVersion >= version)) {
oldRangeLength = lastToken.startOffsetInclusive + lastToken.length - update.newTokens[0].startOffsetInclusive;
} else if (update.oldRangeLength) {
oldRangeLength = update.oldRangeLength;
} else {
oldRangeLength = 0;
}
existingTokens.store.update(oldRangeLength, update.newTokens, tokenQuality);
}
}
markForRefresh(model: ITextModel, range: Range): void {
const tree = this.tokens.get(model)?.store;
if (!tree) {
return;
}
tree.markForRefresh(model.getOffsetAt(range.getStartPosition()), model.getOffsetAt(range.getEndPosition()));
}
getNeedsRefresh(model: ITextModel): { range: Range; startOffset: number; endOffset: number }[] {
const needsRefreshOffsetRanges = this.tokens.get(model)?.store.getNeedsRefresh();
if (!needsRefreshOffsetRanges) {
return [];
}
return needsRefreshOffsetRanges.map(range => ({
range: Range.fromPositions(model.getPositionAt(range.startOffset), model.getPositionAt(range.endOffset)),
startOffset: range.startOffset,
endOffset: range.endOffset
}));
}
delete(model: ITextModel): void {
const storeInfo = this.tokens.get(model);
if (storeInfo) {
storeInfo.disposables.dispose();
this.tokens.delete(model);
}
}
dispose(): void {
for (const [, value] of this.tokens) {
value.disposables.dispose();
}
}
}
registerSingleton(ITreeSitterTokenizationStoreService, TreeSitterTokenizationStoreService, InstantiationType.Delayed);
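For context, a hedged usage sketch of the removed service; the model instance and token values are hypothetical. Tokens are seeded with setTokens and read back per line with getTokens, which returns pairs of line-relative end offset and encoded metadata.
// Hypothetical usage sketch of the removed store service.
function seedAndReadLineOne(store: ITreeSitterTokenizationStoreService, model: ITextModel): Uint32Array | undefined {
    const initialTokens: TokenUpdate[] = [
        { startOffsetInclusive: 0, length: 5, token: 0 /* encoded token metadata */ }
    ];
    store.setTokens(model, initialTokens, TokenQuality.EditGuess);
    return store.getTokens(model, 1); // pairs of (end offset within the line, metadata)
}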

View File

@ -1,133 +0,0 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { ILanguageIdCodec, ITreeSitterTokenizationSupport, TreeSitterTokenizationRegistry } from '../languages.js';
import { LineTokens } from '../tokens/lineTokens.js';
import { StandardTokenType } from '../encodedTokenAttributes.js';
import { TextModel } from './textModel.js';
import { IModelContentChangedEvent } from '../textModelEvents.js';
import { AbstractTokens } from './tokens.js';
import { IDisposable, MutableDisposable } from '../../../base/common/lifecycle.js';
import { ITreeSitterTokenizationStoreService } from './treeSitterTokenStoreService.js';
import { Range } from '../core/range.js';
import { BackgroundTokenizationState } from '../tokenizationTextModelPart.js';
import { Emitter, Event } from '../../../base/common/event.js';
export class TreeSitterTokens extends AbstractTokens {
private _tokenizationSupport: ITreeSitterTokenizationSupport | null = null;
protected _backgroundTokenizationState: BackgroundTokenizationState = BackgroundTokenizationState.InProgress;
protected readonly _onDidChangeBackgroundTokenizationState: Emitter<void> = this._register(new Emitter<void>());
public readonly onDidChangeBackgroundTokenizationState: Event<void> = this._onDidChangeBackgroundTokenizationState.event;
private _lastLanguageId: string | undefined;
private readonly _tokensChangedListener: MutableDisposable<IDisposable> = this._register(new MutableDisposable());
private readonly _onDidChangeBackgroundTokenization: MutableDisposable<IDisposable> = this._register(new MutableDisposable());
constructor(languageIdCodec: ILanguageIdCodec,
textModel: TextModel,
languageId: () => string,
@ITreeSitterTokenizationStoreService private readonly _tokenStore: ITreeSitterTokenizationStoreService) {
super(languageIdCodec, textModel, languageId);
this._initialize();
}
private _initialize() {
const newLanguage = this.getLanguageId();
if (!this._tokenizationSupport || this._lastLanguageId !== newLanguage) {
this._lastLanguageId = newLanguage;
this._tokenizationSupport = TreeSitterTokenizationRegistry.get(newLanguage);
this._tokensChangedListener.value = this._tokenizationSupport?.onDidChangeTokens((e) => {
if (e.textModel === this._textModel) {
this._onDidChangeTokens.fire(e.changes);
}
});
this._onDidChangeBackgroundTokenization.value = this._tokenizationSupport?.onDidChangeBackgroundTokenization(e => {
if (e.textModel === this._textModel) {
this._backgroundTokenizationState = BackgroundTokenizationState.Completed;
this._onDidChangeBackgroundTokenizationState.fire();
}
});
}
}
public getLineTokens(lineNumber: number): LineTokens {
const content = this._textModel.getLineContent(lineNumber);
if (this._tokenizationSupport && content.length > 0) {
const rawTokens = this._tokenStore.getTokens(this._textModel, lineNumber);
if (rawTokens && rawTokens.length > 0) {
return new LineTokens(rawTokens, content, this._languageIdCodec);
}
}
return LineTokens.createEmpty(content, this._languageIdCodec);
}
public resetTokenization(fireTokenChangeEvent: boolean = true): void {
if (fireTokenChangeEvent) {
this._onDidChangeTokens.fire({
semanticTokensApplied: false,
ranges: [
{
fromLineNumber: 1,
toLineNumber: this._textModel.getLineCount(),
},
],
});
}
this._initialize();
}
public override handleDidChangeAttached(): void {
// TODO @alexr00 implement for background tokenization
}
public override handleDidChangeContent(e: IModelContentChangedEvent): void {
if (e.isFlush) {
// Don't fire the event, as the view might not have got the text change event yet
this.resetTokenization(false);
} else {
this._tokenStore.handleContentChanged(this._textModel, e);
}
}
public override forceTokenization(lineNumber: number): void {
if (this._tokenizationSupport && !this.hasAccurateTokensForLine(lineNumber)) {
this._tokenizationSupport.tokenizeEncoded(lineNumber, this._textModel);
}
}
public override hasAccurateTokensForLine(lineNumber: number): boolean {
return this._tokenStore.hasTokens(this._textModel, new Range(lineNumber, 1, lineNumber, this._textModel.getLineMaxColumn(lineNumber)));
}
public override isCheapToTokenize(lineNumber: number): boolean {
// TODO @alexr00 determine what makes it cheap to tokenize?
return true;
}
public override getTokenTypeIfInsertingCharacter(lineNumber: number, column: number, character: string): StandardTokenType {
// TODO @alexr00 implement once we have custom parsing and don't just feed in the whole text model value
return StandardTokenType.Other;
}
public override tokenizeLinesAt(lineNumber: number, lines: string[]): LineTokens[] | null {
if (this._tokenizationSupport) {
const rawLineTokens = this._tokenizationSupport.guessTokensForLinesContent(lineNumber, this._textModel, lines);
const lineTokens: LineTokens[] = [];
if (rawLineTokens) {
for (let i = 0; i < rawLineTokens.length; i++) {
lineTokens.push(new LineTokens(rawLineTokens[i], lines[i], this._languageIdCodec));
}
return lineTokens;
}
}
return null;
}
public override get hasTokens(): boolean {
return this._tokenStore.hasTokens(this._textModel);
}
}
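getLineTokens above hands the Uint32Array from the token store directly to LineTokens; as produced by the store, the array holds (line-relative end offset, encoded metadata) pairs. A small illustrative decoder for that layout, not part of this change:
// Illustrative only: decodes the (endOffset, metadata) pair layout used above.
function decodeLineTokenPairs(raw: Uint32Array): { endOffset: number; metadata: number }[] {
    const pairs: { endOffset: number; metadata: number }[] = [];
    for (let i = 0; i + 1 < raw.length; i += 2) {
        pairs.push({ endOffset: raw[i], metadata: raw[i + 1] });
    }
    return pairs;
}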

View File

@ -1,767 +0,0 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type * as Parser from '@vscode/tree-sitter-wasm';
import { ITreeSitterParseResult, ITextModelTreeSitter, RangeChange, TreeParseUpdateEvent, ITreeSitterImporter, ModelTreeUpdateEvent } from '../treeSitterParserService.js';
import { Disposable, DisposableMap, DisposableStore, dispose, IDisposable } from '../../../../base/common/lifecycle.js';
import { ITextModel } from '../../model.js';
import { IModelContentChange, IModelContentChangedEvent } from '../../textModelEvents.js';
import { ITelemetryService } from '../../../../platform/telemetry/common/telemetry.js';
import { ILogService } from '../../../../platform/log/common/log.js';
import { setTimeout0 } from '../../../../base/common/platform.js';
import { Emitter, Event } from '../../../../base/common/event.js';
import { CancellationToken, cancelOnDispose } from '../../../../base/common/cancellation.js';
import { Range } from '../../core/range.js';
import { LimitedQueue } from '../../../../base/common/async.js';
import { TextLength } from '../../core/text/textLength.js';
import { TreeSitterLanguages } from './treeSitterLanguages.js';
import { AppResourcePath, FileAccess } from '../../../../base/common/network.js';
import { IFileService } from '../../../../platform/files/common/files.js';
import { CancellationError, isCancellationError } from '../../../../base/common/errors.js';
import { getClosestPreviousNodes, gotoNthChild, gotoParent, nextSiblingOrParentSibling } from './cursorUtils.js';
export interface TextModelTreeSitterItem {
dispose(): void;
textModelTreeSitter: TextModelTreeSitter;
disposables: DisposableStore;
}
const enum TelemetryParseType {
Full = 'fullParse',
Incremental = 'incrementalParse'
}
export class TextModelTreeSitter extends Disposable implements ITextModelTreeSitter {
private _onDidChangeParseResult: Emitter<ModelTreeUpdateEvent> = this._register(new Emitter<ModelTreeUpdateEvent>());
public readonly onDidChangeParseResult: Event<ModelTreeUpdateEvent> = this._onDidChangeParseResult.event;
private _rootTreeSitterTree: TreeSitterParseResult | undefined;
private _query: Parser.Query | undefined;
// TODO: @alexr00 use a better data structure for this
private readonly _injectionTreeSitterTrees: DisposableMap<string, TreeSitterParseResult> = this._register(new DisposableMap());
private readonly _injectionTreeSitterLanguages: Map<string, Set<string>> = new Map(); // parent language -> injected languages
private _versionId: number = 0;
get parseResult(): ITreeSitterParseResult | undefined { return this._rootTreeSitterTree; }
constructor(
readonly textModel: ITextModel,
private readonly _treeSitterLanguages: TreeSitterLanguages,
parseImmediately: boolean = true,
@ITreeSitterImporter private readonly _treeSitterImporter: ITreeSitterImporter,
@ILogService private readonly _logService: ILogService,
@ITelemetryService private readonly _telemetryService: ITelemetryService,
@IFileService private readonly _fileService: IFileService
) {
super();
if (parseImmediately) {
this._register(Event.runAndSubscribe(this.textModel.onDidChangeLanguage, (e => this._onDidChangeLanguage(e ? e.newLanguage : this.textModel.getLanguageId()))));
} else {
this._register(this.textModel.onDidChangeLanguage(e => this._onDidChangeLanguage(e ? e.newLanguage : this.textModel.getLanguageId())));
}
}
private readonly _parseSessionDisposables = this._register(new DisposableStore());
private async _onDidChangeLanguage(languageId: string) {
this.parse(languageId);
}
/**
* Be very careful when making changes to this method as it is easy to introduce race conditions.
*/
public async parse(languageId: string = this.textModel.getLanguageId()): Promise<ITreeSitterParseResult | undefined> {
this._parseSessionDisposables.clear();
this._rootTreeSitterTree = undefined;
const token = cancelOnDispose(this._parseSessionDisposables);
let language: Parser.Language | undefined;
try {
language = await this._getLanguage(languageId, token);
} catch (e) {
if (isCancellationError(e)) {
return;
}
throw e;
}
const Parser = await this._treeSitterImporter.getParserClass();
if (token.isCancellationRequested) {
return;
}
const treeSitterTree = this._parseSessionDisposables.add(new TreeSitterParseResult(new Parser(), languageId, language, this._logService, this._telemetryService));
this._rootTreeSitterTree = treeSitterTree;
this._parseSessionDisposables.add(treeSitterTree.onDidUpdate(e => this._handleTreeUpdate(e)));
this._parseSessionDisposables.add(this.textModel.onDidChangeContent(e => this._onDidChangeContent(treeSitterTree, [e])));
this._onDidChangeContent(treeSitterTree, undefined);
if (token.isCancellationRequested) {
return;
}
return this._rootTreeSitterTree;
}
private _getLanguage(languageId: string, token: CancellationToken): Promise<Parser.Language> {
const language = this._treeSitterLanguages.getOrInitLanguage(languageId);
if (language) {
return Promise.resolve(language);
}
const disposables: IDisposable[] = [];
return new Promise((resolve, reject) => {
disposables.push(this._treeSitterLanguages.onDidAddLanguage(e => {
if (e.id === languageId) {
dispose(disposables);
resolve(e.language);
}
}));
token.onCancellationRequested(() => {
dispose(disposables);
reject(new CancellationError());
}, undefined, disposables);
});
}
private async _handleTreeUpdate(e: TreeParseUpdateEvent, parentTree?: Parser.Tree) {
if (e.ranges && (e.versionId >= this._versionId)) {
this._versionId = e.versionId;
const tree = (parentTree ?? this._rootTreeSitterTree!.tree)?.copy();
let injections: Map<string, Parser.Range[]> | undefined;
if (tree) {
injections = await this._collectInjections(tree);
// kick off check for injected languages
if (injections) {
this._processInjections(injections, tree, e.language, e.includedModelChanges).then(() => {
// Clean up tree copy
tree.delete();
});
}
}
this._onDidChangeParseResult.fire({ ranges: e.ranges, versionId: e.versionId, tree: this, languageId: this.textModel.getLanguageId(), hasInjections: !!injections && injections.size > 0 });
}
}
private _queries: string | undefined;
private async _ensureInjectionQueries() {
if (!this._queries) {
const injectionsQueriesLocation: AppResourcePath = `vs/editor/common/languages/injections/${this.textModel.getLanguageId()}.scm`;
const uri = FileAccess.asFileUri(injectionsQueriesLocation);
if (!(await this._fileService.exists(uri))) {
this._queries = '';
} else if (this._fileService.hasProvider(uri)) {
const query = await this._fileService.readFile(uri);
this._queries = query.value.toString();
} else {
this._queries = '';
}
}
return this._queries;
}
private async _getQuery() {
if (!this._query) {
const language = await this._treeSitterLanguages.getLanguage(this.textModel.getLanguageId());
if (!language) {
return;
}
const queries = await this._ensureInjectionQueries();
if (queries === '') {
return;
}
const Query = await this._treeSitterImporter.getQueryClass();
this._query = new Query(language, queries);
}
return this._query;
}
private async _collectInjections(copyOfTree: Parser.Tree): Promise<Map<string, Parser.Range[]> | undefined> {
const query = await this._getQuery();
if (!query) {
return;
}
if (!copyOfTree?.rootNode) {
// need to check the root node here as `walk` will throw if not defined.
return;
}
const cursor = copyOfTree.walk();
const injections: Map<string, Parser.Range[]> = new Map();
let hasNext = true;
while (hasNext) {
hasNext = await this._processNode(cursor, query, injections);
// Yield periodically
await new Promise<void>(resolve => setTimeout0(resolve));
}
cursor.delete();
return this._mergeAdjacentRanges(injections);
}
private _processNode(cursor: Parser.TreeCursor, query: Parser.Query, injections: Map<string, Parser.Range[]>): boolean {
const node = cursor.currentNode;
const nodeLineCount = node.endPosition.row - node.startPosition.row;
// We check the node line count to avoid processing large nodes in one go as that can cause performance issues.
if (nodeLineCount <= 1000) {
this._processCaptures(query, node, injections);
// Move to next sibling or up and over
return cursor.gotoNextSibling() || this.gotoNextSiblingOfAncestor(cursor);
} else {
// Node is too large, go to first child or next sibling
return cursor.gotoFirstChild() || cursor.gotoNextSibling() || this.gotoNextSiblingOfAncestor(cursor);
}
}
private _processCaptures(query: Parser.Query, node: Parser.Node, injections: Map<string, Parser.Range[]>): void {
const captures = query.captures(node);
for (const capture of captures) {
const injectionLanguage = capture.setProperties?.['injection.language'];
if (injectionLanguage) {
const range = this._createRangeFromNode(capture.node);
if (!injections.has(injectionLanguage)) {
injections.set(injectionLanguage, []);
}
injections.get(injectionLanguage)?.push(range);
}
}
}
private _createRangeFromNode(node: Parser.Node): Parser.Range {
return {
startIndex: node.startIndex,
endIndex: node.endIndex,
startPosition: { row: node.startPosition.row, column: node.startPosition.column },
endPosition: { row: node.endPosition.row, column: node.endPosition.column }
};
}
private _mergeAdjacentRanges(injections: Map<string, Parser.Range[]>): Map<string, Parser.Range[]> {
for (const [languageId, ranges] of injections) {
if (ranges.length <= 1) {
continue;
}
const mergedRanges: Parser.Range[] = [];
let current = ranges[0];
for (let i = 1; i < ranges.length; i++) {
const next = ranges[i];
if (next.startIndex <= current.endIndex) {
current = this._mergeRanges(current, next);
} else {
mergedRanges.push(current);
current = next;
}
}
mergedRanges.push(current);
injections.set(languageId, mergedRanges);
}
return injections;
}
private _mergeRanges(current: Parser.Range, next: Parser.Range): Parser.Range {
return {
startIndex: current.startIndex,
endIndex: Math.max(current.endIndex, next.endIndex),
startPosition: current.startPosition,
endPosition: next.endPosition.row > current.endPosition.row ?
next.endPosition :
current.endPosition
};
}
private async _processInjections(
injections: Map<string, Parser.Range[]>,
copyOfParentTree: Parser.Tree,
parentLanguage: string,
modelChanges: IModelContentChangedEvent[] | undefined
): Promise<void> {
const unseenInjections: Set<string> = this._injectionTreeSitterLanguages.get(parentLanguage) ?? new Set();
for (const [languageId, ranges] of injections) {
const language = await this._treeSitterLanguages.getLanguage(languageId);
if (!language) {
continue;
}
const treeSitterTree = await this._getOrCreateInjectedTree(languageId, language, copyOfParentTree, parentLanguage);
if (treeSitterTree) {
unseenInjections.delete(languageId);
this._onDidChangeContent(treeSitterTree, modelChanges, ranges);
}
}
for (const unseenInjection of unseenInjections) {
this._injectionTreeSitterTrees.deleteAndDispose(unseenInjection);
}
}
private async _getOrCreateInjectedTree(
languageId: string,
language: Parser.Language,
copyOfParentTree: Parser.Tree,
parentLanguage: string
): Promise<TreeSitterParseResult | undefined> {
let treeSitterTree = this._injectionTreeSitterTrees.get(languageId);
if (!treeSitterTree) {
const Parser = await this._treeSitterImporter.getParserClass();
treeSitterTree = new TreeSitterParseResult(new Parser(), languageId, language, this._logService, this._telemetryService);
this._parseSessionDisposables.add(treeSitterTree.onDidUpdate(e => this._handleTreeUpdate(e, copyOfParentTree)));
this._injectionTreeSitterTrees.set(languageId, treeSitterTree);
const injectionLanguages = this._injectionTreeSitterLanguages.get(parentLanguage) ?? this._injectionTreeSitterLanguages.set(parentLanguage, new Set()).get(parentLanguage)!;
injectionLanguages.add(languageId);
}
return treeSitterTree;
}
private gotoNextSiblingOfAncestor(cursor: Parser.TreeCursor): boolean {
while (cursor.gotoParent()) {
if (cursor.gotoNextSibling()) {
return true;
}
}
return false;
}
getInjection(offset: number, parentLanguage: string): ITreeSitterParseResult | undefined {
if (this._injectionTreeSitterTrees.size === 0) {
return undefined;
}
let hasFoundParentLanguage = parentLanguage === this.textModel.getLanguageId();
for (const [_, treeSitterTree] of this._injectionTreeSitterTrees) {
if (treeSitterTree.tree) {
if (hasFoundParentLanguage && treeSitterTree.ranges?.find(r => r.startIndex <= offset && r.endIndex >= offset)) {
return treeSitterTree;
}
if (!hasFoundParentLanguage && treeSitterTree.languageId === parentLanguage) {
hasFoundParentLanguage = true;
}
}
}
return undefined;
}
private _onDidChangeContent(treeSitterTree: TreeSitterParseResult, change: IModelContentChangedEvent[] | undefined, ranges?: Parser.Range[]) {
treeSitterTree.onDidChangeContent(this.textModel, change, ranges);
}
}
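The injection handling above merges captured ranges that touch or overlap before reparsing the injected language. A standalone sketch of the same merge on a simplified, offsets-only range shape, not part of this change:
// Illustrative only: mirrors the offset behaviour of _mergeAdjacentRanges above.
type OffsetRange = { startIndex: number; endIndex: number };
function mergeAdjacentOffsetRanges(ranges: OffsetRange[]): OffsetRange[] {
    if (ranges.length <= 1) {
        return ranges;
    }
    const merged: OffsetRange[] = [];
    let current = ranges[0];
    for (let i = 1; i < ranges.length; i++) {
        const next = ranges[i];
        if (next.startIndex <= current.endIndex) {
            current = { startIndex: current.startIndex, endIndex: Math.max(current.endIndex, next.endIndex) };
        } else {
            merged.push(current);
            current = next;
        }
    }
    merged.push(current);
    return merged;
}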
export class TreeSitterParseResult implements IDisposable, ITreeSitterParseResult {
private _tree: Parser.Tree | undefined;
private _lastFullyParsed: Parser.Tree | undefined;
private _lastFullyParsedWithEdits: Parser.Tree | undefined;
private readonly _onDidUpdate: Emitter<TreeParseUpdateEvent> = new Emitter<TreeParseUpdateEvent>();
public readonly onDidUpdate: Event<TreeParseUpdateEvent> = this._onDidUpdate.event;
private _versionId: number = 0;
private _editVersion: number = 0;
get versionId() {
return this._versionId;
}
private _isDisposed: boolean = false;
constructor(public readonly parser: Parser.Parser,
public readonly languageId: string,
public /** exposed for tests **/ readonly language: Parser.Language,
private readonly _logService: ILogService,
private readonly _telemetryService: ITelemetryService) {
this.parser.setLanguage(language);
}
dispose(): void {
this._isDisposed = true;
this._onDidUpdate.dispose();
this._tree?.delete();
this._lastFullyParsed?.delete();
this._lastFullyParsedWithEdits?.delete();
this.parser?.delete();
}
get tree() { return this._lastFullyParsed; }
get isDisposed() { return this._isDisposed; }
private findChangedNodes(newTree: Parser.Tree, oldTree: Parser.Tree): Parser.Range[] | undefined {
if ((this.ranges && this.ranges.every(range => range.startPosition.row !== newTree.rootNode.startPosition.row)) || newTree.rootNode.startPosition.row !== 0) {
return [];
}
const newCursor = newTree.walk();
const oldCursor = oldTree.walk();
const nodes: Parser.Range[] = [];
let next = true;
do {
if (newCursor.currentNode.hasChanges) {
// Check if only one of the children has changes.
// If it's only one, then we go to that child.
// If it's more than one, then we need to go to each child
// If it's none, then we've found one of our ranges
const newChildren = newCursor.currentNode.children;
const indexChangedChildren: number[] = [];
const changedChildren = newChildren.filter((c, index) => {
if (c?.hasChanges || (oldCursor.currentNode.children.length <= index)) {
indexChangedChildren.push(index);
return true;
}
return false;
});
// If we have changes and we *had* an error, the whole node should be refreshed.
if ((changedChildren.length === 0) || (newCursor.currentNode.hasError !== oldCursor.currentNode.hasError)) {
// walk up again until we get to the first named node, since unnamed nodes can be too granular
while (newCursor.currentNode.parent && next && !newCursor.currentNode.isNamed) {
next = gotoParent(newCursor, oldCursor);
}
// Use the end position of the previous node and the start position of the current node
const newNode = newCursor.currentNode;
const closestPreviousNode = getClosestPreviousNodes(newCursor, newTree) ?? newNode;
nodes.push({
startIndex: closestPreviousNode.startIndex,
endIndex: newNode.endIndex,
startPosition: closestPreviousNode.startPosition,
endPosition: newNode.endPosition
});
next = nextSiblingOrParentSibling(newCursor, oldCursor);
} else if (changedChildren.length >= 1) {
next = gotoNthChild(newCursor, oldCursor, indexChangedChildren[0]);
}
} else {
next = nextSiblingOrParentSibling(newCursor, oldCursor);
}
} while (next);
newCursor.delete();
oldCursor.delete();
return nodes;
}
private findTreeChanges(newTree: Parser.Tree, changedNodes: Parser.Range[], newRanges: Parser.Range[]): RangeChange[] {
let newRangeIndex = 0;
const mergedChanges: RangeChange[] = [];
// Find the parent in the new tree of the changed node
for (let nodeIndex = 0; nodeIndex < changedNodes.length; nodeIndex++) {
const node = changedNodes[nodeIndex];
if (mergedChanges.length > 0) {
if ((node.startIndex >= mergedChanges[mergedChanges.length - 1].newRangeStartOffset) && (node.endIndex <= mergedChanges[mergedChanges.length - 1].newRangeEndOffset)) {
// This node is within the previous range, skip it
continue;
}
}
const cursor = newTree.walk();
const cursorContainersNode = () => cursor.startIndex < node.startIndex && cursor.endIndex > node.endIndex;
while (cursorContainersNode()) {
// See if we can go to a child
let child = cursor.gotoFirstChild();
let foundChild = false;
while (child) {
if (cursorContainersNode() && cursor.currentNode.isNamed) {
foundChild = true;
break;
} else {
child = cursor.gotoNextSibling();
}
}
if (!foundChild) {
cursor.gotoParent();
break;
}
if (cursor.currentNode.childCount === 0) {
break;
}
}
const startPosition = cursor.currentNode.startPosition;
const endPosition = cursor.currentNode.endPosition;
const startIndex = cursor.currentNode.startIndex;
const endIndex = cursor.currentNode.endIndex;
const newChange = { newRange: new Range(startPosition.row + 1, startPosition.column + 1, endPosition.row + 1, endPosition.column + 1), newRangeStartOffset: startIndex, newRangeEndOffset: endIndex };
if ((newRangeIndex < newRanges.length) && rangesIntersect(newRanges[newRangeIndex], { startIndex, endIndex, startPosition, endPosition })) {
// combine the new change with the range
if (newRanges[newRangeIndex].startIndex < newChange.newRangeStartOffset) {
newChange.newRange = newChange.newRange.setStartPosition(newRanges[newRangeIndex].startPosition.row + 1, newRanges[newRangeIndex].startPosition.column + 1);
newChange.newRangeStartOffset = newRanges[newRangeIndex].startIndex;
}
if (newRanges[newRangeIndex].endIndex > newChange.newRangeEndOffset) {
newChange.newRange = newChange.newRange.setEndPosition(newRanges[newRangeIndex].endPosition.row + 1, newRanges[newRangeIndex].endPosition.column + 1);
newChange.newRangeEndOffset = newRanges[newRangeIndex].endIndex;
}
newRangeIndex++;
} else if (newRangeIndex < newRanges.length && newRanges[newRangeIndex].endIndex < newChange.newRangeStartOffset) {
// add the full range to the merged changes
mergedChanges.push({
newRange: new Range(newRanges[newRangeIndex].startPosition.row + 1, newRanges[newRangeIndex].startPosition.column + 1, newRanges[newRangeIndex].endPosition.row + 1, newRanges[newRangeIndex].endPosition.column + 1),
newRangeStartOffset: newRanges[newRangeIndex].startIndex,
newRangeEndOffset: newRanges[newRangeIndex].endIndex
});
}
if ((mergedChanges.length > 0) && (mergedChanges[mergedChanges.length - 1].newRangeEndOffset >= newChange.newRangeStartOffset)) {
// Merge the changes
mergedChanges[mergedChanges.length - 1].newRange = Range.fromPositions(mergedChanges[mergedChanges.length - 1].newRange.getStartPosition(), newChange.newRange.getEndPosition());
mergedChanges[mergedChanges.length - 1].newRangeEndOffset = newChange.newRangeEndOffset;
} else {
mergedChanges.push(newChange);
}
}
return this._constrainRanges(mergedChanges);
}
private _constrainRanges(changes: RangeChange[]): RangeChange[] {
if (!this.ranges) {
return changes;
}
const constrainedChanges: RangeChange[] = [];
let changesIndex = 0;
let rangesIndex = 0;
while (changesIndex < changes.length && rangesIndex < this.ranges.length) {
const change = changes[changesIndex];
const range = this.ranges[rangesIndex];
if (change.newRangeEndOffset < range.startIndex) {
// Change is before the range, move to the next change
changesIndex++;
} else if (change.newRangeStartOffset > range.endIndex) {
// Change is after the range, move to the next range
rangesIndex++;
} else {
// Change is within the range, constrain it
const newRangeStartOffset = Math.max(change.newRangeStartOffset, range.startIndex);
const newRangeEndOffset = Math.min(change.newRangeEndOffset, range.endIndex);
const newRange = change.newRange.intersectRanges(new Range(range.startPosition.row + 1, range.startPosition.column + 1, range.endPosition.row + 1, range.endPosition.column + 1))!;
constrainedChanges.push({
newRange,
newRangeEndOffset,
newRangeStartOffset
});
// Remove the intersected range from the current change
if (newRangeEndOffset < change.newRangeEndOffset) {
change.newRange = Range.fromPositions(newRange.getEndPosition(), change.newRange.getEndPosition());
change.newRangeStartOffset = newRangeEndOffset + 1;
} else {
// Move to the next change
changesIndex++;
}
}
}
return constrainedChanges;
}
private _unfiredChanges: IModelContentChangedEvent[] | undefined;
private _onDidChangeContentQueue: LimitedQueue = new LimitedQueue();
public onDidChangeContent(model: ITextModel, changes: IModelContentChangedEvent[] | undefined, ranges?: Parser.Range[]): void {
const version = model.getVersionId();
if (version === this._editVersion) {
return;
}
let newRanges: Parser.Range[] = [];
if (ranges) {
newRanges = this._setRanges(ranges);
}
if (changes && changes.length > 0) {
if (this._unfiredChanges) {
this._unfiredChanges.push(...changes);
} else {
this._unfiredChanges = changes;
}
for (const change of changes) {
this._applyEdits(change.changes, version);
}
} else {
this._applyEdits([], version);
}
this._onDidChangeContentQueue.queue(async () => {
if (this.isDisposed) {
// No need to continue the queue if we are disposed
return;
}
const oldTree = this._lastFullyParsed;
let changedNodes: Parser.Range[] | undefined;
if (this._lastFullyParsedWithEdits && this._lastFullyParsed) {
changedNodes = this.findChangedNodes(this._lastFullyParsedWithEdits, this._lastFullyParsed);
}
const completed = await this._parseAndUpdateTree(model, version);
if (completed) {
let ranges: RangeChange[] | undefined;
if (!changedNodes) {
if (this._ranges) {
ranges = this._ranges.map(r => ({ newRange: new Range(r.startPosition.row + 1, r.startPosition.column + 1, r.endPosition.row + 1, r.endPosition.column + 1), oldRangeLength: r.endIndex - r.startIndex, newRangeStartOffset: r.startIndex, newRangeEndOffset: r.endIndex }));
} else {
ranges = [{ newRange: model.getFullModelRange(), newRangeStartOffset: 0, newRangeEndOffset: model.getValueLength() }];
}
} else if (oldTree && changedNodes) {
ranges = this.findTreeChanges(completed, changedNodes, newRanges);
}
const changes = this._unfiredChanges ?? [];
this._unfiredChanges = undefined;
this._onDidUpdate.fire({ language: this.languageId, ranges, versionId: version, tree: completed, includedModelChanges: changes });
}
});
}
private _applyEdits(changes: IModelContentChange[], version: number) {
for (const change of changes) {
const originalTextLength = TextLength.ofRange(Range.lift(change.range));
const newTextLength = TextLength.ofText(change.text);
const summedTextLengths = change.text.length === 0 ? newTextLength : originalTextLength.add(newTextLength);
const edit = {
startIndex: change.rangeOffset,
oldEndIndex: change.rangeOffset + change.rangeLength,
newEndIndex: change.rangeOffset + change.text.length,
startPosition: { row: change.range.startLineNumber - 1, column: change.range.startColumn - 1 },
oldEndPosition: { row: change.range.endLineNumber - 1, column: change.range.endColumn - 1 },
newEndPosition: { row: change.range.startLineNumber + summedTextLengths.lineCount - 1, column: summedTextLengths.lineCount ? summedTextLengths.columnCount : (change.range.endColumn + summedTextLengths.columnCount) }
};
this._tree?.edit(edit);
this._lastFullyParsedWithEdits?.edit(edit);
}
this._editVersion = version;
}
private async _parseAndUpdateTree(model: ITextModel, version: number): Promise<Parser.Tree | undefined> {
const tree = await this._parse(model);
if (tree) {
this._tree?.delete();
this._tree = tree;
this._lastFullyParsed?.delete();
this._lastFullyParsed = tree.copy();
this._lastFullyParsedWithEdits?.delete();
this._lastFullyParsedWithEdits = tree.copy();
this._versionId = version;
return tree;
} else if (!this._tree) {
// No tree means this is the initial parse and there were edits
// The parse function doesn't handle this well and we can end up with an incorrect tree, so we reset the parser
this.parser.reset();
}
return undefined;
}
private _parse(model: ITextModel): Promise<Parser.Tree | undefined> {
let parseType: TelemetryParseType = TelemetryParseType.Full;
if (this.tree) {
parseType = TelemetryParseType.Incremental;
}
return this._parseAndYield(model, parseType);
}
private async _parseAndYield(model: ITextModel, parseType: TelemetryParseType): Promise<Parser.Tree | undefined> {
let time: number = 0;
let passes: number = 0;
const inProgressVersion = this._editVersion;
let newTree: Parser.Tree | null | undefined;
this._lastYieldTime = performance.now();
do {
const timer = performance.now();
try {
newTree = this.parser.parse((index: number, position?: Parser.Point) => this._parseCallback(model, index), this._tree, { progressCallback: this._parseProgressCallback.bind(this), includedRanges: this._ranges });
} catch (e) {
// Parsing can fail when the timeout is reached; it will resume on the next loop iteration
} finally {
time += performance.now() - timer;
passes++;
}
// So long as this isn't the initial parse, even if the model changes and edits are applied, the tree parsing will continue correctly after the await.
await new Promise<void>(resolve => setTimeout0(resolve));
} while (!model.isDisposed() && !this.isDisposed && !newTree && inProgressVersion === model.getVersionId());
this.sendParseTimeTelemetry(parseType, time, passes);
return (newTree && (inProgressVersion === model.getVersionId())) ? newTree : undefined;
}
private _lastYieldTime: number = 0;
private _parseProgressCallback(state: Parser.ParseState) {
const now = performance.now();
if (now - this._lastYieldTime > 50) {
this._lastYieldTime = now;
return true;
}
return false;
}
private _parseCallback(textModel: ITextModel, index: number): string | undefined {
try {
return textModel.getTextBuffer().getNearestChunk(index);
} catch (e) {
this._logService.debug('Error getting chunk for tree-sitter parsing', e);
}
return undefined;
}
private _ranges: Parser.Range[] | undefined;
private _setRanges(newRanges: Parser.Range[]): Parser.Range[] {
const unKnownRanges: Parser.Range[] = [];
// If we have existing ranges, find the parts of the new ranges that are not included in the existing ones
if (this._ranges) {
for (const newRange of newRanges) {
let isFullyIncluded = false;
for (let i = 0; i < this._ranges.length; i++) {
const existingRange = this._ranges[i];
if (rangesEqual(existingRange, newRange) || rangesIntersect(existingRange, newRange)) {
isFullyIncluded = true;
break;
}
}
if (!isFullyIncluded) {
unKnownRanges.push(newRange);
}
}
} else {
// No existing ranges, all new ranges are unknown
unKnownRanges.push(...newRanges);
}
this._ranges = newRanges;
return unKnownRanges;
}
get ranges(): Parser.Range[] | undefined {
return this._ranges;
}
private sendParseTimeTelemetry(parseType: TelemetryParseType, time: number, passes: number): void {
this._logService.debug(`Tree parsing (${parseType}) took ${time} ms and ${passes} passes.`);
type ParseTimeClassification = {
owner: 'alexr00';
comment: 'Used to understand how long it takes to parse a tree-sitter tree';
languageId: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'The programming language ID.' };
time: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; isMeasurement: true; comment: 'The ms it took to parse' };
passes: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; isMeasurement: true; comment: 'The number of passes it took to parse' };
};
if (parseType === TelemetryParseType.Full) {
this._telemetryService.publicLog2<{ languageId: string; time: number; passes: number }, ParseTimeClassification>(`treeSitter.fullParse`, { languageId: this.languageId, time, passes });
} else {
this._telemetryService.publicLog2<{ languageId: string; time: number; passes: number }, ParseTimeClassification>(`treeSitter.incrementalParse`, { languageId: this.languageId, time, passes });
}
}
}
function rangesEqual(a: Parser.Range, b: Parser.Range) {
return (a.startPosition.row === b.startPosition.row)
&& (a.startPosition.column === b.startPosition.column)
&& (a.endPosition.row === b.endPosition.row)
&& (a.endPosition.column === b.endPosition.column)
&& (a.startIndex === b.startIndex)
&& (a.endIndex === b.endIndex);
}
function rangesIntersect(a: Parser.Range, b: Parser.Range) {
return (a.startIndex <= b.startIndex && a.endIndex >= b.startIndex) ||
(b.startIndex <= a.startIndex && b.endIndex >= a.startIndex);
}
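rangesIntersect treats two ranges as intersecting when either one's start offset falls inside the other, so ranges that merely touch at a boundary also count. A small illustration with hypothetical values:
// Hypothetical values; the positions are kept consistent with the offsets.
const rangeA: Parser.Range = { startIndex: 0, endIndex: 10, startPosition: { row: 0, column: 0 }, endPosition: { row: 0, column: 10 } };
const rangeB: Parser.Range = { startIndex: 10, endIndex: 20, startPosition: { row: 0, column: 10 }, endPosition: { row: 0, column: 20 } };
const rangeC: Parser.Range = { startIndex: 25, endIndex: 30, startPosition: { row: 0, column: 25 }, endPosition: { row: 0, column: 30 } };
const aTouchesB = rangesIntersect(rangeA, rangeB); // true: rangeB starts exactly where rangeA ends
const aTouchesC = rangesIntersect(rangeA, rangeC); // false: there is a gap between rangeA and rangeC
const aEqualsA = rangesEqual(rangeA, rangeA);      // true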

View File

@ -1,147 +0,0 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type * as Parser from '@vscode/tree-sitter-wasm';
import { AppResourcePath, FileAccess, nodeModulesAsarUnpackedPath, nodeModulesPath } from '../../../../base/common/network.js';
import { EDITOR_EXPERIMENTAL_PREFER_TREESITTER, ITreeSitterImporter } from '../treeSitterParserService.js';
import { Disposable } from '../../../../base/common/lifecycle.js';
import { IFileService } from '../../../../platform/files/common/files.js';
import { canASAR } from '../../../../amdX.js';
import { Emitter, Event } from '../../../../base/common/event.js';
import { IEnvironmentService } from '../../../../platform/environment/common/environment.js';
import { PromiseResult } from '../../../../base/common/observable.js';
import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
export const MODULE_LOCATION_SUBPATH = `@vscode/tree-sitter-wasm/wasm`;
export function getModuleLocation(environmentService: IEnvironmentService): AppResourcePath {
return `${(canASAR && environmentService.isBuilt) ? nodeModulesAsarUnpackedPath : nodeModulesPath}/${MODULE_LOCATION_SUBPATH}`;
}
export class TreeSitterLanguages extends Disposable {
private _languages: AsyncCache<string, Parser.Language | undefined> = new AsyncCache();
public /*exposed for tests*/ readonly _onDidAddLanguage: Emitter<{ id: string; language: Parser.Language }> = this._register(new Emitter());
/**
* If you're looking for a specific language, check for it with `getLanguage` first, since that call will also kick off loading the language if it doesn't exist yet.
*/
public readonly onDidAddLanguage: Event<{ id: string; language: Parser.Language }> = this._onDidAddLanguage.event;
constructor(private readonly _treeSitterImporter: ITreeSitterImporter,
private readonly _fileService: IFileService,
private readonly _environmentService: IEnvironmentService,
configurationService: IConfigurationService,
private readonly _registeredLanguages: Map<string, string>,
) {
super();
this._register(configurationService.onDidChangeConfiguration(e => {
if (e.affectsConfiguration(EDITOR_EXPERIMENTAL_PREFER_TREESITTER)) {
for (const language of this._languages.keys()) {
if (e.affectsConfiguration(`${EDITOR_EXPERIMENTAL_PREFER_TREESITTER}.${language}`)) {
if (this._languages.getSyncIfCached(language) === undefined) {
this._languages.delete(language);
}
}
}
}
}));
}
public getOrInitLanguage(languageId: string): Parser.Language | undefined {
if (this._languages.isCached(languageId)) {
return this._languages.getSyncIfCached(languageId);
} else {
// kick off adding the language, but don't wait
this._addLanguage(languageId);
return undefined;
}
}
public async getLanguage(languageId: string): Promise<Parser.Language | undefined> {
if (this._languages.isCached(languageId)) {
return this._languages.getSyncIfCached(languageId);
} else {
await this._addLanguage(languageId);
return this._languages.get(languageId);
}
}
private async _addLanguage(languageId: string): Promise<void> {
const languagePromise = this._languages.get(languageId);
if (!languagePromise) {
this._languages.set(languageId, this._fetchLanguage(languageId));
const language = await this._languages.get(languageId);
if (!language) {
return undefined;
}
this._onDidAddLanguage.fire({ id: languageId, language });
}
}
private async _fetchLanguage(languageId: string): Promise<Parser.Language | undefined> {
const grammarName = this._registeredLanguages.get(languageId);
const languageLocation = this._getLanguageLocation(languageId);
if (!grammarName || !languageLocation) {
return undefined;
}
const wasmPath: AppResourcePath = `${languageLocation}/${grammarName}.wasm`;
const languageFile = await (this._fileService.readFile(FileAccess.asFileUri(wasmPath)));
const Language = await this._treeSitterImporter.getLanguageClass();
return Language.load(languageFile.value.buffer);
}
private _getLanguageLocation(languageId: string): AppResourcePath | undefined {
const grammarName = this._registeredLanguages.get(languageId);
if (!grammarName) {
return undefined;
}
return getModuleLocation(this._environmentService);
}
}
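getOrInitLanguage is the synchronous, fire-and-forget counterpart of getLanguage: it only returns a language that is already cached, but it still kicks off the fetch. A hedged consumer sketch follows; the function name is illustrative.
// Illustrative only: prefer the cached language when available, otherwise await the fetch.
async function resolveLanguage(languages: TreeSitterLanguages, languageId: string): Promise<Parser.Language | undefined> {
    const cached = languages.getOrInitLanguage(languageId); // also starts loading if not cached yet
    if (cached) {
        return cached;
    }
    return languages.getLanguage(languageId);
}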
class AsyncCache<TKey, T> {
private readonly _values = new Map<TKey, PromiseWithSyncAccess<T>>();
set(key: TKey, promise: Promise<T>) {
this._values.set(key, new PromiseWithSyncAccess(promise));
}
get(key: TKey): Promise<T> | undefined {
return this._values.get(key)?.promise;
}
getSyncIfCached(key: TKey): T | undefined {
return this._values.get(key)?.result?.data;
}
isCached(key: TKey): boolean {
return this._values.get(key)?.result !== undefined;
}
delete(key: TKey) {
return this._values.delete(key);
}
keys() {
return this._values.keys();
}
}
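/**
* Wraps a promise and records its settled value or rejection reason for synchronous access.
*/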
class PromiseWithSyncAccess<T> {
private _result: PromiseResult<T> | undefined;
/**
* Returns undefined if the promise did not resolve yet.
*/
get result(): PromiseResult<T> | undefined {
return this._result;
}
constructor(public readonly promise: Promise<T>) {
promise.then(result => {
this._result = new PromiseResult(result, undefined);
}).catch(e => {
this._result = new PromiseResult<T>(undefined, e);
});
}
}
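A brief usage sketch of the two lookup paths above (illustrative only, not part of this diff; `languages` is assumed to be a TreeSitterLanguages instance wired up as in the service below). `getOrInitLanguage` answers synchronously and merely kicks off loading, while `getLanguage` awaits the load:

async function ensureLanguageLoaded(languages: TreeSitterLanguages, languageId: string): Promise<Parser.Language | undefined> {
	// Synchronous path: returns the language only when its wasm grammar is already cached;
	// otherwise it starts the fetch in the background and returns undefined.
	const cached = languages.getOrInitLanguage(languageId);
	if (cached) {
		return cached;
	}
	// Asynchronous path: resolves once the grammar has been fetched and loaded,
	// or with undefined when no grammar is registered for this language id.
	return languages.getLanguage(languageId);
}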


@ -0,0 +1,31 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type { Language, Parser, Query } from '@vscode/tree-sitter-wasm';
import { createDecorator } from '../../../../platform/instantiation/common/instantiation.js';
import { IReader } from '../../../../base/common/observable.js';
export const ITreeSitterLibraryService = createDecorator<ITreeSitterLibraryService>('treeSitterLibraryService');
export interface ITreeSitterLibraryService {
readonly _serviceBrand: undefined;
getParserClass(): Promise<typeof Parser>;
supportsLanguage(languageId: string, reader: IReader | undefined): boolean;
getLanguage(languageId: string, reader: IReader | undefined): Language | undefined;
/**
* A return value of `null` indicates that there are no injection queries for this language.
* @param languageId The language to look up injection queries for.
* @param reader Optional observable reader; when passed, the caller is re-evaluated if the value changes.
*/
getInjectionQueries(languageId: string, reader: IReader | undefined): Query | null | undefined;
/**
* A return value of `null` indicates that there are no highlighting queries for this language.
* @param languageId The language to look up highlighting queries for.
* @param reader Optional observable reader; when passed, the caller is re-evaluated if the value changes.
*/
getHighlightingQueries(languageId: string, reader: IReader | undefined): Query | null | undefined;
}
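A hedged consumer sketch (not part of this diff) of how the `reader` parameter is meant to be used: read the service inside a `derived` observable so the computation re-runs when language support or queries change. The helper name and wiring are assumptions.

import { derived } from '../../../../base/common/observable.js';

// Illustrative: yields the language plus its highlighting queries once both are available.
function observeHighlighting(library: ITreeSitterLibraryService, languageId: string) {
	return derived(reader => {
		if (!library.supportsLanguage(languageId, reader)) {
			return undefined;
		}
		const language = library.getLanguage(languageId, reader);
		const queries = library.getHighlightingQueries(languageId, reader);
		// null means the language ships no highlighting queries.
		return language && queries ? { language, queries } : undefined;
	});
}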


@ -1,201 +0,0 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type * as Parser from '@vscode/tree-sitter-wasm';
import { AppResourcePath, FileAccess } from '../../../../base/common/network.js';
import { EDITOR_EXPERIMENTAL_PREFER_TREESITTER, ITreeSitterParserService, ITextModelTreeSitter, TreeUpdateEvent, ITreeSitterImporter, TREESITTER_ALLOWED_SUPPORT, ModelTreeUpdateEvent } from '../treeSitterParserService.js';
import { IModelService } from '../model.js';
import { Disposable, DisposableMap, DisposableStore } from '../../../../base/common/lifecycle.js';
import { ITextModel } from '../../model.js';
import { IFileService } from '../../../../platform/files/common/files.js';
import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
import { Emitter, Event } from '../../../../base/common/event.js';
import { IEnvironmentService } from '../../../../platform/environment/common/environment.js';
import { TextModelTreeSitter, TextModelTreeSitterItem } from './textModelTreeSitter.js';
import { getModuleLocation, TreeSitterLanguages } from './treeSitterLanguages.js';
import { IInstantiationService } from '../../../../platform/instantiation/common/instantiation.js';
const EDITOR_TREESITTER_TELEMETRY = 'editor.experimental.treeSitterTelemetry';
const FILENAME_TREESITTER_WASM = `tree-sitter.wasm`;
export class TreeSitterTextModelService extends Disposable implements ITreeSitterParserService {
readonly _serviceBrand: undefined;
private _init!: Promise<boolean>;
private _textModelTreeSitters: DisposableMap<ITextModel, TextModelTreeSitterItem> = this._register(new DisposableMap());
private readonly _registeredLanguages: Map<string, string> = new Map();
private readonly _treeSitterLanguages: TreeSitterLanguages;
public readonly onDidAddLanguage: Event<{ id: string; language: Parser.Language }>;
private _onDidUpdateTree: Emitter<TreeUpdateEvent> = this._register(new Emitter());
public readonly onDidUpdateTree: Event<TreeUpdateEvent> = this._onDidUpdateTree.event;
public isTest: boolean = false;
constructor(@IModelService private readonly _modelService: IModelService,
@IFileService fileService: IFileService,
@IConfigurationService private readonly _configurationService: IConfigurationService,
@IEnvironmentService private readonly _environmentService: IEnvironmentService,
@ITreeSitterImporter private readonly _treeSitterImporter: ITreeSitterImporter,
@IInstantiationService private readonly _instantiationService: IInstantiationService
) {
super();
this._treeSitterLanguages = this._register(new TreeSitterLanguages(this._treeSitterImporter, fileService, this._environmentService, this._configurationService, this._registeredLanguages));
this.onDidAddLanguage = this._treeSitterLanguages.onDidAddLanguage;
this._register(this._configurationService.onDidChangeConfiguration(e => {
if (e.affectsConfiguration(EDITOR_EXPERIMENTAL_PREFER_TREESITTER)) {
this._supportedLanguagesChanged();
}
}));
this._supportedLanguagesChanged();
}
getOrInitLanguage(languageId: string): Parser.Language | undefined {
return this._treeSitterLanguages.getOrInitLanguage(languageId);
}
getParseResult(textModel: ITextModel): ITextModelTreeSitter | undefined {
const textModelTreeSitter = this._textModelTreeSitters.get(textModel);
return textModelTreeSitter?.textModelTreeSitter;
}
/**
* For testing
*/
async getTree(content: string, languageId: string): Promise<Parser.Tree | undefined> {
const language = await this.getLanguage(languageId);
const Parser = await this._treeSitterImporter.getParserClass();
if (language) {
const parser = new Parser();
parser.setLanguage(language);
return parser.parse(content) ?? undefined;
}
return undefined;
}
getTreeSync(content: string, languageId: string): Parser.Tree | undefined {
const language = this.getOrInitLanguage(languageId);
const Parser = this._treeSitterImporter.parserClass;
if (language && Parser) {
const parser = new Parser();
parser.setLanguage(language);
return parser.parse(content) ?? undefined;
}
return undefined;
}
async getLanguage(languageId: string): Promise<Parser.Language | undefined> {
await this._init;
return this._treeSitterLanguages.getLanguage(languageId);
}
private async _doInitParser() {
const Parser = await this._treeSitterImporter.getParserClass();
const environmentService = this._environmentService;
const isTest = this.isTest;
await Parser.init({
locateFile(_file: string, _folder: string) {
const location: AppResourcePath = `${getModuleLocation(environmentService)}/${FILENAME_TREESITTER_WASM}`;
if (isTest) {
return FileAccess.asFileUri(location).toString(true);
} else {
return FileAccess.asBrowserUri(location).toString(true);
}
}
});
return true;
}
private _hasInit: boolean = false;
private async _initParser(hasLanguages: boolean): Promise<boolean> {
if (this._hasInit) {
return this._init;
}
if (hasLanguages) {
this._hasInit = true;
this._init = this._doInitParser();
// New init, we need to deal with all the existing text models and set up listeners
this._init.then(() => this._registerModelServiceListeners());
} else {
this._init = Promise.resolve(false);
}
return this._init;
}
private async _supportedLanguagesChanged() {
let hasLanguages = false;
const handleLanguage = (languageId: string) => {
if (this._getSetting(languageId)) {
hasLanguages = true;
this._addGrammar(languageId, `tree-sitter-${languageId}`);
} else {
this._removeGrammar(languageId);
}
};
// Eventually this should use an extension point to contribute tree-sitter grammars, but for now they are hard-coded in core
for (const languageId of TREESITTER_ALLOWED_SUPPORT) {
handleLanguage(languageId);
}
return this._initParser(hasLanguages);
}
private _getSetting(languageId: string): boolean {
const setting = this._configurationService.getValue<boolean>(`${EDITOR_EXPERIMENTAL_PREFER_TREESITTER}.${languageId}`);
if (!setting && TREESITTER_ALLOWED_SUPPORT.includes(languageId)) {
return this._configurationService.getValue<boolean>(EDITOR_TREESITTER_TELEMETRY);
}
return setting;
}
private async _registerModelServiceListeners() {
this._register(this._modelService.onModelAdded(model => {
this._createTextModelTreeSitter(model);
}));
this._register(this._modelService.onModelRemoved(model => {
this._textModelTreeSitters.deleteAndDispose(model);
}));
this._modelService.getModels().forEach(model => this._createTextModelTreeSitter(model));
}
public async getTextModelTreeSitter(model: ITextModel, parseImmediately: boolean = false): Promise<ITextModelTreeSitter> {
await this.getLanguage(model.getLanguageId());
return this._createTextModelTreeSitter(model, parseImmediately);
}
private _createTextModelTreeSitter(model: ITextModel, parseImmediately: boolean = true): ITextModelTreeSitter {
const textModelTreeSitter = this._instantiationService.createInstance(TextModelTreeSitter, model, this._treeSitterLanguages, parseImmediately);
const disposables = new DisposableStore();
disposables.add(textModelTreeSitter);
disposables.add(textModelTreeSitter.onDidChangeParseResult((e) => this._handleOnDidChangeParseResult(e, model)));
this._textModelTreeSitters.set(model, {
textModelTreeSitter,
disposables,
dispose: disposables.dispose.bind(disposables)
});
return textModelTreeSitter;
}
private _handleOnDidChangeParseResult(change: ModelTreeUpdateEvent, model: ITextModel) {
this._onDidUpdateTree.fire({ textModel: model, ranges: change.ranges, versionId: change.versionId, tree: change.tree, languageId: change.languageId, hasInjections: change.hasInjections });
}
private _addGrammar(languageId: string, grammarName: string) {
if (!this._registeredLanguages.has(languageId)) {
this._registeredLanguages.set(languageId, grammarName);
}
}
private _removeGrammar(languageId: string) {
if (this._registeredLanguages.has(languageId)) {
this._registeredLanguages.delete(languageId);
}
}
}


@ -0,0 +1,16 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { createDecorator } from '../../../../platform/instantiation/common/instantiation.js';
import { IObservable, IReader } from '../../../../base/common/observable.js';
export const ITreeSitterThemeService = createDecorator<ITreeSitterThemeService>('treeSitterThemeService');
export interface ITreeSitterThemeService {
readonly _serviceBrand: undefined;
readonly onChange: IObservable<void>;
findMetadata(captureNames: string[], languageId: number, bracket: boolean, reader: IReader | undefined): number;
}
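For context, a minimal sketch (assumed helper, not part of this diff) of how the returned number is typically consumed: it is an encoded token metadata value that can be unpacked with the TokenMetadata helpers from encodedTokenAttributes.ts (import path assumed relative to this file).

import { TokenMetadata } from '../../encodedTokenAttributes.js';

// Illustrative: resolve the rendered style for a set of tree-sitter capture names in one language.
function resolveCaptureStyle(themeService: ITreeSitterThemeService, captureNames: string[], encodedLanguageId: number) {
	const metadata = themeService.findMetadata(captureNames, encodedLanguageId, /* bracket */ false, undefined);
	return {
		foreground: TokenMetadata.getForeground(metadata),
		fontStyle: TokenMetadata.getFontStyle(metadata),
	};
}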


@ -1,135 +0,0 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type * as Parser from '@vscode/tree-sitter-wasm';
import { Event } from '../../../base/common/event.js';
import { ITextModel } from '../model.js';
import { createDecorator } from '../../../platform/instantiation/common/instantiation.js';
import { Range } from '../core/range.js';
import { importAMDNodeModule } from '../../../amdX.js';
import { IModelContentChangedEvent } from '../textModelEvents.js';
export const EDITOR_EXPERIMENTAL_PREFER_TREESITTER = 'editor.experimental.preferTreeSitter';
export const TREESITTER_ALLOWED_SUPPORT = ['css', 'typescript', 'ini', 'regex'];
export const ITreeSitterParserService = createDecorator<ITreeSitterParserService>('treeSitterParserService');
export interface RangeWithOffsets {
range: Range;
startOffset: number;
endOffset: number;
}
export interface RangeChange {
newRange: Range;
newRangeStartOffset: number;
newRangeEndOffset: number;
}
export interface TreeParseUpdateEvent {
ranges: RangeChange[] | undefined;
language: string;
versionId: number;
tree: Parser.Tree;
includedModelChanges: IModelContentChangedEvent[];
}
export interface ModelTreeUpdateEvent {
ranges: RangeChange[];
versionId: number;
tree: ITextModelTreeSitter;
languageId: string;
hasInjections: boolean;
}
export interface TreeUpdateEvent extends ModelTreeUpdateEvent {
textModel: ITextModel;
}
export interface ITreeSitterParserService {
readonly _serviceBrand: undefined;
onDidAddLanguage: Event<{ id: string; language: Parser.Language }>;
getOrInitLanguage(languageId: string): Parser.Language | undefined;
getLanguage(languageId: string): Promise<Parser.Language | undefined>;
getParseResult(textModel: ITextModel): ITextModelTreeSitter | undefined;
getTree(content: string, languageId: string): Promise<Parser.Tree | undefined>;
getTreeSync(content: string, languageId: string): Parser.Tree | undefined;
onDidUpdateTree: Event<TreeUpdateEvent>;
/**
* For testing purposes so that the time to parse can be measured.
*/
getTextModelTreeSitter(model: ITextModel, parseImmediately?: boolean): Promise<ITextModelTreeSitter | undefined>;
}
export interface ITreeSitterParseResult {
readonly tree: Parser.Tree | undefined;
readonly language: Parser.Language;
readonly languageId: string;
readonly ranges: Parser.Range[] | undefined;
versionId: number;
}
export interface ITextModelTreeSitter {
/**
* For testing purposes so that the time to parse can be measured.
*/
parse(languageId?: string): Promise<ITreeSitterParseResult | undefined>;
textModel: ITextModel;
parseResult: ITreeSitterParseResult | undefined;
getInjection(offset: number, parentLanguage: string): ITreeSitterParseResult | undefined;
dispose(): void;
}
export const ITreeSitterImporter = createDecorator<ITreeSitterImporter>('treeSitterImporter');
export interface ITreeSitterImporter {
readonly _serviceBrand: undefined;
getParserClass(): Promise<typeof Parser.Parser>;
readonly parserClass: typeof Parser.Parser | undefined;
getLanguageClass(): Promise<typeof Parser.Language>;
getQueryClass(): Promise<typeof Parser.Query>;
}
export class TreeSitterImporter implements ITreeSitterImporter {
readonly _serviceBrand: undefined;
private _treeSitterImport: typeof import('@vscode/tree-sitter-wasm') | undefined;
constructor() { }
private async _getTreeSitterImport() {
if (!this._treeSitterImport) {
this._treeSitterImport = await importAMDNodeModule<typeof import('@vscode/tree-sitter-wasm')>('@vscode/tree-sitter-wasm', 'wasm/tree-sitter.js');
}
return this._treeSitterImport;
}
get parserClass() {
return this._parserClass;
}
private _parserClass: typeof Parser.Parser | undefined;
public async getParserClass() {
if (!this._parserClass) {
this._parserClass = (await this._getTreeSitterImport()).Parser;
}
return this._parserClass;
}
private _languageClass: typeof Parser.Language | undefined;
public async getLanguageClass() {
if (!this._languageClass) {
this._languageClass = (await this._getTreeSitterImport()).Language;
}
return this._languageClass;
}
private _queryClass: typeof Parser.Query | undefined;
public async getQueryClass() {
if (!this._queryClass) {
this._queryClass = (await this._getTreeSitterImport()).Query;
}
return this._queryClass;
}
}
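The importer above loads `@vscode/tree-sitter-wasm` dynamically at most once and caches the exported classes. A short usage sketch (illustrative; it mirrors what `getTree`/`getTreeSync` above do, with explicit cleanup of the wasm-backed parser):

async function parseOnce(importer: ITreeSitterImporter, language: Parser.Language, content: string): Promise<Parser.Tree | undefined> {
	const ParserClass = await importer.getParserClass(); // dynamic import happens on the first call only
	const parser = new ParserClass();
	try {
		parser.setLanguage(language);
		return parser.parse(content) ?? undefined;
	} finally {
		parser.delete(); // wasm-backed objects are freed explicitly
	}
}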


@ -36,6 +36,8 @@ import { TestNotificationService } from '../../../../../platform/notification/te
import { ColorScheme } from '../../../../../platform/theme/common/theme.js';
import { TestColorTheme, TestThemeService } from '../../../../../platform/theme/test/common/testThemeService.js';
import { UndoRedoService } from '../../../../../platform/undoRedo/common/undoRedoService.js';
import { ITreeSitterLibraryService } from '../../../../common/services/treeSitter/treeSitterLibraryService.js';
import { TestTreeSitterLibraryService } from '../../../../test/common/services/testTreeSitterLibraryService.js';
suite('ModelSemanticColoring', () => {
@ -55,6 +57,7 @@ suite('ModelSemanticColoring', () => {
const instantiationService = new TestInstantiationService();
instantiationService.set(ILanguageService, languageService);
instantiationService.set(ILanguageConfigurationService, new TestLanguageConfigurationService());
instantiationService.set(ITreeSitterLibraryService, new TestTreeSitterLibraryService());
modelService = disposables.add(new ModelService(
configService,
new TestTextResourcePropertiesService(configService),


@ -96,9 +96,9 @@ import { onUnexpectedError } from '../../../base/common/errors.js';
import { ExtensionKind, IEnvironmentService, IExtensionHostDebugParams } from '../../../platform/environment/common/environment.js';
import { mainWindow } from '../../../base/browser/window.js';
import { ResourceMap } from '../../../base/common/map.js';
import { ITreeSitterParserService } from '../../common/services/treeSitterParserService.js';
import { StandaloneTreeSitterParserService } from './standaloneTreeSitterService.js';
import { IWebWorkerDescriptor } from '../../../base/browser/webWorkerFactory.js';
import { ITreeSitterLibraryService } from '../../common/services/treeSitter/treeSitterLibraryService.js';
import { StandaloneTreeSitterLibraryService } from './standaloneTreeSitterLibraryService.js';
class SimpleModel implements IResolvedTextEditorModel {
@ -1162,7 +1162,7 @@ registerSingleton(IClipboardService, BrowserClipboardService, InstantiationType.
registerSingleton(IContextMenuService, StandaloneContextMenuService, InstantiationType.Eager);
registerSingleton(IMenuService, MenuService, InstantiationType.Eager);
registerSingleton(IAccessibilitySignalService, StandaloneAccessbilitySignalService, InstantiationType.Eager);
registerSingleton(ITreeSitterParserService, StandaloneTreeSitterParserService, InstantiationType.Eager);
registerSingleton(ITreeSitterLibraryService, StandaloneTreeSitterLibraryService, InstantiationType.Eager);
/**
* We don't want to eagerly instantiate services because embedders get a one time chance


@ -0,0 +1,40 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type { Parser, Language, Query } from '@vscode/tree-sitter-wasm';
import { IReader } from '../../../base/common/observable.js';
import { ITreeSitterLibraryService } from '../../../editor/common/services/treeSitter/treeSitterLibraryService.js';
export class StandaloneTreeSitterLibraryService implements ITreeSitterLibraryService {
readonly _serviceBrand: undefined;
getParserClass(): Promise<typeof Parser> {
throw new Error('getParserClass is not implemented in StandaloneTreeSitterLibraryService');
}
supportsLanguage(languageId: string, reader: IReader | undefined): boolean {
return false;
}
getLanguage(languageId: string, reader: IReader | undefined): Language | undefined {
return undefined;
}
/**
* Stub for the standalone editor: there are never injection queries, so this always returns `null`.
*/
getInjectionQueries(languageId: string, reader: IReader | undefined): Query | null | undefined {
return null;
}
/**
* Stub for the standalone editor: there are never highlighting queries, so this always returns `null`.
*/
getHighlightingQueries(languageId: string, reader: IReader | undefined): Query | null | undefined {
return null;
}
}


@ -1,38 +0,0 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type * as Parser from '@vscode/tree-sitter-wasm';
import { Event } from '../../../base/common/event.js';
import { ITextModel } from '../../common/model.js';
import { ITextModelTreeSitter, ITreeSitterParserService, TreeUpdateEvent } from '../../common/services/treeSitterParserService.js';
/**
* The monaco build doesn't like the dynamic import of tree sitter in the real service.
* We use a dummy service here to make the build happy.
*/
export class StandaloneTreeSitterParserService implements ITreeSitterParserService {
async getLanguage(languageId: string): Promise<Parser.Language | undefined> {
return undefined;
}
getTreeSync(content: string, languageId: string): Parser.Tree | undefined {
return undefined;
}
async getTextModelTreeSitter(model: ITextModel, parseImmediately?: boolean): Promise<ITextModelTreeSitter | undefined> {
return undefined;
}
async getTree(content: string, languageId: string): Promise<Parser.Tree | undefined> {
return undefined;
}
onDidUpdateTree: Event<TreeUpdateEvent> = Event.None;
readonly _serviceBrand: undefined;
onDidAddLanguage: Event<{ id: string; language: Parser.Language }> = Event.None;
getOrInitLanguage(_languageId: string): Parser.Language | undefined {
return undefined;
}
getParseResult(textModel: ITextModel): ITextModelTreeSitter | undefined {
return undefined;
}
}


@ -1,180 +0,0 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import assert from 'assert';
import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../base/test/common/utils.js';
import type * as Parser from '@vscode/tree-sitter-wasm';
import { createTextModel } from '../../common/testTextModel.js';
import { timeout } from '../../../../base/common/async.js';
import { ConsoleMainLogger, ILogService } from '../../../../platform/log/common/log.js';
import { ITelemetryService } from '../../../../platform/telemetry/common/telemetry.js';
import { LogService } from '../../../../platform/log/common/logService.js';
import { mock } from '../../../../base/test/common/mock.js';
import { ITreeSitterImporter } from '../../../common/services/treeSitterParserService.js';
import { TextModelTreeSitter } from '../../../common/services/treeSitter/textModelTreeSitter.js';
import { TreeSitterLanguages } from '../../../common/services/treeSitter/treeSitterLanguages.js';
import { TestConfigurationService } from '../../../../platform/configuration/test/common/testConfigurationService.js';
class MockParser implements Parser.Parser {
language: Parser.Language | null = null;
delete(): void { }
setLanguage(language: Parser.Language | null) { return this; }
parse(callback: string | Parser.ParseCallback, oldTree?: Parser.Tree | null, options?: Parser.ParseOptions): Parser.Tree | null {
return new MockTree();
}
reset(): void { }
getIncludedRanges(): Parser.Range[] {
return [];
}
getTimeoutMicros(): number { return 0; }
setTimeoutMicros(timeout: number): void { }
setLogger(callback: Parser.LogCallback | boolean | null): this {
throw new Error('Method not implemented.');
}
getLogger(): Parser.LogCallback | null {
throw new Error('Method not implemented.');
}
}
class MockTreeSitterImporter implements ITreeSitterImporter {
_serviceBrand: undefined;
async getParserClass(): Promise<typeof Parser.Parser> {
return MockParser as any;
}
async getLanguageClass(): Promise<typeof Parser.Language> {
return MockLanguage as any;
}
async getQueryClass(): Promise<typeof Parser.Query> {
throw new Error('Method not implemented.');
}
parserClass = MockParser as any;
}
class MockTree implements Parser.Tree {
language: Parser.Language = new MockLanguage();
editorLanguage: string = '';
editorContents: string = '';
rootNode: Parser.Node = {} as any;
rootNodeWithOffset(offsetBytes: number, offsetExtent: Parser.Point): Parser.Node {
throw new Error('Method not implemented.');
}
copy(): Parser.Tree {
throw new Error('Method not implemented.');
}
delete(): void { }
edit(edit: Parser.Edit): Parser.Tree {
return this;
}
walk(): Parser.TreeCursor {
throw new Error('Method not implemented.');
}
getChangedRanges(other: Parser.Tree): Parser.Range[] {
throw new Error('Method not implemented.');
}
getIncludedRanges(): Parser.Range[] {
throw new Error('Method not implemented.');
}
getEditedRange(other: Parser.Tree): Parser.Range {
throw new Error('Method not implemented.');
}
getLanguage(): Parser.Language {
throw new Error('Method not implemented.');
}
}
class MockLanguage implements Parser.Language {
types: string[] = [];
fields: (string | null)[] = [];
get name(): string | null {
throw new Error('Method not implemented.');
}
get abiVersion(): number {
throw new Error('Method not implemented.');
}
get metadata(): Parser.LanguageMetadata | null {
throw new Error('Method not implemented.');
}
get supertypes(): number[] {
throw new Error('Method not implemented.');
}
subtypes(supertype: number): number[] {
throw new Error('Method not implemented.');
}
version: number = 0;
fieldCount: number = 0;
stateCount: number = 0;
nodeTypeCount: number = 0;
fieldNameForId(fieldId: number): string | null {
throw new Error('Method not implemented.');
}
fieldIdForName(fieldName: string): number | null {
throw new Error('Method not implemented.');
}
idForNodeType(type: string, named: boolean): number {
throw new Error('Method not implemented.');
}
nodeTypeForId(typeId: number): string | null {
throw new Error('Method not implemented.');
}
nodeTypeIsNamed(typeId: number): boolean {
throw new Error('Method not implemented.');
}
nodeTypeIsVisible(typeId: number): boolean {
throw new Error('Method not implemented.');
}
nextState(stateId: number, typeId: number): number {
throw new Error('Method not implemented.');
}
query(source: string): Parser.Query {
throw new Error('Method not implemented.');
}
lookaheadIterator(stateId: number): Parser.LookaheadIterator | null {
throw new Error('Method not implemented.');
}
languageId: string = '';
}
suite('TreeSitterParserService', function () {
const treeSitterImporter: ITreeSitterImporter = new MockTreeSitterImporter();
let logService: ILogService;
let telemetryService: ITelemetryService;
setup(function () {
logService = new LogService(new ConsoleMainLogger());
telemetryService = new class extends mock<ITelemetryService>() {
override async publicLog2() {
//
}
};
});
const store = ensureNoDisposablesAreLeakedInTestSuite();
test('TextModelTreeSitter race condition: first language is slow to load', async function () {
class MockTreeSitterLanguages extends TreeSitterLanguages {
private async _fetchJavascript(): Promise<void> {
await timeout(200);
const language = new MockLanguage();
language.languageId = 'javascript';
this._onDidAddLanguage.fire({ id: 'javascript', language });
}
public override getOrInitLanguage(languageId: string): Parser.Language | undefined {
if (languageId === 'javascript') {
this._fetchJavascript();
return undefined;
}
const language = new MockLanguage();
language.languageId = languageId;
return language;
}
}
const mockConfigurationService = new TestConfigurationService();
const treeSitterLanguages: TreeSitterLanguages = store.add(new MockTreeSitterLanguages(treeSitterImporter, {} as any, { isBuilt: false } as any, mockConfigurationService, new Map()));
const textModel = store.add(createTextModel('console.log("Hello, world!");', 'javascript'));
const textModelTreeSitter = store.add(new TextModelTreeSitter(textModel, treeSitterLanguages, false, treeSitterImporter, logService, telemetryService, { exists: async () => false } as any));
textModel.setLanguage('typescript');
await timeout(300);
assert.strictEqual((textModelTreeSitter.parseResult?.language as MockLanguage).languageId, 'typescript');
});
});


@ -23,11 +23,9 @@ import { LanguageService } from '../../common/services/languageService.js';
import { IModelService } from '../../common/services/model.js';
import { ModelService } from '../../common/services/modelService.js';
import { ITextResourcePropertiesService } from '../../common/services/textResourceConfiguration.js';
import { ITreeSitterParserService } from '../../common/services/treeSitterParserService.js';
import { ViewModel } from '../../common/viewModel/viewModelImpl.js';
import { TestConfiguration } from './config/testConfiguration.js';
import { TestCodeEditorService, TestCommandService } from './editorTestServices.js';
import { TestTreeSitterParserService } from '../common/services/testTreeSitterService.js';
import { TestLanguageConfigurationService } from '../common/modes/testLanguageConfigurationService.js';
import { TestEditorWorkerService } from '../common/services/testEditorWorkerService.js';
import { TestTextResourcePropertiesService } from '../common/services/testTextResourcePropertiesService.js';
@ -61,6 +59,8 @@ import { IThemeService } from '../../../platform/theme/common/themeService.js';
import { TestThemeService } from '../../../platform/theme/test/common/testThemeService.js';
import { IUndoRedoService } from '../../../platform/undoRedo/common/undoRedo.js';
import { UndoRedoService } from '../../../platform/undoRedo/common/undoRedoService.js';
import { ITreeSitterLibraryService } from '../../common/services/treeSitter/treeSitterLibraryService.js';
import { TestTreeSitterLibraryService } from '../common/services/testTreeSitterLibraryService.js';
export interface ITestCodeEditor extends IActiveCodeEditor {
getViewModel(): ViewModel | undefined;
@ -220,7 +220,7 @@ export function createCodeEditorServices(disposables: Pick<DisposableStore, 'add
});
define(ILanguageFeatureDebounceService, LanguageFeatureDebounceService);
define(ILanguageFeaturesService, LanguageFeaturesService);
define(ITreeSitterParserService, TestTreeSitterParserService);
define(ITreeSitterLibraryService, TestTreeSitterLibraryService);
const instantiationService = disposables.add(new TestInstantiationService(services, true));
disposables.add(toDisposable(() => {


@ -6,7 +6,7 @@
import assert from 'assert';
import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../base/test/common/utils.js';
import { TextModel } from '../../../common/model/textModel.js';
import { TokenQuality, TokenStore } from '../../../common/model/tokenStore.js';
import { TokenQuality, TokenStore } from '../../../common/model/tokens/treeSitter/tokenStore.js';
suite('TokenStore', () => {
let textModel: TextModel;


@ -0,0 +1,32 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type { Parser, Language, Query } from '@vscode/tree-sitter-wasm';
import { IReader } from '../../../../base/common/observable.js';
import { ITreeSitterLibraryService } from '../../../../editor/common/services/treeSitter/treeSitterLibraryService.js';
export class TestTreeSitterLibraryService implements ITreeSitterLibraryService {
readonly _serviceBrand: undefined;
getParserClass(): Promise<typeof Parser> {
throw new Error('getParserClass is not implemented in TestTreeSitterLibraryService');
}
supportsLanguage(languageId: string, reader: IReader | undefined): boolean {
return false;
}
getLanguage(languageId: string, reader: IReader | undefined): Language | undefined {
return undefined;
}
getInjectionQueries(languageId: string, reader: IReader | undefined): Query | null | undefined {
return null;
}
getHighlightingQueries(languageId: string, reader: IReader | undefined): Query | null | undefined {
return null;
}
}


@ -1,37 +0,0 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type * as Parser from '@vscode/tree-sitter-wasm';
import { Event } from '../../../../base/common/event.js';
import { ITextModel } from '../../../common/model.js';
import { ITreeSitterParserService, ITextModelTreeSitter, TreeUpdateEvent } from '../../../common/services/treeSitterParserService.js';
export class TestTreeSitterParserService implements ITreeSitterParserService {
getLanguage(languageId: string): Promise<Parser.Language | undefined> {
throw new Error('Method not implemented.');
}
getTreeSync(content: string, languageId: string): Parser.Tree | undefined {
throw new Error('Method not implemented.');
}
async getTextModelTreeSitter(model: ITextModel, parseImmediately?: boolean): Promise<ITextModelTreeSitter> {
throw new Error('Method not implemented.');
}
getTree(content: string, languageId: string): Promise<Parser.Tree | undefined> {
throw new Error('Method not implemented.');
}
onDidUpdateTree: Event<TreeUpdateEvent> = Event.None;
onDidAddLanguage: Event<{ id: string; language: Parser.Language }> = Event.None;
_serviceBrand: undefined;
getOrInitLanguage(languageId: string): Parser.Language | undefined {
throw new Error('Method not implemented.');
}
waitForLanguage(languageId: string): Promise<Parser.Language | undefined> {
throw new Error('Method not implemented.');
}
getParseResult(textModel: ITextModel): ITextModelTreeSitter | undefined {
throw new Error('Method not implemented.');
}
}


@ -34,8 +34,8 @@ import { ILanguageFeaturesService } from '../../common/services/languageFeatures
import { LanguageFeaturesService } from '../../common/services/languageFeaturesService.js';
import { IEnvironmentService } from '../../../platform/environment/common/environment.js';
import { mock } from '../../../base/test/common/mock.js';
import { ITreeSitterParserService } from '../../common/services/treeSitterParserService.js';
import { TestTreeSitterParserService } from './services/testTreeSitterService.js';
import { ITreeSitterLibraryService } from '../../common/services/treeSitter/treeSitterLibraryService.js';
import { TestTreeSitterLibraryService } from './services/testTreeSitterLibraryService.js';
class TestTextModel extends TextModel {
public registerDisposable(disposable: IDisposable): void {
@ -107,6 +107,7 @@ export function createModelServices(disposables: DisposableStore, services: Serv
[ILanguageFeatureDebounceService, LanguageFeatureDebounceService],
[ILanguageFeaturesService, LanguageFeaturesService],
[IModelService, ModelService],
[ITreeSitterParserService, TestTreeSitterParserService]
[IModelService, ModelService],
[ITreeSitterLibraryService, TestTreeSitterLibraryService],
]));
}


@ -38,6 +38,8 @@ import { TestLanguageConfigurationService } from '../../../../editor/test/common
import { IUndoRedoService } from '../../../../platform/undoRedo/common/undoRedo.js';
import { IQuickDiffModelService } from '../../../contrib/scm/browser/quickDiffModel.js';
import { ITextEditorDiffInformation } from '../../../../platform/editor/common/editor.js';
import { ITreeSitterLibraryService } from '../../../../editor/common/services/treeSitter/treeSitterLibraryService.js';
import { TestTreeSitterLibraryService } from '../../../../editor/test/common/services/testTreeSitterLibraryService.js';
suite('MainThreadDocumentsAndEditors', () => {
@ -70,6 +72,7 @@ suite('MainThreadDocumentsAndEditors', () => {
const instantiationService = new TestInstantiationService();
instantiationService.set(ILanguageService, disposables.add(new LanguageService()));
instantiationService.set(ILanguageConfigurationService, new TestLanguageConfigurationService());
instantiationService.set(ITreeSitterLibraryService, new TestTreeSitterLibraryService());
instantiationService.set(IUndoRedoService, undoRedoService);
modelService = new ModelService(
configService,


@ -59,6 +59,8 @@ import { ILanguageService } from '../../../../editor/common/languages/language.j
import { LanguageService } from '../../../../editor/common/services/languageService.js';
import { ILanguageConfigurationService } from '../../../../editor/common/languages/languageConfigurationRegistry.js';
import { TestLanguageConfigurationService } from '../../../../editor/test/common/modes/testLanguageConfigurationService.js';
import { ITreeSitterLibraryService } from '../../../../editor/common/services/treeSitter/treeSitterLibraryService.js';
import { TestTreeSitterLibraryService } from '../../../../editor/test/common/services/testTreeSitterLibraryService.js';
suite('MainThreadEditors', () => {
@ -103,6 +105,7 @@ suite('MainThreadEditors', () => {
services.set(ICodeEditorService, new TestCodeEditorService(themeService));
services.set(IFileService, new TestFileService());
services.set(IUriIdentityService, new SyncDescriptor(UriIdentityService));
services.set(ITreeSitterLibraryService, new TestTreeSitterLibraryService());
services.set(IEditorService, disposables.add(new TestEditorService()));
services.set(ILifecycleService, new TestLifecycleService());
services.set(IWorkingCopyService, new TestWorkingCopyService());


@ -16,7 +16,7 @@ import { Position } from '../../../../../editor/common/core/position.js';
import { Range } from '../../../../../editor/common/core/range.js';
import { IEditorContribution } from '../../../../../editor/common/editorCommon.js';
import { ITextModel } from '../../../../../editor/common/model.js';
import { SemanticTokensLegend, SemanticTokens, TreeSitterTokenizationRegistry } from '../../../../../editor/common/languages.js';
import { SemanticTokensLegend, SemanticTokens } from '../../../../../editor/common/languages.js';
import { FontStyle, ColorId, StandardTokenType, TokenMetadata } from '../../../../../editor/common/encodedTokenAttributes.js';
import { ILanguageService } from '../../../../../editor/common/languages/language.js';
import { INotificationService } from '../../../../../platform/notification/common/notification.js';
@ -31,8 +31,10 @@ import { IConfigurationService } from '../../../../../platform/configuration/com
import { SEMANTIC_HIGHLIGHTING_SETTING_ID, IEditorSemanticHighlightingOptions } from '../../../../../editor/contrib/semanticTokens/common/semanticTokensConfig.js';
import { Schemas } from '../../../../../base/common/network.js';
import { ILanguageFeaturesService } from '../../../../../editor/common/services/languageFeatures.js';
import { ITextModelTreeSitter, ITreeSitterParserService } from '../../../../../editor/common/services/treeSitterParserService.js';
import type * as Parser from '@vscode/tree-sitter-wasm';
import type * as TreeSitter from '@vscode/tree-sitter-wasm';
import { TreeSitterSyntaxTokenBackend } from '../../../../../editor/common/model/tokens/treeSitter/treeSitterSyntaxTokenBackend.js';
import { TokenizationTextModelPart } from '../../../../../editor/common/model/tokens/tokenizationTextModelPart.js';
import { TreeSitterTree } from '../../../../../editor/common/model/tokens/treeSitter/treeSitterTree.js';
const $ = dom.$;
@ -46,7 +48,6 @@ export class InspectEditorTokensController extends Disposable implements IEditor
private _editor: ICodeEditor;
private _textMateService: ITextMateTokenizationService;
private _treeSitterService: ITreeSitterParserService;
private _themeService: IWorkbenchThemeService;
private _languageService: ILanguageService;
private _notificationService: INotificationService;
@ -57,7 +58,6 @@ export class InspectEditorTokensController extends Disposable implements IEditor
constructor(
editor: ICodeEditor,
@ITextMateTokenizationService textMateService: ITextMateTokenizationService,
@ITreeSitterParserService treeSitterService: ITreeSitterParserService,
@ILanguageService languageService: ILanguageService,
@IWorkbenchThemeService themeService: IWorkbenchThemeService,
@INotificationService notificationService: INotificationService,
@ -67,7 +67,6 @@ export class InspectEditorTokensController extends Disposable implements IEditor
super();
this._editor = editor;
this._textMateService = textMateService;
this._treeSitterService = treeSitterService;
this._themeService = themeService;
this._languageService = languageService;
this._notificationService = notificationService;
@ -96,7 +95,7 @@ export class InspectEditorTokensController extends Disposable implements IEditor
// disable in notebooks
return;
}
this._widget = new InspectEditorTokensWidget(this._editor, this._textMateService, this._treeSitterService, this._languageService, this._themeService, this._notificationService, this._configurationService, this._languageFeaturesService);
this._widget = new InspectEditorTokensWidget(this._editor, this._textMateService, this._languageService, this._themeService, this._notificationService, this._configurationService, this._languageFeaturesService);
}
public stop(): void {
@ -192,7 +191,6 @@ class InspectEditorTokensWidget extends Disposable implements IContentWidget {
private readonly _languageService: ILanguageService;
private readonly _themeService: IWorkbenchThemeService;
private readonly _textMateService: ITextMateTokenizationService;
private readonly _treeSitterService: ITreeSitterParserService;
private readonly _notificationService: INotificationService;
private readonly _configurationService: IConfigurationService;
private readonly _languageFeaturesService: ILanguageFeaturesService;
@ -203,7 +201,6 @@ class InspectEditorTokensWidget extends Disposable implements IContentWidget {
constructor(
editor: IActiveCodeEditor,
textMateService: ITextMateTokenizationService,
treeSitterService: ITreeSitterParserService,
languageService: ILanguageService,
themeService: IWorkbenchThemeService,
notificationService: INotificationService,
@ -216,7 +213,6 @@ class InspectEditorTokensWidget extends Disposable implements IContentWidget {
this._languageService = languageService;
this._themeService = themeService;
this._textMateService = textMateService;
this._treeSitterService = treeSitterService;
this._notificationService = notificationService;
this._configurationService = configurationService;
this._languageFeaturesService = languageFeaturesService;
@ -245,7 +241,7 @@ class InspectEditorTokensWidget extends Disposable implements IContentWidget {
private _beginCompute(position: Position): void {
const grammar = this._textMateService.createTokenizer(this._model.getLanguageId());
const semanticTokens = this._computeSemanticTokens(position);
const tree = this._treeSitterService.getParseResult(this._model);
const treeSitterTree = ((this._model.tokenization as TokenizationTextModelPart).tokens.get() as TreeSitterSyntaxTokenBackend).tree.get();
dom.clearNode(this._domNode);
this._domNode.appendChild(document.createTextNode(nls.localize('inspectTMScopesWidget.loading', "Loading...")));
@ -254,7 +250,7 @@ class InspectEditorTokensWidget extends Disposable implements IContentWidget {
if (this._isDisposed) {
return;
}
this._compute(grammar, semanticTokens, tree, position);
this._compute(grammar, semanticTokens, treeSitterTree, position);
this._domNode.style.maxWidth = `${Math.max(this._editor.getLayoutInfo().width * 0.66, 500)}px`;
this._editor.layoutContentWidget(this);
}, (err) => {
@ -275,7 +271,7 @@ class InspectEditorTokensWidget extends Disposable implements IContentWidget {
return this._themeService.getColorTheme().semanticHighlighting;
}
private _compute(grammar: IGrammar | null, semanticTokens: SemanticTokensResult | null, tree: ITextModelTreeSitter | undefined, position: Position) {
private _compute(grammar: IGrammar | null, semanticTokens: SemanticTokensResult | null, tree: TreeSitterTree | undefined, position: Position) {
const textMateTokenInfo = grammar && this._getTokensAtPosition(grammar, position);
const semanticTokenInfo = semanticTokens && this._getSemanticTokenAtPosition(semanticTokens, position);
const treeSitterTokenInfo = tree && this._getTreeSitterTokenAtPosition(tree, position);
@ -425,8 +421,8 @@ class InspectEditorTokensWidget extends Disposable implements IContentWidget {
$('td.tiw-metadata-value.tiw-metadata-scopes', undefined, ...scopes),
));
const tokenizationSupport = TreeSitterTokenizationRegistry.get(this._model.getLanguageId());
const captures = tokenizationSupport?.captureAtPosition(position.lineNumber, position.column, this._model);
const tokenizationSupport = ((this._model.tokenization as TokenizationTextModelPart).tokens.get() as TreeSitterSyntaxTokenBackend).tokenizationImpl.get();
const captures = tokenizationSupport?.captureAtPosition(position.lineNumber, position.column);
if (captures && captures.length > 0) {
dom.append(tbody, $('tr', undefined,
$('td.tiw-metadata-key', undefined, 'foreground'),
@ -648,11 +644,11 @@ class InspectEditorTokensWidget extends Disposable implements IContentWidget {
return null;
}
private _walkTreeforPosition(cursor: Parser.TreeCursor, pos: Position): Parser.Node | null {
private _walkTreeforPosition(cursor: TreeSitter.TreeCursor, pos: Position): TreeSitter.Node | null {
const offset = this._model.getOffsetAt(pos);
cursor.gotoFirstChild();
let goChild: boolean = false;
let lastGoodNode: Parser.Node | null = null;
let lastGoodNode: TreeSitter.Node | null = null;
do {
if (cursor.currentNode.startIndex <= offset && offset < cursor.currentNode.endIndex) {
goChild = true;
@ -664,22 +660,22 @@ class InspectEditorTokensWidget extends Disposable implements IContentWidget {
return lastGoodNode;
}
private _getTreeSitterTokenAtPosition(textModelTreeSitter: ITextModelTreeSitter, pos: Position): Parser.Node[] | null {
let tree = textModelTreeSitter.parseResult;
if (!tree?.tree) {
return null;
}
const nodes: Parser.Node[] = [];
do {
const cursor = tree.tree.walk();
private _getTreeSitterTokenAtPosition(treeSitterTree: TreeSitterTree | undefined, pos: Position): TreeSitter.Node[] | null {
const nodes: TreeSitter.Node[] = [];
let tree = treeSitterTree?.tree.get();
while (tree) {
const cursor = tree.walk();
const node = this._walkTreeforPosition(cursor, pos);
cursor.delete();
if (node) {
nodes.push(node);
tree = textModelTreeSitter.getInjection(node.startIndex, tree.languageId);
treeSitterTree = treeSitterTree?.getInjectionTrees(node.startIndex, treeSitterTree.languageId);
tree = treeSitterTree?.tree.get();
} else {
tree = undefined;
}
} while (tree?.tree);
}
return nodes.length > 0 ? nodes : null;
}
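The widget now reaches tree-sitter data through the model's tokenization part instead of a dedicated parser service. A hedged helper sketch of that access pattern (illustrative; the cast only works when the model is actually backed by the tree-sitter tokenizer, and the types are the ones imported above):

function getTreeSitterBackend(model: ITextModel) {
	const tokens = (model.tokenization as TokenizationTextModelPart).tokens.get();
	if (!(tokens instanceof TreeSitterSyntaxTokenBackend)) {
		return undefined; // the model is tokenized by TextMate/monarch instead
	}
	return {
		tree: tokens.tree.get(), // TreeSitterTree | undefined
		tokenization: tokens.tokenizationImpl.get(),
	};
}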


@ -13,7 +13,7 @@ import { IEditorService } from '../../../services/editor/common/editorService.js
import { EditorResourceAccessor } from '../../../common/editor.js';
import { ITextMateTokenizationService } from '../../../services/textMate/browser/textMateTokenizationFeature.js';
import type { IGrammar, StateStack } from 'vscode-textmate';
import { TokenizationRegistry, TreeSitterTokenizationRegistry } from '../../../../editor/common/languages.js';
import { TokenizationRegistry } from '../../../../editor/common/languages.js';
import { TokenMetadata } from '../../../../editor/common/encodedTokenAttributes.js';
import { ThemeRule, findMatchingThemeRule } from '../../../services/textMate/common/TMHelper.js';
import { Color } from '../../../../base/common/color.js';
@ -21,11 +21,15 @@ import { IFileService } from '../../../../platform/files/common/files.js';
import { basename } from '../../../../base/common/resources.js';
import { Schemas } from '../../../../base/common/network.js';
import { splitLines } from '../../../../base/common/strings.js';
import { ITextModelTreeSitter, ITreeSitterParserService } from '../../../../editor/common/services/treeSitterParserService.js';
import { ColorThemeData, findMetadata } from '../../../services/themes/common/colorThemeData.js';
import { IModelService } from '../../../../editor/common/services/model.js';
import { Event } from '../../../../base/common/event.js';
import { Range } from '../../../../editor/common/core/range.js';
import { TreeSitterTree } from '../../../../editor/common/model/tokens/treeSitter/treeSitterTree.js';
import { TokenizationTextModelPart } from '../../../../editor/common/model/tokens/tokenizationTextModelPart.js';
import { TreeSitterSyntaxTokenBackend } from '../../../../editor/common/model/tokens/treeSitter/treeSitterSyntaxTokenBackend.js';
import { TreeSitterTokenizationImpl } from '../../../../editor/common/model/tokens/treeSitter/treeSitterTokenizationImpl.js';
import { waitForState } from '../../../../base/common/observable.js';
interface IToken {
c: string; // token
@ -99,7 +103,6 @@ class Snapper {
@ILanguageService private readonly languageService: ILanguageService,
@IWorkbenchThemeService private readonly themeService: IWorkbenchThemeService,
@ITextMateTokenizationService private readonly textMateService: ITextMateTokenizationService,
@ITreeSitterParserService private readonly treeSitterParserService: ITreeSitterParserService,
@IModelService private readonly modelService: IModelService,
) {
}
@ -290,14 +293,17 @@ class Snapper {
}
}
private _treeSitterTokenize(textModelTreeSitter: ITextModelTreeSitter, tree: Parser.Tree, languageId: string): IToken[] {
private async _treeSitterTokenize(treeSitterTree: TreeSitterTree, tokenizationModel: TreeSitterTokenizationImpl, languageId: string): Promise<IToken[]> {
const tree = await waitForState(treeSitterTree.tree);
if (!tree) {
return [];
}
const cursor = tree.walk();
cursor.gotoFirstChild();
let cursorResult: boolean = true;
const tokens: IToken[] = [];
const tokenizationSupport = TreeSitterTokenizationRegistry.get(languageId);
const cursors: { cursor: Parser.TreeCursor; languageId: string; startOffset: number; endOffset: number }[] = [{ cursor, languageId, startOffset: 0, endOffset: textModelTreeSitter.textModel.getValueLength() }];
const cursors: { cursor: Parser.TreeCursor; languageId: string; startOffset: number; endOffset: number }[] = [{ cursor, languageId, startOffset: 0, endOffset: treeSitterTree.textModel.getValueLength() }];
do {
const current = cursors[cursors.length - 1];
const currentCursor = current.cursor;
@ -306,17 +312,18 @@ class Snapper {
if (!isOutsideRange && (currentCursor.currentNode.childCount === 0)) {
const range = new Range(currentCursor.currentNode.startPosition.row + 1, currentCursor.currentNode.startPosition.column + 1, currentCursor.currentNode.endPosition.row + 1, currentCursor.currentNode.endPosition.column + 1);
const injection = textModelTreeSitter.getInjection(currentCursor.currentNode.startIndex, currentLanguageId);
const injection = treeSitterTree.getInjectionTrees(currentCursor.currentNode.startIndex, currentLanguageId);
const treeSitterRange = injection?.ranges!.find(r => r.startIndex <= currentCursor.currentNode.startIndex && r.endIndex >= currentCursor.currentNode.endIndex);
if (injection?.tree && treeSitterRange && (treeSitterRange.startIndex === currentCursor.currentNode.startIndex)) {
const injectionLanguageId = injection.languageId;
const injectionTree = injection.tree;
const injectionTree = injection?.tree.get();
const injectionLanguageId = injection?.languageId;
if (injectionTree && injectionLanguageId && treeSitterRange && (treeSitterRange.startIndex === currentCursor.currentNode.startIndex)) {
const injectionCursor = injectionTree.walk();
this._moveInjectionCursorToRange(injectionCursor, treeSitterRange);
cursors.push({ cursor: injectionCursor, languageId: injectionLanguageId, startOffset: treeSitterRange.startIndex, endOffset: treeSitterRange.endIndex });
while ((currentCursor.endIndex <= treeSitterRange.endIndex) && (currentCursor.gotoNextSibling() || currentCursor.gotoParent())) { }
} else {
const capture = tokenizationSupport?.captureAtRangeTree(range, tree, textModelTreeSitter);
const capture = tokenizationModel.captureAtRangeTree(range);
tokens.push({
c: currentCursor.currentNode.text.replace(/\r/g, ''),
t: capture?.map(cap => cap.name).join(' ') ?? '',
@ -339,10 +346,12 @@ class Snapper {
cursorResult = currentCursor.gotoFirstChild();
}
if (cursors.length > 1 && ((!cursorResult && currentCursor === cursors[cursors.length - 1].cursor) || isOutsideRange)) {
current.cursor.delete();
cursors.pop();
cursorResult = true;
}
} while (cursorResult);
cursor.delete();
return tokens;
}
@ -364,35 +373,29 @@ class Snapper {
public async captureTreeSitterSyntaxTokens(resource: URI, content: string): Promise<IToken[]> {
const languageId = this.languageService.guessLanguageIdByFilepathOrFirstLine(resource);
if (languageId) {
const hasLanguage = TreeSitterTokenizationRegistry.get(languageId);
if (!hasLanguage) {
return [];
}
const model = this.modelService.getModel(resource) ?? this.modelService.createModel(content, { languageId, onDidChange: Event.None }, resource);
let textModelTreeSitter = this.treeSitterParserService.getParseResult(model);
let tree = textModelTreeSitter?.parseResult?.tree;
if (!textModelTreeSitter) {
return [];
}
if (!tree) {
let e = await Event.toPromise(this.treeSitterParserService.onDidUpdateTree);
// Once more for injections
if (e.hasInjections) {
e = await Event.toPromise(this.treeSitterParserService.onDidUpdateTree);
}
textModelTreeSitter = e.tree;
tree = textModelTreeSitter.parseResult?.tree;
}
if (!tree) {
return [];
}
const result = (await this._treeSitterTokenize(textModelTreeSitter, tree, languageId)).filter(t => t.c.length > 0);
const themeTokens = await this._getTreeSitterThemesResult(result, languageId);
this._enrichResult(result, themeTokens);
return result;
if (!languageId) {
return [];
}
return [];
const model = this.modelService.getModel(resource) ?? this.modelService.createModel(content, { languageId, onDidChange: Event.None }, resource);
const tokenizationPart = (model.tokenization as TokenizationTextModelPart).tokens.get();
if (!(tokenizationPart instanceof TreeSitterSyntaxTokenBackend)) {
return [];
}
const treeObs = tokenizationPart.tree;
const tokenizationImplObs = tokenizationPart.tokenizationImpl;
const treeSitterTree = treeObs.get() ?? await waitForState(treeObs);
const tokenizationImpl = tokenizationImplObs.get() ?? await waitForState(tokenizationImplObs);
// TODO: injections
if (!treeSitterTree) {
return [];
}
const result = (await this._treeSitterTokenize(treeSitterTree, tokenizationImpl, languageId)).filter(t => t.c.length > 0);
const themeTokens = await this._getTreeSitterThemesResult(result, languageId);
this._enrichResult(result, themeTokens);
return result;
}
}
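`waitForState` above turns the observable transition "value becomes available" into a promise. A minimal sketch of that pattern, assuming an IObservable<T | undefined> such as `treeObs` or `tokenizationImplObs`:

import { IObservable, waitForState } from '../../../../base/common/observable.js';

// Illustrative: use the current value if it is already there, otherwise await the first defined one.
async function currentOrNext<T>(obs: IObservable<T | undefined>): Promise<T> {
	return obs.get() ?? await waitForState(obs);
}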


@ -0,0 +1,13 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { ITreeSitterLibraryService } from '../../../../editor/common/services/treeSitter/treeSitterLibraryService.js';
import { ITreeSitterThemeService } from '../../../../editor/common/services/treeSitter/treeSitterThemeService.js';
import { InstantiationType, registerSingleton } from '../../../../platform/instantiation/common/extensions.js';
import { TreeSitterLibraryService } from './treeSitterLibraryService.js';
import { TreeSitterThemeService } from './treeSitterThemeService.js';
registerSingleton(ITreeSitterLibraryService, TreeSitterLibraryService, InstantiationType.Eager);
registerSingleton(ITreeSitterThemeService, TreeSitterThemeService, InstantiationType.Eager);


@ -1,134 +0,0 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { Emitter, Event } from '../../../../base/common/event.js';
import { Disposable, DisposableMap, DisposableStore } from '../../../../base/common/lifecycle.js';
import { ICodeEditor } from '../../../../editor/browser/editorBrowser.js';
import { ICodeEditorService } from '../../../../editor/browser/services/codeEditorService.js';
import { ITextModel } from '../../../../editor/common/model.js';
import { Range } from '../../../../editor/common/core/range.js';
import { ITreeSitterParserService } from '../../../../editor/common/services/treeSitterParserService.js';
export interface IViewPortChangeEvent {
model: ITextModel;
ranges: Range[];
}
export class TreeSitterCodeEditors extends Disposable {
private readonly _textModels = new Set<ITextModel>();
private readonly _languageEditors = this._register(new DisposableMap<ICodeEditor>());
private readonly _allEditors = this._register(new DisposableMap<ICodeEditor>());
private readonly _onDidChangeViewport = this._register(new Emitter<IViewPortChangeEvent>());
public readonly onDidChangeViewport = this._onDidChangeViewport.event;
constructor(private readonly _languageId: string,
@ICodeEditorService private readonly _codeEditorService: ICodeEditorService,
@ITreeSitterParserService private readonly _treeSitterParserService: ITreeSitterParserService) {
super();
this._register(this._codeEditorService.onCodeEditorAdd(this._onCodeEditorAdd, this));
this._register(this._codeEditorService.onCodeEditorRemove(this._onCodeEditorRemove, this));
this._codeEditorService.listCodeEditors().forEach(this._onCodeEditorAdd, this);
}
get textModels(): ITextModel[] {
return Array.from(this._textModels.keys());
}
getEditorForModel(model: ITextModel): ICodeEditor | undefined {
return this._codeEditorService.listCodeEditors().find(editor => editor.getModel() === model);
}
public async getInitialViewPorts(): Promise<IViewPortChangeEvent[]> {
await this._treeSitterParserService.getLanguage(this._languageId);
const editors = this._codeEditorService.listCodeEditors();
const viewports: IViewPortChangeEvent[] = [];
for (const editor of editors) {
const model = await this.getEditorModel(editor);
if (model && model.getLanguageId() === this._languageId) {
viewports.push({
model,
ranges: this._nonIntersectingViewPortRanges(editor)
});
}
}
return viewports;
}
private _onCodeEditorRemove(editor: ICodeEditor): void {
this._allEditors.deleteAndDispose(editor);
}
private async getEditorModel(editor: ICodeEditor): Promise<ITextModel | undefined> {
let model = editor.getModel() ?? undefined;
if (!model) {
const disposableStore: DisposableStore = this._register(new DisposableStore());
await Event.toPromise(Event.once(editor.onDidChangeModel), disposableStore);
model = editor.getModel() ?? undefined;
}
return model;
}
private async _onCodeEditorAdd(editor: ICodeEditor): Promise<void> {
const otherEditorDisposables = new DisposableStore();
otherEditorDisposables.add(editor.onDidChangeModel(() => this._onDidChangeModel(editor, editor.getModel()), this));
this._allEditors.set(editor, otherEditorDisposables);
const model = editor.getModel();
if (model) {
this._tryAddEditor(editor, model);
}
}
private _tryAddEditor(editor: ICodeEditor, model: ITextModel): void {
const language = model.getLanguageId();
if ((language === this._languageId)) {
if (!this._textModels.has(model)) {
this._textModels.add(model);
}
if (!this._languageEditors.has(editor)) {
const languageEditorDisposables = new DisposableStore();
languageEditorDisposables.add(editor.onDidScrollChange(() => this._onViewportChange(editor), this));
this._languageEditors.set(editor, languageEditorDisposables);
this._onViewportChange(editor);
}
}
}
private async _onDidChangeModel(editor: ICodeEditor, model: ITextModel | null): Promise<void> {
if (model) {
this._tryAddEditor(editor, model);
} else {
this._languageEditors.deleteAndDispose(editor);
}
}
private async _onViewportChange(editor: ICodeEditor): Promise<void> {
const ranges = this._nonIntersectingViewPortRanges(editor);
const model = editor.getModel();
if (!model) {
this._languageEditors.deleteAndDispose(editor);
return;
}
this._onDidChangeViewport.fire({ model: model, ranges });
}
private _nonIntersectingViewPortRanges(editor: ICodeEditor) {
const viewportRanges = editor.getVisibleRangesPlusViewportAboveBelow();
const nonIntersectingRanges: Range[] = [];
for (const range of viewportRanges) {
if (nonIntersectingRanges.length !== 0) {
const prev = nonIntersectingRanges[nonIntersectingRanges.length - 1];
if (Range.areOnlyIntersecting(prev, range)) {
const newRange = prev.plusRange(range);
nonIntersectingRanges[nonIntersectingRanges.length - 1] = newRange;
continue;
}
}
nonIntersectingRanges.push(range);
}
return nonIntersectingRanges;
}
}
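// Illustrative sketch only (not part of the original file): the merging that
// _nonIntersectingViewPortRanges performs above, written as a standalone helper.
// Range.areIntersectingOrTouching is used here as a broader stand-in for the
// areOnlyIntersecting check above; the sample ranges are made up.
function mergeOverlappingRangesExample(viewportRanges: Range[]): Range[] {
	const merged: Range[] = [];
	for (const range of viewportRanges) {
		const prev = merged.length > 0 ? merged[merged.length - 1] : undefined;
		if (prev && Range.areIntersectingOrTouching(prev, range)) {
			// Extend the previous range instead of keeping two overlapping entries
			merged[merged.length - 1] = prev.plusRange(range);
		} else {
			merged.push(range);
		}
	}
	return merged;
}
// mergeOverlappingRangesExample([new Range(1, 1, 60, 1), new Range(40, 1, 120, 1)])
// yields a single Range(1, 1, 120, 1).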

View File

@ -0,0 +1,159 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type { Parser, Language, Query } from '@vscode/tree-sitter-wasm';
import { IReader, ObservablePromise } from '../../../../base/common/observable.js';
import { ITreeSitterLibraryService } from '../../../../editor/common/services/treeSitter/treeSitterLibraryService.js';
import { canASAR, importAMDNodeModule } from '../../../../amdX.js';
import { Lazy } from '../../../../base/common/lazy.js';
import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
import { FileOperationResult, IFileContent, IFileService, toFileOperationResult } from '../../../../platform/files/common/files.js';
import { observableConfigValue } from '../../../../platform/observable/common/platformObservableUtils.js';
import { CachedFunction } from '../../../../base/common/cache.js';
import { IEnvironmentService } from '../../../../platform/environment/common/environment.js';
import { AppResourcePath, FileAccess, nodeModulesAsarUnpackedPath, nodeModulesPath } from '../../../../base/common/network.js';
import { Disposable } from '../../../../base/common/lifecycle.js';
import { URI } from '../../../../base/common/uri.js';
export const EDITOR_EXPERIMENTAL_PREFER_TREESITTER = 'editor.experimental.preferTreeSitter';
export const TREESITTER_ALLOWED_SUPPORT = ['css', 'typescript', 'ini', 'regex'];
const MODULE_LOCATION_SUBPATH = `@vscode/tree-sitter-wasm/wasm`;
const FILENAME_TREESITTER_WASM = `tree-sitter.wasm`;
export function getModuleLocation(environmentService: IEnvironmentService): AppResourcePath {
return `${(canASAR && environmentService.isBuilt) ? nodeModulesAsarUnpackedPath : nodeModulesPath}/${MODULE_LOCATION_SUBPATH}`;
}
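// For illustration (approximate paths, derived from the constants above): in a dev
// build this resolves to roughly `node_modules/@vscode/tree-sitter-wasm/wasm`, while
// built products point at the asar-unpacked node_modules location instead.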
export class TreeSitterLibraryService extends Disposable implements ITreeSitterLibraryService {
_serviceBrand: undefined;
isTest: boolean = false;
private readonly _treeSitterImport = new Lazy(async () => {
const TreeSitter = await importAMDNodeModule<typeof import('@vscode/tree-sitter-wasm')>('@vscode/tree-sitter-wasm', 'wasm/tree-sitter.js');
const environmentService = this._environmentService;
const isTest = this.isTest;
await TreeSitter.Parser.init({
locateFile(_file: string, _folder: string) {
const location: AppResourcePath = `${getModuleLocation(environmentService)}/${FILENAME_TREESITTER_WASM}`;
if (isTest) {
return FileAccess.asFileUri(location).toString(true);
} else {
return FileAccess.asBrowserUri(location).toString(true);
}
}
});
return TreeSitter;
});
private readonly _supportsLanguage = new CachedFunction((languageId: string) => {
return observableConfigValue(`${EDITOR_EXPERIMENTAL_PREFER_TREESITTER}.${languageId}`, false, this._configurationService);
});
private readonly _languagesCache = new CachedFunction((languageId: string) => {
return ObservablePromise.fromFn(async () => {
const languageLocation = getModuleLocation(this._environmentService);
const grammarName = `tree-sitter-${languageId}`;
const wasmPath: AppResourcePath = `${languageLocation}/${grammarName}.wasm`;
const [treeSitter, languageFile] = await Promise.all([
this._treeSitterImport.value,
this._fileService.readFile(FileAccess.asFileUri(wasmPath))
]);
const Language = treeSitter.Language;
const language = await Language.load(languageFile.value.buffer);
return language;
});
});
private readonly _injectionQueries = new CachedFunction({ getCacheKey: JSON.stringify }, (arg: { languageId: string; kind: 'injections' | 'highlights' }) => {
const loadQuerySource = async () => {
const injectionsQueriesLocation: AppResourcePath = `vs/editor/common/languages/${arg.kind}/${arg.languageId}.scm`;
const uri = FileAccess.asFileUri(injectionsQueriesLocation);
if (!this._fileService.hasProvider(uri)) {
return undefined;
}
const query = await tryReadFile(this._fileService, uri);
if (query === undefined) {
return undefined;
}
return query.value.toString();
};
return ObservablePromise.fromFn(async () => {
const [
querySource,
language,
treeSitter
] = await Promise.all([
loadQuerySource(),
this._languagesCache.get(arg.languageId).promise,
this._treeSitterImport.value,
]);
if (querySource === undefined) {
return null;
}
const Query = treeSitter.Query;
return new Query(language, querySource);
}).resolvedValue;
});
constructor(
@IConfigurationService private readonly _configurationService: IConfigurationService,
@IFileService private readonly _fileService: IFileService,
@IEnvironmentService private readonly _environmentService: IEnvironmentService,
) {
super();
}
supportsLanguage(languageId: string, reader: IReader | undefined): boolean {
return this._supportsLanguage.get(languageId).read(reader);
}
async getParserClass(): Promise<typeof Parser> {
const treeSitter = await this._treeSitterImport.value;
return treeSitter.Parser;
}
getLanguage(languageId: string, reader: IReader | undefined): Language | undefined {
if (!this.supportsLanguage(languageId, reader)) {
return undefined;
}
const lang = this._languagesCache.get(languageId).resolvedValue.read(reader);
return lang;
}
getInjectionQueries(languageId: string, reader: IReader | undefined): Query | null | undefined {
if (!this.supportsLanguage(languageId, reader)) {
return undefined;
}
const query = this._injectionQueries.get({ languageId, kind: 'injections' }).read(reader);
return query;
}
getHighlightingQueries(languageId: string, reader: IReader | undefined): Query | null | undefined {
if (!this.supportsLanguage(languageId, reader)) {
return undefined;
}
const query = this._injectionQueries.get({ languageId, kind: 'highlights' }).read(reader);
return query;
}
}
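// Illustrative consumer sketch (hypothetical helper, not used in this file): the
// service is observable-based, so callers pass a reader and simply get undefined
// until the setting is enabled and the wasm grammar has finished loading; the
// surrounding autorun/derived then re-runs once the language becomes available.
function readTypeScriptGrammarExample(service: TreeSitterLibraryService, reader: IReader): Language | undefined {
	if (!service.supportsLanguage('typescript', reader)) {
		// editor.experimental.preferTreeSitter.typescript is not enabled
		return undefined;
	}
	return service.getLanguage('typescript', reader);
}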
async function tryReadFile(fileService: IFileService, uri: URI): Promise<IFileContent | undefined> {
try {
const result = await fileService.readFile(uri);
return result;
} catch (e) {
if (toFileOperationResult(e) === FileOperationResult.FILE_NOT_FOUND) {
return undefined;
}
throw e;
}
}

View File

@ -0,0 +1,29 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { derived, IObservable, IReader, observableFromEvent } from '../../../../base/common/observable.js';
import { ITreeSitterThemeService } from '../../../../editor/common/services/treeSitter/treeSitterThemeService.js';
import { ColorThemeData, findMetadata } from '../../themes/common/colorThemeData.js';
import { IWorkbenchThemeService } from '../../themes/common/workbenchThemeService.js';
export class TreeSitterThemeService implements ITreeSitterThemeService {
_serviceBrand: undefined;
public readonly onChange: IObservable<void>;
private readonly _colorTheme: IObservable<ColorThemeData>;
constructor(
@IWorkbenchThemeService private readonly _themeService: IWorkbenchThemeService,
) {
this._colorTheme = observableFromEvent(this._themeService.onDidColorThemeChange, () => this._themeService.getColorTheme() as ColorThemeData);
this.onChange = derived(this, (reader) => {
this._colorTheme.read(reader);
reader.reportChange(void 0);
});
}
findMetadata(captureNames: string[], languageId: number, bracket: boolean, reader: IReader | undefined): number {
return findMetadata(this._colorTheme.read(reader), captureNames, languageId, bracket);
}
}
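// Illustrative sketch (hypothetical consumer): findMetadata reads the current color
// theme through the reader, so a derived built on it is recomputed whenever the
// workbench color theme changes. The capture name and language id are example values.
function keywordMetadataExample(service: TreeSitterThemeService): IObservable<number> {
	return derived(reader => service.findMetadata(['keyword'], 1, false, reader));
}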

View File

@ -1,69 +0,0 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { registerSingleton, InstantiationType } from '../../../../platform/instantiation/common/extensions.js';
import { IWorkbenchContribution, WorkbenchPhase, registerWorkbenchContribution2 } from '../../../common/contributions.js';
import { TreeSitterTextModelService } from '../../../../editor/common/services/treeSitter/treeSitterParserService.js';
import { ITreeSitterImporter, ITreeSitterParserService, TreeSitterImporter } from '../../../../editor/common/services/treeSitterParserService.js';
import { ITreeSitterTokenizationFeature } from './treeSitterTokenizationFeature.js';
import { ServicesAccessor } from '../../../../platform/instantiation/common/instantiation.js';
import { CommandsRegistry } from '../../../../platform/commands/common/commands.js';
import { URI } from '../../../../base/common/uri.js';
import { TreeSitterTokenizationRegistry } from '../../../../editor/common/languages.js';
import { ITextFileService } from '../../textfile/common/textfiles.js';
import { StopWatch } from '../../../../base/common/stopwatch.js';
/**
 * Makes sure the tree-sitter parser service and tokenization feature are instantiated
 */
class TreeSitterTokenizationInstantiator implements IWorkbenchContribution {
static readonly ID = 'workbench.contrib.treeSitterTokenizationInstantiator';
constructor(
@ITreeSitterParserService _treeSitterTokenizationService: ITreeSitterParserService,
@ITreeSitterTokenizationFeature _treeSitterTokenizationFeature: ITreeSitterTokenizationFeature
) { }
}
registerSingleton(ITreeSitterImporter, TreeSitterImporter, InstantiationType.Eager);
registerSingleton(ITreeSitterParserService, TreeSitterTextModelService, InstantiationType.Eager);
registerWorkbenchContribution2(TreeSitterTokenizationInstantiator.ID, TreeSitterTokenizationInstantiator, WorkbenchPhase.BlockRestore);
CommandsRegistry.registerCommand('_workbench.colorizeTreeSitterTokens', async (accessor: ServicesAccessor, resource?: URI): Promise<{ parseTime: number; captureTime: number; metadataTime: number }> => {
const treeSitterParserService = accessor.get(ITreeSitterParserService);
const textModelService = accessor.get(ITextFileService);
const textModel = resource ? (await textModelService.files.resolve(resource)).textEditorModel : undefined;
if (!textModel) {
throw new Error(`Cannot resolve text model for resource ${resource}`);
}
const tokenizer = await TreeSitterTokenizationRegistry.getOrCreate(textModel.getLanguageId());
if (!tokenizer) {
throw new Error(`Cannot resolve tokenizer for language ${textModel.getLanguageId()}`);
}
const textModelTreeSitter = await treeSitterParserService.getTextModelTreeSitter(textModel);
if (!textModelTreeSitter) {
throw new Error(`Cannot resolve tree sitter parser for language ${textModel.getLanguageId()}`);
}
const stopwatch = new StopWatch();
await textModelTreeSitter.parse();
stopwatch.stop();
let captureTime = 0;
let metadataTime = 0;
for (let i = 1; i <= textModel.getLineCount(); i++) {
const result = tokenizer.tokenizeEncodedInstrumented(i, textModel);
if (result) {
captureTime += result.captureTime;
metadataTime += result.metadataTime;
}
}
textModelTreeSitter.dispose();
textModel.dispose();
return { parseTime: stopwatch.elapsed(), captureTime, metadataTime };
});
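// Illustrative usage (hypothetical caller): the command above can be invoked through
// the command service, e.g.
//   commandService.executeCommand('_workbench.colorizeTreeSitterTokens', URI.file('/some/file.ts'))
// and resolves to { parseTime, captureTime, metadataTime }, all in milliseconds.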

View File

@ -1,837 +0,0 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type * as Parser from '@vscode/tree-sitter-wasm';
import { Emitter, Event } from '../../../../base/common/event.js';
import { Disposable, DisposableMap, DisposableStore, IDisposable } from '../../../../base/common/lifecycle.js';
import { AppResourcePath, FileAccess } from '../../../../base/common/network.js';
import { ILanguageIdCodec, ITreeSitterTokenizationSupport, LazyTokenizationSupport, QueryCapture, TreeSitterTokenizationRegistry } from '../../../../editor/common/languages.js';
import { ITextModel } from '../../../../editor/common/model.js';
import { EDITOR_EXPERIMENTAL_PREFER_TREESITTER, ITreeSitterParserService, RangeChange, ITreeSitterImporter, TREESITTER_ALLOWED_SUPPORT, RangeWithOffsets, ITextModelTreeSitter } from '../../../../editor/common/services/treeSitterParserService.js';
import { IModelTokensChangedEvent } from '../../../../editor/common/textModelEvents.js';
import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
import { IFileService } from '../../../../platform/files/common/files.js';
import { InstantiationType, registerSingleton } from '../../../../platform/instantiation/common/extensions.js';
import { createDecorator, IInstantiationService } from '../../../../platform/instantiation/common/instantiation.js';
import { ColorThemeData, findMetadata } from '../../themes/common/colorThemeData.js';
import { ILanguageService } from '../../../../editor/common/languages/language.js';
import { StopWatch } from '../../../../base/common/stopwatch.js';
import { ITreeSitterTokenizationStoreService } from '../../../../editor/common/model/treeSitterTokenStoreService.js';
import { LanguageId } from '../../../../editor/common/encodedTokenAttributes.js';
import { TokenQuality, TokenUpdate } from '../../../../editor/common/model/tokenStore.js';
import { Range } from '../../../../editor/common/core/range.js';
import { setTimeout0 } from '../../../../base/common/platform.js';
import { findLikelyRelevantLines } from '../../../../editor/common/model/textModelTokens.js';
import { TreeSitterCodeEditors } from './treeSitterCodeEditors.js';
import { IWorkbenchColorTheme, IWorkbenchThemeService } from '../../themes/common/workbenchThemeService.js';
import { Position } from '../../../../editor/common/core/position.js';
type TreeSitterQueries = string;
export const ITreeSitterTokenizationFeature = createDecorator<ITreeSitterTokenizationFeature>('treeSitterTokenizationFeature');
export interface ITreeSitterTokenizationFeature {
_serviceBrand: undefined;
}
interface EndOffsetToken {
endOffset: number;
metadata: number;
}
interface EndOffsetAndScopes {
endOffset: number;
scopes: string[];
bracket?: number[];
encodedLanguageId: LanguageId;
}
interface EndOffsetWithMeta extends EndOffsetAndScopes {
metadata?: number;
}
export const TREESITTER_BASE_SCOPES: Record<string, string> = {
'css': 'source.css',
'typescript': 'source.ts',
'ini': 'source.ini',
'regex': 'source.regex',
};
const BRACKETS = /[\{\}\[\]\<\>\(\)]/g;
export class TreeSitterTokenizationFeature extends Disposable implements ITreeSitterTokenizationFeature {
public _serviceBrand: undefined;
private readonly _tokenizersRegistrations: DisposableMap<string, DisposableStore> = this._register(new DisposableMap());
constructor(
@ITreeSitterImporter private readonly _treeSitterImporter: ITreeSitterImporter,
@ILanguageService private readonly _languageService: ILanguageService,
@IConfigurationService private readonly _configurationService: IConfigurationService,
@IInstantiationService private readonly _instantiationService: IInstantiationService,
@IFileService private readonly _fileService: IFileService
) {
super();
this._handleGrammarsExtPoint();
this._register(this._configurationService.onDidChangeConfiguration(e => {
if (e.affectsConfiguration(EDITOR_EXPERIMENTAL_PREFER_TREESITTER)) {
this._handleGrammarsExtPoint();
}
}));
}
private _getSetting(languageId: string): boolean {
return this._configurationService.getValue<boolean>(`${EDITOR_EXPERIMENTAL_PREFER_TREESITTER}.${languageId}`);
}
private _handleGrammarsExtPoint(): void {
// Eventually, this should actually use an extension point to add tree sitter grammars, but for now they are hard coded in core
for (const languageId of TREESITTER_ALLOWED_SUPPORT) {
const setting = this._getSetting(languageId);
if (setting && !this._tokenizersRegistrations.has(languageId)) {
const lazyTokenizationSupport = new LazyTokenizationSupport(() => this._createTokenizationSupport(languageId));
const disposableStore = new DisposableStore();
disposableStore.add(lazyTokenizationSupport);
disposableStore.add(TreeSitterTokenizationRegistry.registerFactory(languageId, lazyTokenizationSupport));
this._tokenizersRegistrations.set(languageId, disposableStore);
TreeSitterTokenizationRegistry.getOrCreate(languageId);
}
}
const languagesToUnregister = [...this._tokenizersRegistrations.keys()].filter(languageId => !this._getSetting(languageId));
for (const languageId of languagesToUnregister) {
this._tokenizersRegistrations.deleteAndDispose(languageId);
}
}
private async _fetchQueries(newLanguage: string): Promise<TreeSitterQueries> {
const languageLocation: AppResourcePath = `vs/editor/common/languages/highlights/${newLanguage}.scm`;
const query = await this._fileService.readFile(FileAccess.asFileUri(languageLocation));
return query.value.toString();
}
private async _createTokenizationSupport(languageId: string): Promise<ITreeSitterTokenizationSupport & IDisposable | null> {
const queries = await this._fetchQueries(languageId);
const Query = await this._treeSitterImporter.getQueryClass();
return this._instantiationService.createInstance(TreeSitterTokenizationSupport, queries, Query, languageId, this._languageService.languageIdCodec);
}
}
export class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTokenizationSupport {
private _query: Parser.Query | undefined;
private readonly _onDidChangeTokens: Emitter<{ textModel: ITextModel; changes: IModelTokensChangedEvent }> = this._register(new Emitter());
public readonly onDidChangeTokens: Event<{ textModel: ITextModel; changes: IModelTokensChangedEvent }> = this._onDidChangeTokens.event;
private readonly _onDidCompleteBackgroundTokenization: Emitter<{ textModel: ITextModel }> = this._register(new Emitter());
public readonly onDidChangeBackgroundTokenization: Event<{ textModel: ITextModel }> = this._onDidCompleteBackgroundTokenization.event;
private _colorThemeData!: ColorThemeData;
private _languageAddedListener: IDisposable | undefined;
private _codeEditors: TreeSitterCodeEditors;
private _encodedLanguage: LanguageId | undefined;
constructor(
private readonly _queries: TreeSitterQueries,
private readonly Query: typeof Parser.Query,
private readonly _languageId: string,
private readonly _languageIdCodec: ILanguageIdCodec,
@ITreeSitterParserService private readonly _treeSitterService: ITreeSitterParserService,
@IWorkbenchThemeService private readonly _themeService: IWorkbenchThemeService,
@ITreeSitterTokenizationStoreService private readonly _tokenizationStoreService: ITreeSitterTokenizationStoreService,
@IInstantiationService private readonly _instantiationService: IInstantiationService,
) {
super();
this._codeEditors = this._instantiationService.createInstance(TreeSitterCodeEditors, this._languageId);
this._register(this._codeEditors.onDidChangeViewport(e => {
this._parseAndTokenizeViewPort(e.model, e.ranges);
}));
this._codeEditors.getInitialViewPorts().then(async (viewports) => {
for (const viewport of viewports) {
this._parseAndTokenizeViewPort(viewport.model, viewport.ranges);
}
});
this._register(Event.runAndSubscribe(this._themeService.onDidColorThemeChange, (e) => this._updateTheme(e)));
this._register(this._treeSitterService.onDidUpdateTree((e) => {
if (e.languageId !== this._languageId) {
return;
}
if (this._tokenizationStoreService.hasTokens(e.textModel)) {
// Mark the range for refresh immediately
for (const range of e.ranges) {
this._tokenizationStoreService.markForRefresh(e.textModel, range.newRange);
}
}
if (e.versionId !== e.textModel.getVersionId()) {
return;
}
// First time we see a tree we need to build a token store.
if (!this._tokenizationStoreService.hasTokens(e.textModel)) {
// This will likely not happen as we first handle all models, which are ready before trees.
this._firstTreeUpdate(e.textModel, e.versionId, e.tree);
} else {
this._handleTreeUpdate(e.ranges, e.textModel, e.versionId, e.tree);
}
}));
}
private get _encodedLanguageId(): LanguageId {
if (!this._encodedLanguage) {
this._encodedLanguage = this._languageIdCodec.encodeLanguageId(this._languageId);
}
return this._encodedLanguage;
}
private _setInitialTokens(textModel: ITextModel) {
const tokens: TokenUpdate[] = this._createEmptyTokens(textModel);
this._tokenizationStoreService.setTokens(textModel, tokens, TokenQuality.None);
}
private _forceParseAndTokenizeContent(model: ITextModel, range: Range, startOffsetOfRangeInDocument: number, endOffsetOfRangeInDocument: number, content: string, asUpdate: true): TokenUpdate[] | undefined;
private _forceParseAndTokenizeContent(model: ITextModel, range: Range, startOffsetOfRangeInDocument: number, endOffsetOfRangeInDocument: number, content: string, asUpdate: false): EndOffsetToken[] | undefined;
private _forceParseAndTokenizeContent(model: ITextModel, range: Range, startOffsetOfRangeInDocument: number, endOffsetOfRangeInDocument: number, content: string, asUpdate: boolean): EndOffsetToken[] | TokenUpdate[] | undefined {
const likelyRelevantLines = findLikelyRelevantLines(model, range.startLineNumber).likelyRelevantLines;
const likelyRelevantPrefix = likelyRelevantLines.join(model.getEOL());
const tree = this._treeSitterService.getTreeSync(`${likelyRelevantPrefix}${content}`, this._languageId);
if (!tree) {
return;
}
const treeRange = new Range(1, 1, range.endLineNumber - range.startLineNumber + 1 + likelyRelevantLines.length, range.endColumn);
const captures = this._captureAtRange(treeRange, tree);
const tokens = this._tokenizeCapturesWithMetadata(tree, captures, likelyRelevantPrefix.length, endOffsetOfRangeInDocument - startOffsetOfRangeInDocument + likelyRelevantPrefix.length);
if (!tokens) {
return;
}
if (asUpdate) {
return this._rangeTokensAsUpdates(startOffsetOfRangeInDocument, tokens.endOffsetsAndMetadata, likelyRelevantPrefix.length);
} else {
return tokens.endOffsetsAndMetadata;
}
}
private async _parseAndTokenizeViewPort(model: ITextModel, viewportRanges: Range[]) {
if (!this._tokenizationStoreService.hasTokens(model)) {
this._setInitialTokens(model);
}
for (const range of viewportRanges) {
const startOffsetOfRangeInDocument = model.getOffsetAt(range.getStartPosition());
const endOffsetOfRangeInDocument = model.getOffsetAt(range.getEndPosition());
const version = model.getVersionId();
if (this._tokenizationStoreService.rangeHasTokens(model, range, TokenQuality.ViewportGuess)) {
continue;
}
const content = model.getValueInRange(range);
const tokenUpdates = await this._forceParseAndTokenizeContent(model, range, startOffsetOfRangeInDocument, endOffsetOfRangeInDocument, content, true);
if (!tokenUpdates || this._tokenizationStoreService.rangeHasTokens(model, range, TokenQuality.ViewportGuess)) {
continue;
}
if (tokenUpdates.length === 0) {
continue;
}
const lastToken = tokenUpdates[tokenUpdates.length - 1];
const oldRangeLength = lastToken.startOffsetInclusive + lastToken.length - tokenUpdates[0].startOffsetInclusive;
this._tokenizationStoreService.updateTokens(model, version, [{ newTokens: tokenUpdates, oldRangeLength }], TokenQuality.ViewportGuess);
this._onDidChangeTokens.fire({ textModel: model, changes: { semanticTokensApplied: false, ranges: [{ fromLineNumber: range.startLineNumber, toLineNumber: range.endLineNumber }] } });
}
}
guessTokensForLinesContent(lineNumber: number, textModel: ITextModel, lines: string[]): Uint32Array[] | undefined {
if (lines.length === 0) {
return undefined;
}
const lineContent = lines.join(textModel.getEOL());
const range = new Range(1, 1, lineNumber + lines.length, lines[lines.length - 1].length + 1);
const startOffset = textModel.getOffsetAt({ lineNumber, column: 1 });
const tokens = this._forceParseAndTokenizeContent(textModel, range, startOffset, startOffset + lineContent.length, lineContent, false);
if (!tokens) {
return undefined;
}
const tokensByLine: Uint32Array[] = new Array(lines.length);
let tokensIndex: number = 0;
let tokenStartOffset = 0;
let lineStartOffset = 0;
for (let i = 0; i < lines.length; i++) {
const tokensForLine: EndOffsetToken[] = [];
let moveToNextLine = false;
for (let j = tokensIndex; (!moveToNextLine && (j < tokens.length)); j++) {
const token = tokens[j];
const lineAdjustedEndOffset = token.endOffset - lineStartOffset;
const lineAdjustedStartOffset = tokenStartOffset - lineStartOffset;
if (lineAdjustedEndOffset <= lines[i].length) {
tokensForLine.push({ endOffset: lineAdjustedEndOffset, metadata: token.metadata });
tokensIndex++;
} else if (lineAdjustedStartOffset < lines[i].length) {
const partialToken: EndOffsetToken = { endOffset: lines[i].length, metadata: token.metadata };
tokensForLine.push(partialToken);
moveToNextLine = true;
} else {
moveToNextLine = true;
}
tokenStartOffset = token.endOffset;
}
tokensByLine[i] = this._endOffsetTokensToUint32Array(tokensForLine);
lineStartOffset += lines[i].length + textModel.getEOL().length;
}
return tokensByLine;
}
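// Worked example (illustrative values): for lines ['ab', 'cd'] and a single capture
// covering the joined text 'ab\ncd', the token is clipped at the line break: both
// lines end up with { endOffset: 2, metadata: M }, encoded as Uint32Array [2, M].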
private _emptyTokensForOffsetAndLength(offset: number, length: number, emptyToken: number): TokenUpdate {
return { token: emptyToken, length: offset + length, startOffsetInclusive: 0 };
}
private _createEmptyTokens(textModel: ITextModel) {
const emptyToken = this._emptyToken();
const modelEndOffset = textModel.getValueLength();
const emptyTokens: TokenUpdate[] = [this._emptyTokensForOffsetAndLength(0, modelEndOffset, emptyToken)];
return emptyTokens;
}
private _firstTreeUpdate(textModel: ITextModel, versionId: number, tree: ITextModelTreeSitter) {
this._setInitialTokens(textModel);
return this._setViewPortTokens(textModel, versionId, tree);
}
private _setViewPortTokens(textModel: ITextModel, versionId: number, tree: ITextModelTreeSitter) {
const maxLine = textModel.getLineCount();
let rangeChanges: RangeChange[];
const editor = this._codeEditors.getEditorForModel(textModel);
if (editor) {
const viewPort = editor.getVisibleRangesPlusViewportAboveBelow();
const ranges: { readonly fromLineNumber: number; readonly toLineNumber: number }[] = new Array(viewPort.length);
rangeChanges = new Array(viewPort.length);
for (let i = 0; i < viewPort.length; i++) {
const range = viewPort[i];
ranges[i] = { fromLineNumber: range.startLineNumber, toLineNumber: range.endLineNumber < maxLine ? range.endLineNumber : maxLine };
const newRangeStartOffset = textModel.getOffsetAt(range.getStartPosition());
const newRangeEndOffset = textModel.getOffsetAt(range.getEndPosition());
rangeChanges[i] = {
newRange: range,
newRangeStartOffset,
newRangeEndOffset,
};
}
} else {
const valueLength = textModel.getValueLength();
rangeChanges = [{ newRange: new Range(1, 1, maxLine, textModel.getLineMaxColumn(maxLine)), newRangeStartOffset: 0, newRangeEndOffset: valueLength }];
}
return this._handleTreeUpdate(rangeChanges, textModel, versionId, tree);
}
/**
 * Do not await in this method; awaiting here will cause a race.
 */
private _handleTreeUpdate(ranges: RangeChange[], textModel: ITextModel, versionId: number, textModelTreeSitter: ITextModelTreeSitter) {
const tree = textModelTreeSitter.parseResult?.tree;
if (!tree) {
return;
}
const rangeChanges: RangeWithOffsets[] = [];
const chunkSize = 1000;
for (let i = 0; i < ranges.length; i++) {
const rangeLinesLength = ranges[i].newRange.endLineNumber - ranges[i].newRange.startLineNumber;
if (rangeLinesLength > chunkSize) {
// Split the range into chunks to avoid long operations
const fullRangeEndLineNumber = ranges[i].newRange.endLineNumber;
let chunkLineStart = ranges[i].newRange.startLineNumber;
let chunkColumnStart = ranges[i].newRange.startColumn;
let chunkLineEnd = chunkLineStart + chunkSize;
do {
const chunkStartingPosition = new Position(chunkLineStart, chunkColumnStart);
const chunkEndColumn = ((chunkLineEnd === ranges[i].newRange.endLineNumber) ? ranges[i].newRange.endColumn : textModel.getLineMaxColumn(chunkLineEnd));
const chunkEndPosition = new Position(chunkLineEnd, chunkEndColumn);
const chunkRange = Range.fromPositions(chunkStartingPosition, chunkEndPosition);
rangeChanges.push({
range: chunkRange,
startOffset: textModel.getOffsetAt(chunkRange.getStartPosition()),
endOffset: textModel.getOffsetAt(chunkRange.getEndPosition())
});
chunkLineStart = chunkLineEnd + 1;
chunkColumnStart = 1;
if (chunkLineEnd < fullRangeEndLineNumber && chunkLineEnd + chunkSize > fullRangeEndLineNumber) {
chunkLineEnd = fullRangeEndLineNumber;
} else {
chunkLineEnd = chunkLineEnd + chunkSize;
}
} while (chunkLineEnd <= fullRangeEndLineNumber);
} else {
// Check that the previous range doesn't overlap
if ((i === 0) || (rangeChanges[i - 1].endOffset < ranges[i].newRangeStartOffset)) {
rangeChanges.push({
range: ranges[i].newRange,
startOffset: ranges[i].newRangeStartOffset,
endOffset: ranges[i].newRangeEndOffset
});
} else if (rangeChanges[i - 1].endOffset < ranges[i].newRangeEndOffset) {
// clip the range to the previous range
const startPosition = textModel.getPositionAt(rangeChanges[i - 1].endOffset + 1);
const range = new Range(startPosition.lineNumber, startPosition.column, ranges[i].newRange.endLineNumber, ranges[i].newRange.endColumn);
rangeChanges.push({
range,
startOffset: rangeChanges[i - 1].endOffset + 1,
endOffset: ranges[i].newRangeEndOffset
});
}
}
}
// Get the captures immediately while the text model is correct
const captures = rangeChanges.map(range => this._getCaptures(range.range, textModelTreeSitter, tree));
// Don't block
return this._updateTreeForRanges(textModel, rangeChanges, versionId, tree, captures).then(() => {
const tree = this._getTree(textModel);
if (!textModel.isDisposed() && (tree?.parseResult?.versionId === textModel.getVersionId())) {
this._refreshNeedsRefresh(textModel, versionId);
}
});
}
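// Worked example of the chunking above (illustrative line numbers): a range covering
// lines 1-2500 with chunkSize 1000 is split into chunks for lines 1-1001, 1002-2001
// and 2002-2500, so no single capture/tokenize pass spans the whole range.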
private async _updateTreeForRanges(textModel: ITextModel, rangeChanges: RangeWithOffsets[], versionId: number, tree: Parser.Tree, captures: QueryCapture[][]) {
let tokenUpdate: { newTokens: TokenUpdate[] } | undefined;
for (let i = 0; i < rangeChanges.length; i++) {
if (!textModel.isDisposed() && versionId !== textModel.getVersionId()) {
// Our captures have become invalid and we need to re-capture
break;
}
const capture = captures[i];
const range = rangeChanges[i];
const updates = this.getTokensInRange(textModel, range.range, range.startOffset, range.endOffset, tree, capture);
if (updates) {
tokenUpdate = { newTokens: updates };
} else {
tokenUpdate = { newTokens: [] };
}
this._tokenizationStoreService.updateTokens(textModel, versionId, [tokenUpdate], TokenQuality.Accurate);
this._onDidChangeTokens.fire({
textModel: textModel,
changes: {
semanticTokensApplied: false,
ranges: [{ fromLineNumber: range.range.getStartPosition().lineNumber, toLineNumber: range.range.getEndPosition().lineNumber }]
}
});
await new Promise<void>(resolve => setTimeout0(resolve));
}
this._onDidCompleteBackgroundTokenization.fire({ textModel });
}
private _refreshNeedsRefresh(textModel: ITextModel, versionId: number) {
const rangesToRefresh = this._tokenizationStoreService.getNeedsRefresh(textModel);
if (rangesToRefresh.length === 0) {
return;
}
const rangeChanges: RangeChange[] = new Array(rangesToRefresh.length);
for (let i = 0; i < rangesToRefresh.length; i++) {
const range = rangesToRefresh[i];
rangeChanges[i] = {
newRange: range.range,
newRangeStartOffset: range.startOffset,
newRangeEndOffset: range.endOffset
};
}
const tree = this._getTree(textModel);
if (tree?.parseResult?.tree && tree.parseResult.versionId === versionId) {
this._handleTreeUpdate(rangeChanges, textModel, versionId, tree);
}
}
private _rangeTokensAsUpdates(rangeOffset: number, endOffsetToken: EndOffsetToken[], startingOffsetInArray?: number) {
const updates: TokenUpdate[] = [];
let lastEnd = 0;
for (const token of endOffsetToken) {
if (token.endOffset <= lastEnd || (startingOffsetInArray && (token.endOffset < startingOffsetInArray))) {
continue;
}
let tokenUpdate: TokenUpdate;
if (startingOffsetInArray && (lastEnd < startingOffsetInArray)) {
tokenUpdate = { startOffsetInclusive: rangeOffset + startingOffsetInArray, length: token.endOffset - startingOffsetInArray, token: token.metadata };
} else {
tokenUpdate = { startOffsetInclusive: rangeOffset + lastEnd, length: token.endOffset - lastEnd, token: token.metadata };
}
updates.push(tokenUpdate);
lastEnd = token.endOffset;
}
return updates;
}
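// Worked example (illustrative values): with rangeOffset 100 and endOffsetToken
// [{ endOffset: 4, metadata: A }, { endOffset: 9, metadata: B }], the updates are
// [{ startOffsetInclusive: 100, length: 4, token: A },
//  { startOffsetInclusive: 104, length: 5, token: B }].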
public getTokensInRange(textModel: ITextModel, range: Range, rangeStartOffset: number, rangeEndOffset: number, tree?: Parser.Tree, captures?: QueryCapture[]): TokenUpdate[] | undefined {
const tokens = captures ? this._tokenizeCapturesWithMetadata(tree, captures, rangeStartOffset, rangeEndOffset) : this._tokenize(range, rangeStartOffset, rangeEndOffset, textModel);
if (tokens?.endOffsetsAndMetadata) {
return this._rangeTokensAsUpdates(rangeStartOffset, tokens.endOffsetsAndMetadata);
}
return undefined;
}
private _getTree(textModel: ITextModel): ITextModelTreeSitter | undefined {
return this._treeSitterService.getParseResult(textModel);
}
private _ensureQuery() {
if (!this._query) {
const language = this._treeSitterService.getOrInitLanguage(this._languageId);
if (!language) {
if (!this._languageAddedListener) {
this._languageAddedListener = this._register(Event.onceIf(this._treeSitterService.onDidAddLanguage, e => e.id === this._languageId)((e) => {
this._query = new this.Query(e.language, this._queries);
}));
}
return;
}
this._query = new this.Query(language, this._queries);
}
return this._query;
}
private _updateTheme(e: IWorkbenchColorTheme | undefined) {
this._colorThemeData = this._themeService.getColorTheme() as ColorThemeData;
for (const model of this._codeEditors.textModels) {
const modelRange = model.getFullModelRange();
this._tokenizationStoreService.markForRefresh(model, modelRange);
const editor = this._codeEditors.getEditorForModel(model);
if (editor) {
this._parseAndTokenizeViewPort(model, editor.getVisibleRangesPlusViewportAboveBelow());
}
}
}
captureAtPosition(lineNumber: number, column: number, textModel: ITextModel): QueryCapture[] {
const textModelTreeSitter = this._getTree(textModel);
if (!textModelTreeSitter?.parseResult?.tree) {
return [];
}
const captures = this._captureAtRangeWithInjections(new Range(lineNumber, column, lineNumber, column + 1), textModelTreeSitter, textModelTreeSitter.parseResult.tree);
return captures;
}
captureAtRangeTree(range: Range, tree: Parser.Tree, textModelTreeSitter: ITextModelTreeSitter | undefined): QueryCapture[] {
const captures = textModelTreeSitter ? this._captureAtRangeWithInjections(range, textModelTreeSitter, tree) : this._captureAtRange(range, tree);
return captures;
}
private _captureAtRange(range: Range, tree: Parser.Tree | undefined): QueryCapture[] {
const query = this._ensureQuery();
if (!tree || !query) {
return [];
}
// Tree-sitter rows and columns are 0-based, while editor lines and columns are 1-based, hence the -1/+1 conversions below
return query.captures(tree.rootNode, { startPosition: { row: range.startLineNumber - 1, column: range.startColumn - 1 }, endPosition: { row: range.endLineNumber - 1, column: range.endColumn - 1 } }).map(capture => (
{
name: capture.name,
text: capture.node.text,
node: {
startIndex: capture.node.startIndex,
endIndex: capture.node.endIndex,
startPosition: {
lineNumber: capture.node.startPosition.row + 1,
column: capture.node.startPosition.column + 1
},
endPosition: {
lineNumber: capture.node.endPosition.row + 1,
column: capture.node.endPosition.column + 1
}
},
encodedLanguageId: this._encodedLanguageId
}
));
}
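// For illustration: an editor Range(3, 5, 3, 10) is passed to tree-sitter as
// startPosition { row: 2, column: 4 } / endPosition { row: 2, column: 9 }, and the
// returned node positions are shifted back to 1-based lineNumber/column above.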
private _captureAtRangeWithInjections(range: Range, textModelTreeSitter: ITextModelTreeSitter, tree: Parser.Tree): QueryCapture[] {
const query = this._ensureQuery();
if (!textModelTreeSitter?.parseResult || !query) {
return [];
}
const captures: QueryCapture[] = this._captureAtRange(range, tree);
for (let i = 0; i < captures.length; i++) {
const capture = captures[i];
const capStartLine = capture.node.startPosition.lineNumber;
const capEndLine = capture.node.endPosition.lineNumber;
const capStartColumn = capture.node.startPosition.column;
const capEndColumn = capture.node.endPosition.column;
const startLine = ((capStartLine > range.startLineNumber) && (capStartLine < range.endLineNumber)) ? capStartLine : range.startLineNumber;
const endLine = ((capEndLine > range.startLineNumber) && (capEndLine < range.endLineNumber)) ? capEndLine : range.endLineNumber;
const startColumn = (capStartLine === range.startLineNumber) ? (capStartColumn < range.startColumn ? range.startColumn : capStartColumn) : (capStartLine < range.startLineNumber ? range.startColumn : capStartColumn);
const endColumn = (capEndLine === range.endLineNumber) ? (capEndColumn > range.endColumn ? range.endColumn : capEndColumn) : (capEndLine > range.endLineNumber ? range.endColumn : capEndColumn);
const injectionRange = new Range(startLine, startColumn, endLine, endColumn);
const injection = this._getInjectionCaptures(textModelTreeSitter, capture, injectionRange);
if (injection && injection.length > 0) {
captures.splice(i + 1, 0, ...injection);
i += injection.length;
}
}
return captures;
}
/**
* Gets the tokens for a given line.
* Each token takes 2 elements in the array. The first element is the offset of the end of the token *in the line, not in the document*, and the second element is the metadata.
*
* @param lineNumber
* @returns
*/
public tokenizeEncoded(lineNumber: number, textModel: ITextModel) {
const tokens = this._tokenizeEncoded(lineNumber, textModel);
if (!tokens) {
return undefined;
}
const updates = this._rangeTokensAsUpdates(textModel.getOffsetAt({ lineNumber, column: 1 }), tokens.result);
if (tokens.versionId === textModel.getVersionId()) {
this._tokenizationStoreService.updateTokens(textModel, tokens.versionId, [{ newTokens: updates, oldRangeLength: textModel.getLineLength(lineNumber) }], TokenQuality.Accurate);
}
}
public tokenizeEncodedInstrumented(lineNumber: number, textModel: ITextModel): { result: Uint32Array; captureTime: number; metadataTime: number } | undefined {
const tokens = this._tokenizeEncoded(lineNumber, textModel);
if (!tokens) {
return undefined;
}
return { result: this._endOffsetTokensToUint32Array(tokens.result), captureTime: tokens.captureTime, metadataTime: tokens.metadataTime };
}
private _getCaptures(range: Range, textModelTreeSitter: ITextModelTreeSitter, tree: Parser.Tree): QueryCapture[] {
const captures = this._captureAtRangeWithInjections(range, textModelTreeSitter, tree);
return captures;
}
private _tokenize(range: Range, rangeStartOffset: number, rangeEndOffset: number, textModel: ITextModel): { endOffsetsAndMetadata: { endOffset: number; metadata: number }[]; versionId: number; captureTime: number; metadataTime: number } | undefined {
const tree = this._getTree(textModel);
if (!tree?.parseResult?.tree) {
return undefined;
}
const captures = this._getCaptures(range, tree, tree.parseResult.tree);
const result = this._tokenizeCapturesWithMetadata(tree.parseResult.tree, captures, rangeStartOffset, rangeEndOffset);
if (!result) {
return undefined;
}
return { ...result, versionId: tree.parseResult.versionId };
}
private _createTokensFromCaptures(tree: Parser.Tree | undefined, captures: QueryCapture[], rangeStartOffset: number, rangeEndOffset: number): { endOffsets: EndOffsetAndScopes[]; captureTime: number } | undefined {
const stopwatch = StopWatch.create();
const rangeLength = rangeEndOffset - rangeStartOffset;
const encodedLanguageId = this._languageIdCodec.encodeLanguageId(this._languageId);
const baseScope: string = TREESITTER_BASE_SCOPES[this._languageId] || 'source';
if (captures.length === 0) {
if (tree) {
stopwatch.stop();
const endOffsetsAndMetadata = [{ endOffset: rangeLength, scopes: [], encodedLanguageId }];
return { endOffsets: endOffsetsAndMetadata, captureTime: stopwatch.elapsed() };
}
return undefined;
}
const endOffsetsAndScopes: EndOffsetAndScopes[] = Array(captures.length);
endOffsetsAndScopes.fill({ endOffset: 0, scopes: [baseScope], encodedLanguageId });
let tokenIndex = 0;
const increaseSizeOfTokensByOneToken = () => {
endOffsetsAndScopes.push({ endOffset: 0, scopes: [baseScope], encodedLanguageId });
};
const brackets = (capture: QueryCapture, startOffset: number): number[] | undefined => {
return (capture.name.includes('punctuation') && capture.text) ? Array.from(capture.text.matchAll(BRACKETS)).map(match => startOffset + match.index) : undefined;
};
const addCurrentTokenToArray = (capture: QueryCapture, startOffset: number, endOffset: number, position?: number) => {
if (position !== undefined) {
const oldScopes = endOffsetsAndScopes[position].scopes;
let oldBracket = endOffsetsAndScopes[position].bracket;
// Check that the previous token ends at the same point that the current token starts
const prevEndOffset = position > 0 ? endOffsetsAndScopes[position - 1].endOffset : 0;
if (prevEndOffset !== startOffset) {
let preInsertBracket: number[] | undefined = undefined;
if (oldBracket && oldBracket.length > 0) {
preInsertBracket = [];
const postInsertBracket: number[] = [];
for (let i = 0; i < oldBracket.length; i++) {
const bracket = oldBracket[i];
if (bracket < startOffset) {
preInsertBracket.push(bracket);
} else if (bracket > endOffset) {
postInsertBracket.push(bracket);
}
}
if (preInsertBracket.length === 0) {
preInsertBracket = undefined;
}
if (postInsertBracket.length === 0) {
oldBracket = undefined;
} else {
oldBracket = postInsertBracket;
}
}
// Insert part of the token at this position so the uncaptured space before the current token is still covered
endOffsetsAndScopes.splice(position, 0, { endOffset: startOffset, scopes: [...oldScopes], bracket: preInsertBracket, encodedLanguageId: capture.encodedLanguageId });
position++;
increaseSizeOfTokensByOneToken();
tokenIndex++;
}
endOffsetsAndScopes.splice(position, 0, { endOffset: endOffset, scopes: [...oldScopes, capture.name], bracket: brackets(capture, startOffset), encodedLanguageId: capture.encodedLanguageId });
endOffsetsAndScopes[tokenIndex].bracket = oldBracket;
} else {
endOffsetsAndScopes[tokenIndex] = { endOffset: endOffset, scopes: [baseScope, capture.name], bracket: brackets(capture, startOffset), encodedLanguageId: capture.encodedLanguageId };
}
tokenIndex++;
};
for (let captureIndex = 0; captureIndex < captures.length; captureIndex++) {
const capture = captures[captureIndex];
const tokenEndIndex = capture.node.endIndex < rangeEndOffset ? ((capture.node.endIndex < rangeStartOffset) ? rangeStartOffset : capture.node.endIndex) : rangeEndOffset;
const tokenStartIndex = capture.node.startIndex < rangeStartOffset ? rangeStartOffset : capture.node.startIndex;
const endOffset = tokenEndIndex - rangeStartOffset;
// Not every character will get captured, so we need to make sure that our current capture doesn't bleed toward the start of the line and cover characters that it doesn't apply to.
// We do this by creating a new token in the array if the previous token ends before the current token starts.
let previousEndOffset: number;
const currentTokenLength = tokenEndIndex - tokenStartIndex;
if (captureIndex > 0) {
previousEndOffset = endOffsetsAndScopes[(tokenIndex - 1)].endOffset;
} else {
previousEndOffset = tokenStartIndex - rangeStartOffset - 1;
}
const startOffset = endOffset - currentTokenLength;
if ((previousEndOffset >= 0) && (previousEndOffset < startOffset)) {
// Add an empty token to cover the space where there were no captures
endOffsetsAndScopes[tokenIndex] = { endOffset: startOffset, scopes: [baseScope], encodedLanguageId: this._encodedLanguageId };
tokenIndex++;
increaseSizeOfTokensByOneToken();
}
if (currentTokenLength < 0) {
// This happens when we have a token "gap" right at the end of the capture range. The last capture isn't used because its start index isn't included in the range.
continue;
}
if (previousEndOffset >= endOffset) {
// walk back through the tokens until we find the one that contains the current token
let withinTokenIndex = tokenIndex - 1;
let previousTokenEndOffset = endOffsetsAndScopes[withinTokenIndex].endOffset;
let previousTokenStartOffset = ((withinTokenIndex >= 2) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);
do {
// Check that the current token doesn't just replace the last token
if ((previousTokenStartOffset + currentTokenLength) === previousTokenEndOffset) {
if (previousTokenStartOffset === startOffset) {
// Current token and previous token span the exact same characters, add the scopes to the previous token
endOffsetsAndScopes[withinTokenIndex].scopes.push(capture.name);
const oldBracket = endOffsetsAndScopes[withinTokenIndex].bracket;
endOffsetsAndScopes[withinTokenIndex].bracket = ((oldBracket && (oldBracket.length > 0)) ? oldBracket : brackets(capture, startOffset));
}
} else if (previousTokenStartOffset <= startOffset) {
addCurrentTokenToArray(capture, startOffset, endOffset, withinTokenIndex);
break;
}
withinTokenIndex--;
previousTokenStartOffset = ((withinTokenIndex >= 1) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);
previousTokenEndOffset = ((withinTokenIndex >= 0) ? endOffsetsAndScopes[withinTokenIndex].endOffset : 0);
} while (previousTokenEndOffset > startOffset);
} else {
// Just add the token to the array
addCurrentTokenToArray(capture, startOffset, endOffset);
}
}
// Account for uncaptured characters at the end of the line
if ((endOffsetsAndScopes[tokenIndex - 1].endOffset < rangeLength)) {
if (rangeLength - endOffsetsAndScopes[tokenIndex - 1].endOffset > 0) {
increaseSizeOfTokensByOneToken();
endOffsetsAndScopes[tokenIndex] = { endOffset: rangeLength, scopes: endOffsetsAndScopes[tokenIndex].scopes, encodedLanguageId: this._encodedLanguageId };
tokenIndex++;
}
}
for (let i = 0; i < endOffsetsAndScopes.length; i++) {
const token = endOffsetsAndScopes[i];
if (token.endOffset === 0 && i !== 0) {
endOffsetsAndScopes.splice(i, endOffsetsAndScopes.length - i);
break;
}
}
const captureTime = stopwatch.elapsed();
return { endOffsets: endOffsetsAndScopes as { endOffset: number; scopes: string[]; encodedLanguageId: LanguageId }[], captureTime };
}
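// Worked example (illustrative offsets): for a 10-character range with a single
// capture named 'keyword' over offsets 3..5, the result is
// [{ endOffset: 3, scopes: [baseScope] }, { endOffset: 5, scopes: [baseScope, 'keyword'] },
//  { endOffset: 10, scopes: [baseScope] }]; the gaps before and after the capture are
// filled with base-scope tokens.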
private _getInjectionCaptures(textModelTreeSitter: ITextModelTreeSitter, parentCapture: QueryCapture, range: Range) {
const injection = textModelTreeSitter.getInjection(parentCapture.node.startIndex, this._languageId);
if (!injection?.tree || injection.versionId !== textModelTreeSitter.parseResult?.versionId) {
return undefined;
}
const feature = TreeSitterTokenizationRegistry.get(injection.languageId);
if (!feature) {
return undefined;
}
return feature.captureAtRangeTree(range, injection.tree, textModelTreeSitter);
}
private _tokenizeCapturesWithMetadata(tree: Parser.Tree | undefined, captures: QueryCapture[], rangeStartOffset: number, rangeEndOffset: number): { endOffsetsAndMetadata: EndOffsetToken[]; captureTime: number; metadataTime: number } | undefined {
const stopwatch = StopWatch.create();
const emptyTokens = this._createTokensFromCaptures(tree, captures, rangeStartOffset, rangeEndOffset);
if (!emptyTokens) {
return undefined;
}
const endOffsetsAndScopes: EndOffsetWithMeta[] = emptyTokens.endOffsets;
for (let i = 0; i < endOffsetsAndScopes.length; i++) {
const token = endOffsetsAndScopes[i];
token.metadata = findMetadata(this._colorThemeData, token.scopes, token.encodedLanguageId, !!token.bracket && (token.bracket.length > 0));
}
const metadataTime = stopwatch.elapsed();
return { endOffsetsAndMetadata: endOffsetsAndScopes as { endOffset: number; scopes: string[]; metadata: number }[], captureTime: emptyTokens.captureTime, metadataTime };
}
private _emptyToken() {
return findMetadata(this._colorThemeData, [], this._encodedLanguageId, false);
}
private _tokenizeEncoded(lineNumber: number, textModel: ITextModel): { result: EndOffsetToken[]; captureTime: number; metadataTime: number; versionId: number } | undefined {
const lineOffset = textModel.getOffsetAt({ lineNumber: lineNumber, column: 1 });
const maxLine = textModel.getLineCount();
const lineEndOffset = (lineNumber + 1 <= maxLine) ? textModel.getOffsetAt({ lineNumber: lineNumber + 1, column: 1 }) : textModel.getValueLength();
const lineLength = lineEndOffset - lineOffset;
const result = this._tokenize(new Range(lineNumber, 1, lineNumber, lineLength + 1), lineOffset, lineEndOffset, textModel);
if (!result) {
return undefined;
}
return { result: result.endOffsetsAndMetadata, captureTime: result.captureTime, metadataTime: result.metadataTime, versionId: result.versionId };
}
private _endOffsetTokensToUint32Array(endOffsetsAndMetadata: EndOffsetToken[]): Uint32Array {
const uint32Array = new Uint32Array(endOffsetsAndMetadata.length * 2);
for (let i = 0; i < endOffsetsAndMetadata.length; i++) {
uint32Array[i * 2] = endOffsetsAndMetadata[i].endOffset;
uint32Array[i * 2 + 1] = endOffsetsAndMetadata[i].metadata;
}
return uint32Array;
}
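// For illustration: [{ endOffset: 4, metadata: M1 }, { endOffset: 9, metadata: M2 }]
// is packed as Uint32Array [4, M1, 9, M2], i.e. two array slots per token.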
override dispose() {
super.dispose();
this._query?.delete();
this._query = undefined;
}
}
registerSingleton(ITreeSitterTokenizationFeature, TreeSitterTokenizationFeature, InstantiationType.Eager);

View File

@ -37,6 +37,8 @@ import { IEditorService } from '../../../../services/editor/common/editorService
import { IStorageService } from '../../../../../platform/storage/common/storage.js';
import { DisposableStore } from '../../../../../base/common/lifecycle.js';
import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../../base/test/common/utils.js';
import { ITreeSitterLibraryService } from '../../../../../editor/common/services/treeSitter/treeSitterLibraryService.js';
import { TestTreeSitterLibraryService } from '../../../../../editor/test/common/services/testTreeSitterLibraryService.js';
suite('EditorModel', () => {
@ -65,6 +67,7 @@ suite('EditorModel', () => {
instantiationService.stub(IThemeService, new TestThemeService());
instantiationService.stub(ILanguageConfigurationService, disposables.add(new TestLanguageConfigurationService()));
instantiationService.stub(IStorageService, disposables.add(new TestStorageService()));
instantiationService.stub(ITreeSitterLibraryService, new TestTreeSitterLibraryService());
return disposables.add(instantiationService.createInstance(ModelService));
}

View File

@ -183,6 +183,8 @@ import { IHoverService } from '../../../platform/hover/browser/hover.js';
import { NullHoverService } from '../../../platform/hover/test/browser/nullHoverService.js';
import { IActionViewItemService, NullActionViewItemService } from '../../../platform/actions/browser/actionViewItemService.js';
import { IMarkdownString } from '../../../base/common/htmlContent.js';
import { ITreeSitterLibraryService } from '../../../editor/common/services/treeSitter/treeSitterLibraryService.js';
import { TestTreeSitterLibraryService } from '../../../editor/test/common/services/testTreeSitterLibraryService.js';
export function createFileEditorInput(instantiationService: IInstantiationService, resource: URI): FileEditorInput {
return instantiationService.createInstance(FileEditorInput, resource, undefined, undefined, undefined, undefined, undefined, undefined);
@ -310,6 +312,7 @@ export function workbenchInstantiationService(
const themeService = new TestThemeService();
instantiationService.stub(IThemeService, themeService);
instantiationService.stub(ILanguageConfigurationService, disposables.add(new TestLanguageConfigurationService()));
instantiationService.stub(ITreeSitterLibraryService, new TestTreeSitterLibraryService());
instantiationService.stub(IModelService, disposables.add(instantiationService.createInstance(ModelService)));
const fileService = overrides?.fileService ? overrides.fileService(instantiationService) : disposables.add(new TestFileService());
instantiationService.stub(IFileService, fileService);

View File

@ -6,7 +6,6 @@
import assert from 'assert';
import { TestInstantiationService } from '../../../platform/instantiation/test/common/instantiationServiceMock.js';
import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../base/test/common/utils.js';
import { TreeSitterTextModelService } from '../../../editor/common/services/treeSitter/treeSitterParserService.js';
import { IModelService } from '../../../editor/common/services/model.js';
import { Event } from '../../../base/common/event.js';
import { URI } from '../../../base/common/uri.js';
@ -18,10 +17,7 @@ import { IConfigurationService } from '../../../platform/configuration/common/co
import { TestConfigurationService } from '../../../platform/configuration/test/common/testConfigurationService.js';
import { IEnvironmentService } from '../../../platform/environment/common/environment.js';
import { ModelService } from '../../../editor/common/services/modelService.js';
// eslint-disable-next-line local/code-layering, local/code-import-patterns
import { TreeSitterTokenizationFeature } from '../../services/treeSitter/browser/treeSitterTokenizationFeature.js';
import { ITreeSitterImporter, ITreeSitterParserService, TreeSitterImporter, TreeUpdateEvent } from '../../../editor/common/services/treeSitterParserService.js';
import { ITreeSitterTokenizationSupport, TreeSitterTokenizationRegistry } from '../../../editor/common/languages.js';
import { FileService } from '../../../platform/files/common/fileService.js';
import { Schemas } from '../../../base/common/network.js';
import { DiskFileSystemProvider } from '../../../platform/files/node/diskFileSystemProvider.js';
@ -41,15 +37,20 @@ import { DisposableStore, IDisposable } from '../../../base/common/lifecycle.js'
import { ProbeScope, TokenStyle } from '../../../platform/theme/common/tokenClassificationRegistry.js';
import { TextMateThemingRuleDefinitions } from '../../services/themes/common/colorThemeData.js';
import { Color } from '../../../base/common/color.js';
import { ITreeSitterTokenizationStoreService } from '../../../editor/common/model/treeSitterTokenStoreService.js';
import { Range } from '../../../editor/common/core/range.js';
import { TokenUpdate } from '../../../editor/common/model/tokens/treeSitter/tokenStore.js';
import { ITreeSitterLibraryService } from '../../../editor/common/services/treeSitter/treeSitterLibraryService.js';
// eslint-disable-next-line local/code-layering, local/code-import-patterns
import { TreeSitterLibraryService } from '../../services/treeSitter/browser/treeSitterLibraryService.js';
import { TokenizationTextModelPart } from '../../../editor/common/model/tokens/tokenizationTextModelPart.js';
import { TreeSitterSyntaxTokenBackend } from '../../../editor/common/model/tokens/treeSitter/treeSitterSyntaxTokenBackend.js';
import { TreeParseUpdateEvent, TreeSitterTree } from '../../../editor/common/model/tokens/treeSitter/treeSitterTree.js';
import { ITextModel } from '../../../editor/common/model.js';
import { TokenQuality, TokenUpdate } from '../../../editor/common/model/tokenStore.js';
import { TreeSitterTokenizationImpl } from '../../../editor/common/model/tokens/treeSitter/treeSitterTokenizationImpl.js';
import { autorunHandleChanges, recordChanges, waitForState } from '../../../base/common/observable.js';
import { ITreeSitterThemeService } from '../../../editor/common/services/treeSitter/treeSitterThemeService.js';
// eslint-disable-next-line local/code-layering, local/code-import-patterns
import { ICodeEditorService } from '../../../editor/browser/services/codeEditorService.js';
// eslint-disable-next-line local/code-layering, local/code-import-patterns
import { TestCodeEditorService } from '../../../editor/test/browser/editorTestServices.js';
import { IModelContentChangedEvent } from '../../../editor/common/textModelEvents.js';
import { TreeSitterThemeService } from '../../services/treeSitter/browser/treeSitterThemeService.js';
class MockTelemetryService implements ITelemetryService {
_serviceBrand: undefined;
@ -72,37 +73,6 @@ class MockTelemetryService implements ITelemetryService {
}
}
class MockTokenStoreService implements ITreeSitterTokenizationStoreService {
delete(model: ITextModel): void {
throw new Error('Method not implemented.');
}
handleContentChanged(model: ITextModel, e: IModelContentChangedEvent): void {
}
rangeHasTokens(model: ITextModel, range: Range, minimumTokenQuality: TokenQuality): boolean {
return true;
}
rangHasAnyTokens(model: ITextModel): boolean {
return true;
}
getNeedsRefresh(model: ITextModel): { range: Range; startOffset: number; endOffset: number }[] {
return [];
}
_serviceBrand: undefined;
setTokens(model: ITextModel, tokens: TokenUpdate[]): void {
}
getTokens(model: ITextModel, line: number): Uint32Array | undefined {
return undefined;
}
updateTokens(model: ITextModel, version: number, updates: { oldRangeLength: number; newTokens: TokenUpdate[] }[]): void {
}
markForRefresh(model: ITextModel, range: Range): void {
}
hasTokens(model: ITextModel, accurateForRange?: Range): boolean {
return true;
}
}
class TestTreeSitterColorTheme extends TestColorTheme {
public resolveScopes(scopes: ProbeScope[], definitions?: TextMateThemingRuleDefinitions): TokenStyle | undefined {
@ -126,9 +96,6 @@ suite('Tree Sitter TokenizationFeature', function () {
let themeService: IThemeService;
let languageService: ILanguageService;
let environmentService: IEnvironmentService;
let tokenStoreService: ITreeSitterTokenizationStoreService;
let treeSitterParserService: TreeSitterTextModelService;
let treeSitterTokenizationSupport: ITreeSitterTokenizationSupport;
let disposables: DisposableStore;
@ -141,13 +108,11 @@ suite('Tree Sitter TokenizationFeature', function () {
configurationService = new TestConfigurationService({ 'editor.experimental.preferTreeSitter.typescript': true });
themeService = new TestThemeService(new TestTreeSitterColorTheme());
environmentService = {} as IEnvironmentService;
tokenStoreService = new MockTokenStoreService();
instantiationService.set(IEnvironmentService, environmentService);
instantiationService.set(IConfigurationService, configurationService);
instantiationService.set(ILogService, logService);
instantiationService.set(ITelemetryService, telemetryService);
instantiationService.set(ITreeSitterTokenizationStoreService, tokenStoreService);
languageService = disposables.add(instantiationService.createInstance(LanguageService));
instantiationService.set(ILanguageService, languageService);
instantiationService.set(IThemeService, themeService);
@ -155,15 +120,18 @@ suite('Tree Sitter TokenizationFeature', function () {
instantiationService.set(ITextResourcePropertiesService, textResourcePropertiesService);
languageConfigurationService = disposables.add(instantiationService.createInstance(TestLanguageConfigurationService));
instantiationService.set(ILanguageConfigurationService, languageConfigurationService);
instantiationService.set(ITreeSitterImporter, instantiationService.createInstance(TreeSitterImporter));
instantiationService.set(ICodeEditorService, instantiationService.createInstance(TestCodeEditorService));
fileService = disposables.add(instantiationService.createInstance(FileService));
const diskFileSystemProvider = disposables.add(new DiskFileSystemProvider(logService));
disposables.add(fileService.registerProvider(Schemas.file, diskFileSystemProvider));
instantiationService.set(IFileService, fileService);
const libraryService = disposables.add(instantiationService.createInstance(TreeSitterLibraryService));
libraryService.isTest = true;
instantiationService.set(ITreeSitterLibraryService, libraryService);
instantiationService.set(ITreeSitterThemeService, instantiationService.createInstance(TreeSitterThemeService));
const dialogService = new TestDialogService();
const notificationService = new TestNotificationService();
const undoRedoService = new UndoRedoService(dialogService, notificationService);
@ -175,11 +143,6 @@ suite('Tree Sitter TokenizationFeature', function () {
instantiationService
);
instantiationService.set(IModelService, modelService);
treeSitterParserService = disposables.add(instantiationService.createInstance(TreeSitterTextModelService));
treeSitterParserService.isTest = true;
instantiationService.set(ITreeSitterParserService, treeSitterParserService);
disposables.add(instantiationService.createInstance(TreeSitterTokenizationFeature));
treeSitterTokenizationSupport = disposables.add(await TreeSitterTokenizationRegistry.getOrCreate('typescript') as (ITreeSitterTokenizationSupport & IDisposable));
});
teardown(() => {
@ -193,22 +156,18 @@ suite('Tree Sitter TokenizationFeature', function () {
}
let nameNumber = 1;
async function getModelAndPrepTree(content: string) {
async function getModelAndPrepTree(content: string): Promise<{ model: ITextModel; treeSitterTree: TreeSitterTree; tokenizationImpl: TreeSitterTokenizationImpl }> {
const model = disposables.add(modelService.createModel(content, { languageId: 'typescript', onDidChange: Event.None }, URI.file(`file${nameNumber++}.ts`)));
const tree = disposables.add(await treeSitterParserService.getTextModelTreeSitter(model));
const treeParseResult = new Promise<void>(resolve => {
const disposable = treeSitterParserService.onDidUpdateTree(e => {
if (e.textModel === model) {
disposable.dispose();
resolve();
}
});
});
await tree.parse();
await treeParseResult;
const treeSitterTreeObs = disposables.add((model.tokenization as TokenizationTextModelPart).tokens.get() as TreeSitterSyntaxTokenBackend).tree;
const tokenizationImplObs = disposables.add((model.tokenization as TokenizationTextModelPart).tokens.get() as TreeSitterSyntaxTokenBackend).tokenizationImpl;
const treeSitterTree = treeSitterTreeObs.get() ?? await waitForState(treeSitterTreeObs);
if (!treeSitterTree.tree.get()) {
await waitForState(treeSitterTree.tree);
}
const tokenizationImpl = tokenizationImplObs.get() ?? await waitForState(tokenizationImplObs);
assert.ok(tree);
return model;
assert.ok(treeSitterTree);
return { model, treeSitterTree, tokenizationImpl };
}
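
getModelAndPrepTree no longer talks to a parser service; it pulls the TreeSitterSyntaxTokenBackend off the model's tokenization part and waits for its observables to fill in. The read-or-wait steps above rely on this small pattern (a minimal sketch, assuming the single-argument waitForState overload resolves once the observable holds a defined value):

// Sketch: use the current value of an observable if it is already set, otherwise await its first defined value.
// IObservable and waitForState come from '../../../base/common/observable.js' (waitForState is imported above).
async function readWhenDefined<T>(obs: IObservable<T | undefined>): Promise<T> {
	return obs.get() ?? await waitForState(obs);
}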
function verifyTokens(tokens: TokenUpdate[] | undefined) {
@ -231,16 +190,17 @@ class x {
class y {
}`;
const model = await getModelAndPrepTree(content);
const { model, treeSitterTree } = await getModelAndPrepTree(content);
let updateListener: IDisposable | undefined;
let change: TreeUpdateEvent | undefined;
const updatePromise = new Promise<void>(resolve => {
updateListener = treeSitterParserService.onDidUpdateTree(async e => {
if (e.textModel === model) {
change = e;
resolve();
const changePromise = new Promise<TreeParseUpdateEvent | undefined>(resolve => {
updateListener = autorunHandleChanges({
owner: this,
changeTracker: recordChanges({ tree: treeSitterTree.tree }),
}, (reader, ctx) => {
const changeEvent = ctx.changes.at(0)?.change;
if (changeEvent) {
resolve(changeEvent);
}
});
});
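
The listener above uses autorunHandleChanges with a recordChanges change tracker so the autorun body sees the change payloads reported by treeSitterTree.tree, not just its latest value. The same pattern can be wrapped as a reusable helper, roughly like this (a sketch; waitForFirstTreeChange is hypothetical, the owner option is omitted, and disposal is left to the surrounding DisposableStore):

// Hypothetical helper: resolve once the given tree observable reports its first change.
// The autorun stays registered in `store` and is disposed with the rest of the test.
function waitForFirstTreeChange(store: DisposableStore, treeSitterTree: TreeSitterTree): Promise<TreeParseUpdateEvent> {
	return new Promise<TreeParseUpdateEvent>(resolve => {
		store.add(autorunHandleChanges({
			changeTracker: recordChanges({ tree: treeSitterTree.tree }),
		}, (_reader, ctx) => {
			const change = ctx.changes.at(0)?.change;
			if (change) {
				resolve(change);
			}
		}));
	});
}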
@ -258,7 +218,7 @@ class y {
resolve();
});
const edits = Promise.all([edit1, edit2, edit3]);
await updatePromise;
const change = await changePromise;
await edits;
assert.ok(change);
@ -274,8 +234,8 @@ class y {
test('File single line file', async () => {
const content = `console.log('x');`;
const model = await getModelAndPrepTree(content);
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 1, 18), 0, 17);
const { model, tokenizationImpl } = await getModelAndPrepTree(content);
const tokens = tokenizationImpl.getTokensInRange(new Range(1, 1, 1, 18), 0, 17);
verifyTokens(tokens);
assert.deepStrictEqual(tokens?.length, 9);
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
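
Each of these tokenization tests makes the same two assertions: the token count expected for the queried range, and that the returned tokens together cover every character of the content. The second check, tokensContentSize(tokens) === content.length, amounts to a total-length comparison; a hypothetical stand-in (not the file's actual tokensContentSize implementation, and assuming TokenUpdate exposes its length in characters) would be:

// Hypothetical equivalent of the coverage assertion; TokenUpdate.length is assumed to be a character count.
function totalTokenLength(tokens: TokenUpdate[]): number {
	return tokens.reduce((sum, token) => sum + token.length, 0);
}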
@ -286,8 +246,8 @@ class y {
const content = `
console.log('x');
`;
const model = await getModelAndPrepTree(content);
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 3, 1), 0, 19);
const { model, tokenizationImpl } = await getModelAndPrepTree(content);
const tokens = tokenizationImpl.getTokensInRange(new Range(1, 1, 3, 1), 0, 19);
verifyTokens(tokens);
assert.deepStrictEqual(tokens?.length, 11);
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
@ -296,8 +256,8 @@ console.log('x');
test('File with new lines at beginning and end \\r\\n', async () => {
const content = '\r\nconsole.log(\'x\');\r\n';
const model = await getModelAndPrepTree(content);
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 3, 1), 0, 21);
const { model, tokenizationImpl } = await getModelAndPrepTree(content);
const tokens = tokenizationImpl.getTokensInRange(new Range(1, 1, 3, 1), 0, 21);
verifyTokens(tokens);
assert.deepStrictEqual(tokens?.length, 11);
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
@ -310,8 +270,8 @@ console.log('x');
console.log('7');
`;
const model = await getModelAndPrepTree(content);
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 5, 1), 0, 38);
const { model, tokenizationImpl } = await getModelAndPrepTree(content);
const tokens = tokenizationImpl.getTokensInRange(new Range(1, 1, 5, 1), 0, 38);
verifyTokens(tokens);
assert.deepStrictEqual(tokens?.length, 21);
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
@ -320,8 +280,8 @@ console.log('7');
test('File with empty lines in the middle \\r\\n', async () => {
const content = '\r\nconsole.log(\'x\');\r\n\r\nconsole.log(\'7\');\r\n';
const model = await getModelAndPrepTree(content);
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 5, 1), 0, 42);
const { model, tokenizationImpl } = await getModelAndPrepTree(content);
const tokens = tokenizationImpl.getTokensInRange(new Range(1, 1, 5, 1), 0, 42);
verifyTokens(tokens);
assert.deepStrictEqual(tokens?.length, 21);
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
@ -334,8 +294,8 @@ console.log('7');
{
}
`;
const model = await getModelAndPrepTree(content);
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 5, 1), 0, 24);
const { model, tokenizationImpl } = await getModelAndPrepTree(content);
const tokens = tokenizationImpl.getTokensInRange(new Range(1, 1, 5, 1), 0, 24);
verifyTokens(tokens);
assert.deepStrictEqual(tokens?.length, 16);
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
@ -344,8 +304,8 @@ console.log('7');
test('File with non-empty lines that match no scopes \\r\\n', async () => {
const content = 'console.log(\'x\');\r\n;\r\n{\r\n}\r\n';
const model = await getModelAndPrepTree(content);
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 5, 1), 0, 28);
const { model, tokenizationImpl } = await getModelAndPrepTree(content);
const tokens = tokenizationImpl.getTokensInRange(new Range(1, 1, 5, 1), 0, 28);
verifyTokens(tokens);
assert.deepStrictEqual(tokens?.length, 16);
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
@ -359,8 +319,8 @@ console.log('7');
console.log('x');
`;
const model = await getModelAndPrepTree(content);
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 6, 1), 0, 28);
const { model, tokenizationImpl } = await getModelAndPrepTree(content);
const tokens = tokenizationImpl.getTokensInRange(new Range(1, 1, 6, 1), 0, 28);
verifyTokens(tokens);
assert.deepStrictEqual(tokens?.length, 12);
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
@ -369,8 +329,8 @@ console.log('x');
test('File with tree-sitter token that spans multiple lines \\r\\n', async () => {
const content = '/**\r\n**/\r\n\r\nconsole.log(\'x\');\r\n\r\n';
const model = await getModelAndPrepTree(content);
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 6, 1), 0, 33);
const { model, tokenizationImpl } = await getModelAndPrepTree(content);
const tokens = tokenizationImpl.getTokensInRange(new Range(1, 1, 6, 1), 0, 33);
verifyTokens(tokens);
assert.deepStrictEqual(tokens?.length, 12);
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
@ -385,8 +345,8 @@ console.log('x');
class Y {
private z = false;
}`;
const model = await getModelAndPrepTree(content);
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 7, 1), 0, 63);
const { model, tokenizationImpl } = await getModelAndPrepTree(content);
const tokens = tokenizationImpl.getTokensInRange(new Range(1, 1, 7, 1), 0, 63);
verifyTokens(tokens);
assert.deepStrictEqual(tokens?.length, 30);
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
@ -395,8 +355,8 @@ class Y {
test('File with tabs \\r\\n', async () => {
const content = 'function x() {\r\n\treturn true;\r\n}\r\n\r\nclass Y {\r\n\tprivate z = false;\r\n}';
const model = await getModelAndPrepTree(content);
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 7, 1), 0, 69);
const { model, tokenizationImpl } = await getModelAndPrepTree(content);
const tokens = tokenizationImpl.getTokensInRange(new Range(1, 1, 7, 1), 0, 69);
verifyTokens(tokens);
assert.deepStrictEqual(tokens?.length, 30);
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
@ -405,8 +365,8 @@ class Y {
test('Template string', async () => {
const content = '`t ${6}`';
const model = await getModelAndPrepTree(content);
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 1, 8), 0, 8);
const { model, tokenizationImpl } = await getModelAndPrepTree(content);
const tokens = tokenizationImpl.getTokensInRange(new Range(1, 1, 1, 8), 0, 8);
verifyTokens(tokens);
assert.deepStrictEqual(tokens?.length, 6);
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
@ -420,8 +380,8 @@ class Y {
// To make sure the translators don't break the link
comment: ["{Locked=']({'}"]
}));`;
const model = await getModelAndPrepTree(content);
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 6, 5), 0, 238);
const { model, tokenizationImpl } = await getModelAndPrepTree(content);
const tokens = tokenizationImpl.getTokensInRange(new Range(1, 1, 6, 5), 0, 238);
verifyTokens(tokens);
assert.deepStrictEqual(tokens?.length, 65);
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
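
Taken together, every tokenization test in this suite now follows the same shape: create a model, let getModelAndPrepTree hand back the TreeSitterTokenizationImpl, and query that instance directly instead of going through TreeSitterTokenizationRegistry. In isolation the flow is roughly this (a sketch reusing the helpers defined earlier in the file; the two trailing arguments to getTokensInRange are presumably the start and end offsets of the queried range):

// Sketch of the shared test flow, inside an async test body; mirrors the single-line test above.
const content = 'console.log(\'x\');';
const { tokenizationImpl } = await getModelAndPrepTree(content);
const tokens = tokenizationImpl.getTokensInRange(new Range(1, 1, 1, 18), 0, 17);
verifyTokens(tokens);
assert.deepStrictEqual(tokensContentSize(tokens), content.length);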


@ -121,7 +121,7 @@ import '../editor/common/services/languageFeaturesService.js';
import '../editor/common/services/semanticTokensStylingService.js';
import '../editor/common/services/treeViewsDndService.js';
import './services/textMate/browser/textMateTokenizationFeature.contribution.js';
import './services/treeSitter/browser/treeSitterTokenizationFeature.contribution.js';
import './services/treeSitter/browser/treeSitter.contribution.js';
import './services/userActivity/common/userActivityService.js';
import './services/userActivity/browser/userActivityBrowser.js';
import './services/editor/browser/editorPaneService.js';