From 8e006a43d332b8a186055ebe48756bc0b1952a2b Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 6 Mar 2026 20:15:38 +0100 Subject: [PATCH 01/20] refactor: more doc for configure --- src/project/flowr-analyzer-builder.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/project/flowr-analyzer-builder.ts b/src/project/flowr-analyzer-builder.ts index 46dc0609b76..6c8f56e0889 100644 --- a/src/project/flowr-analyzer-builder.ts +++ b/src/project/flowr-analyzer-builder.ts @@ -79,6 +79,9 @@ export class FlowrAnalyzerBuilder { } // we have a type safe export to ease auto-completion + /** + * Set a specific value in the configuration used by the resulting analyzer. + */ public configure>( key: K, value: PathValue From fc133cb52fa55a53da3eac7b4b1e3e7d5e84384a Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 6 Mar 2026 20:16:31 +0100 Subject: [PATCH 02/20] refactor: `this: void` for `FlowrConfig` helper --- src/config.ts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/config.ts b/src/config.ts index f46fed7641a..2a560647890 100644 --- a/src/config.ts +++ b/src/config.ts @@ -250,7 +250,7 @@ export const FlowrConfig = { * The default configuration for flowR, used when no config file is found or when a config file is missing some options. * You can use this as a base for your own config and only specify the options you want to change. */ - default(): FlowrConfig { + default(this: void): FlowrConfig { return { ignoreSourceCalls: false, semantics: { @@ -364,7 +364,7 @@ export const FlowrConfig = { /** * Parses the given JSON string as a flowR config file, returning the resulting config object if the parsing and validation were successful, or `undefined` if there was an error. 
*/ - parse(jsonString: string): FlowrConfig | undefined { + parse(this: void, jsonString: string): FlowrConfig | undefined { try { const parsed = JSON.parse(jsonString) as FlowrConfig; const validate = FlowrConfig.Schema.validate(parsed); @@ -383,14 +383,14 @@ export const FlowrConfig = { * Creates a new flowr config that has the updated values. */ // eslint-disable-next-line @typescript-eslint/no-invalid-void-type - amend(config: FlowrConfig, amendmentFunc: (config: DeepWritable) => FlowrConfig | void): FlowrConfig { + amend(this: void, config: FlowrConfig, amendmentFunc: (config: DeepWritable) => FlowrConfig | void): FlowrConfig { const newConfig = FlowrConfig.clone(config); return amendmentFunc(newConfig as DeepWritable) ?? newConfig; }, /** * Clones the given flowr config object. */ - clone(config: FlowrConfig): FlowrConfig { + clone(this: void, config: FlowrConfig): FlowrConfig { return deepClonePreserveUnclonable(config); }, /** @@ -399,7 +399,7 @@ export const FlowrConfig = { * infer the config from flowR's default locations. * This is mostly useful for user-facing features. */ - fromFile(configFile?: string, configWorkingDirectory = process.cwd()): FlowrConfig { + fromFile(this: void, configFile?: string, configWorkingDirectory = process.cwd()): FlowrConfig { try { return loadConfigFromFile(configFile, configWorkingDirectory); } catch(e) { @@ -410,7 +410,7 @@ export const FlowrConfig = { /** * Gets the configuration for the given engine type from the config. 
*/ - getForEngine(config: FlowrConfig, engine: T): EngineConfig & { type: T } | undefined { + getForEngine(this: void, config: FlowrConfig, engine: T): EngineConfig & { type: T } | undefined { const engines = config.engines; if(engines.length > 0) { return engines.find(e => e.type === engine) as EngineConfig & { type: T } | undefined; @@ -429,7 +429,7 @@ export const FlowrConfig = { * console.log(newConfig.solver.variables); // Output: "builtin" * ``` */ - setInConfig(config: FlowrConfig, key: Path, value: PathValue): FlowrConfig { + setInConfig(this: void, config: FlowrConfig, key: Path, value: PathValue): FlowrConfig { const clone = FlowrConfig.clone(config); objectPath.set(clone, key, value); return clone; @@ -438,7 +438,7 @@ export const FlowrConfig = { * Modifies the given config object in place by setting the given value at the given key, where the key is a dot-separated path to the value in the config object. * @see {@link setInConfig} for a version that returns a new config object instead of modifying the given one in place. 
*/ - setInConfigInPlace(config: FlowrConfig, key: Path, value: PathValue): void { + setInConfigInPlace(this: void, config: FlowrConfig, key: Path, value: PathValue): void { objectPath.set(config, key, value); } } as const; From 448776e989f4d22cd28b653191624553c63c3f45 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 11 Mar 2026 15:29:41 +0100 Subject: [PATCH 03/20] refactor: a first wrong incrementality --- package-lock.json | 20 +++++-- package.json | 1 + .../call/built-in/built-in-source.ts | 2 +- src/project/cache/flowr-analyzer-cache.ts | 43 +++++++++---- src/project/cache/flowr-cache.ts | 45 +++++++++----- src/project/context/flowr-analyzer-context.ts | 16 ++++- .../context/flowr-analyzer-files-context.ts | 9 +++ ...r-analyzer-incremental-analysis-context.ts | 42 +++++++++++++ src/project/context/flowr-file.ts | 47 ++++++++++++++- src/project/flowr-analyzer.ts | 15 ++++- .../incremental-parse/incremental-parse.ts | 60 +++++++++++++++++++ .../file-plugins/files/flowr-jupyter-file.ts | 2 +- .../files/flowr-rmarkdown-file.ts | 2 +- .../file-plugins/files/flowr-sweave-file.ts | 2 +- .../tree-sitter/tree-sitter-executor.ts | 26 +++++++- src/r-bridge/parser.ts | 23 +++++-- src/r-bridge/shell-executor.ts | 1 + src/r-bridge/shell.ts | 2 +- .../incremental/incremental-parsing.test.ts | 21 +++++++ 19 files changed, 332 insertions(+), 47 deletions(-) create mode 100644 src/project/context/flowr-analyzer-incremental-analysis-context.ts create mode 100644 src/project/incremental/incremental-parse/incremental-parse.ts create mode 100644 test/functionality/incremental/incremental-parsing.test.ts diff --git a/package-lock.json b/package-lock.json index 724eb5171c4..d506b92ba76 100644 --- a/package-lock.json +++ b/package-lock.json @@ -17,6 +17,7 @@ "command-line-usage": "^7.0.3", "commonmark": "^0.31.2", "dagre": "^0.8.5", + "diff": "^8.0.3", "gray-matter": "^4.0.3", "joi": "^18.0.1", "lz-string": "^1.5.0", @@ -1799,6 +1800,17 @@ "release-it": "16 || 17 || 18 || 19" } 
}, + "node_modules/@j-ulrich/release-it-regex-bumper/node_modules/diff": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/diff/-/diff-5.2.2.tgz", + "integrity": "sha512-vtcDfH3TOjP8UekytvnHH1o1P4FcUdt4eQ1Y+Abap1tk/OB2MWQvcwS2ClCd1zuIhc3JKOx6p3kod8Vfys3E+A==", + "dev": true, + "license": "BSD-3-Clause", + "optional": true, + "engines": { + "node": ">=0.3.1" + } + }, "node_modules/@jridgewell/gen-mapping": { "version": "0.3.13", "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", @@ -5113,12 +5125,10 @@ "license": "MIT" }, "node_modules/diff": { - "version": "5.2.2", - "resolved": "https://registry.npmjs.org/diff/-/diff-5.2.2.tgz", - "integrity": "sha512-vtcDfH3TOjP8UekytvnHH1o1P4FcUdt4eQ1Y+Abap1tk/OB2MWQvcwS2ClCd1zuIhc3JKOx6p3kod8Vfys3E+A==", - "dev": true, + "version": "8.0.3", + "resolved": "https://registry.npmjs.org/diff/-/diff-8.0.3.tgz", + "integrity": "sha512-qejHi7bcSD4hQAZE0tNAawRK1ZtafHDmMTMkrrIGgSLl7hTnQHmKCeB45xAcbfTqK2zowkM3j3bHt/4b/ARbYQ==", "license": "BSD-3-Clause", - "optional": true, "engines": { "node": ">=0.3.1" } diff --git a/package.json b/package.json index 33e9e219582..242a0d5da76 100644 --- a/package.json +++ b/package.json @@ -207,6 +207,7 @@ "command-line-usage": "^7.0.3", "commonmark": "^0.31.2", "dagre": "^0.8.5", + "diff": "^8.0.3", "gray-matter": "^4.0.3", "joi": "^18.0.1", "lz-string": "^1.5.0", diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts index 38e5414a52f..78907aa7eda 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts @@ -246,7 +246,7 @@ export function sourceRequest(rootId: NodeId, request: RParseRequest } else { guard(textRequest !== undefined, `Expected text request to be defined for sourced file ${JSON.stringify(request)}`); } - const parsed = 
(!data.parser.async ? data.parser : new RShellExecutor()).parse(textRequest.r); + const parsed = (!data.parser.async ? data.parser : new RShellExecutor()).parse(textRequest.r, data.ctx.inc); const normalized = (typeof parsed !== 'string' ? normalizeTreeSitter({ files: [{ parsed, filePath: textRequest.path }] }, getId, data.ctx.config) : normalize({ files: [{ parsed, filePath: textRequest.path }] }, getId)) as NormalizedAst; diff --git a/src/project/cache/flowr-analyzer-cache.ts b/src/project/cache/flowr-analyzer-cache.ts index 478e498a8d4..3964265bae4 100644 --- a/src/project/cache/flowr-analyzer-cache.ts +++ b/src/project/cache/flowr-analyzer-cache.ts @@ -1,5 +1,5 @@ import type { KnownParser } from '../../r-bridge/parser'; -import { type CacheInvalidationEvent, CacheInvalidationEventType, FlowrCache } from './flowr-cache'; +import { type InvalidationEvent, InvalidationEventType, FlowrCache } from './flowr-cache'; import { createDataflowPipeline, type DEFAULT_DATAFLOW_PIPELINE, @@ -18,6 +18,12 @@ import type { FlowrAnalyzerContext } from '../context/flowr-analyzer-context'; import { FlowrAnalyzerControlFlowCache } from './flowr-analyzer-controlflow-cache'; import type { CallGraph } from '../../dataflow/graph/call-graph'; import { computeCallGraph } from '../../dataflow/graph/call-graph'; +import type { + ReparseAction } from '../incremental/incremental-parse/incremental-parse'; +import { + coarseCheckWhetherToInvalidate, + shouldWeReparse +} from '../incremental/incremental-parse/incremental-parse'; interface FlowrAnalyzerCacheOptions { parser: Parser; @@ -49,37 +55,52 @@ export class FlowrAnalyzerCache extends FlowrCache; this.controlFlowCache = new FlowrAnalyzerControlFlowCache(); this.callGraphCache = undefined; + this.computeIfAbsent(true, () => this.pipeline?.getResults(true)); } public static create(data: FlowrAnalyzerCacheOptions): FlowrAnalyzerCache { return new FlowrAnalyzerCache(data); } - public override receive(event: CacheInvalidationEvent): void { + 
public override receive(event: InvalidationEvent): void { super.receive(event); - switch(event.type) { - case CacheInvalidationEventType.Full: + const type = event.type; + switch(type) { + case InvalidationEventType.Full: this.initCacheProviders(); break; + case InvalidationEventType.FileInvalidate: { + if(!coarseCheckWhetherToInvalidate(this.args.context, event)) { + return; + } + const reparse = shouldWeReparse(this.args.context, event); + this.initCacheProviders(reparse === 'full' ? undefined : reparse); + break; + } default: - assertUnreachable(event.type); + assertUnreachable(type); } } private get(): AnalyzerCacheType { /* this will do a ref assignment, so indirect force */ - return this.computeIfAbsent(false, () => this.pipeline.getResults(true)); + return this.computeIfAbsent(false, () => this.pipeline?.getResults(true)); } public reset() { - this.receive({ type: CacheInvalidationEventType.Full }); + this.receive({ type: InvalidationEventType.Full }); } private async runTapeUntil(force: boolean | undefined, until: () => T | undefined): Promise { @@ -112,7 +133,7 @@ export class FlowrAnalyzerCache extends FlowrCache['parse']> | undefined { - return this.get().parse; + return this.get()?.parse; } /** @@ -131,7 +152,7 @@ export class FlowrAnalyzerCache extends FlowrCache['normalize']> | undefined { - return this.get().normalize; + return this.get()?.normalize; } /** @@ -150,7 +171,7 @@ export class FlowrAnalyzerCache extends FlowrCache['dataflow']> | undefined { - return this.get().dataflow; + return this.get()?.dataflow; } /** diff --git a/src/project/cache/flowr-cache.ts b/src/project/cache/flowr-cache.ts index ef10fcd4839..ca9313a7547 100644 --- a/src/project/cache/flowr-cache.ts +++ b/src/project/cache/flowr-cache.ts @@ -1,37 +1,52 @@ import { assertUnreachable } from '../../util/assert'; +import type { FlowrFileProvider, StringableContent } from '../context/flowr-file'; -export const enum CacheInvalidationEventType { - Full = 'full' +export const enum 
InvalidationEventType { + Full = 'full', + FileInvalidate = 'file-invalidate', } -export type CacheInvalidationEvent = - { type: CacheInvalidationEventType.Full }; -export interface CacheInvalidationEventReceiver { - receive(event: CacheInvalidationEvent): void +export interface FileContentInvalidateEvent { + readonly type: InvalidationEventType.FileInvalidate; + readonly oldContent: Content | undefined; + readonly file: FlowrFileProvider; +} + +export type InvalidationEvent = + { type: InvalidationEventType.Full } + | FileContentInvalidateEvent; + + +export type InvalidationEventHandler = (event: InvalidationEvent) => void; + +export interface InvalidationEventReceiver { + receive: InvalidationEventHandler } /** * Central class for caching analysis results in FlowR. */ -export abstract class FlowrCache implements CacheInvalidationEventReceiver { +export abstract class FlowrCache implements InvalidationEventReceiver { private value: Cache | undefined = undefined; - private dependents: CacheInvalidationEventReceiver[] = []; + private dependents: InvalidationEventReceiver[] = []; - public registerDependent(dependent: CacheInvalidationEventReceiver) { + public registerDependent(dependent: InvalidationEventReceiver) { this.dependents.push(dependent); } - public removeDependent(dependent: CacheInvalidationEventReceiver) { + public removeDependent(dependent: InvalidationEventReceiver) { this.dependents = this.dependents.filter(d => d !== dependent); } - receive(event: CacheInvalidationEvent): void { + receive(event: InvalidationEvent): void { + const type = event.type; /* we will update this as soon as we support incremental update patterns */ - switch(event.type) { - case CacheInvalidationEventType.Full: + switch(type) { + case InvalidationEventType.Full: + case InvalidationEventType.FileInvalidate: this.value = undefined; break; default: - assertUnreachable(event.type); + assertUnreachable(type); } /* in the future we want to defer this *after* the dataflow is 
re-computed, then all receivers can decide whether they need to update */ this.notifyDependents(event); @@ -40,7 +55,7 @@ export abstract class FlowrCache implements CacheInvalidationEventReceive /** * Notify all dependents of a cache invalidation event. */ - public notifyDependents(event: CacheInvalidationEvent) { + public notifyDependents(event: InvalidationEvent) { for(const dependent of this.dependents) { dependent.receive(event); } diff --git a/src/project/context/flowr-analyzer-context.ts b/src/project/context/flowr-analyzer-context.ts index fe0a029d698..e004517683a 100644 --- a/src/project/context/flowr-analyzer-context.ts +++ b/src/project/context/flowr-analyzer-context.ts @@ -31,6 +31,12 @@ import { FlowrAnalyzerEnvironmentContext } from './flowr-analyzer-environment-co import type { ReadOnlyFlowrAnalyzerMetaContext } from './flowr-analyzer-meta-context'; import { FlowrAnalyzerMetaContext } from './flowr-analyzer-meta-context'; import type { FlowrAnalyzer } from '../flowr-analyzer'; +import type { + ReadOnlyFlowrAnalyzerIncrementalAnalysisContext +} from './flowr-analyzer-incremental-analysis-context'; +import { + FlowrAnalyzerIncrementalAnalysisContext +} from './flowr-analyzer-incremental-analysis-context'; /** * This is a read-only interface to the {@link FlowrAnalyzerContext}. @@ -54,6 +60,10 @@ export interface ReadOnlyFlowrAnalyzerContext { * The environment context provides access to the environment information used during analysis. */ readonly env: ReadOnlyFlowrAnalyzerEnvironmentContext; + /** + * The incremental context provides potential information for the next incremental analysis run + */ + readonly inc: ReadOnlyFlowrAnalyzerIncrementalAnalysisContext; /** * The configuration options used by the analyzer. 
*/ @@ -81,6 +91,8 @@ export class FlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerContext { public readonly files: FlowrAnalyzerFilesContext; public readonly deps: FlowrAnalyzerDependenciesContext; public readonly env: FlowrAnalyzerEnvironmentContext; + // TODO: document this in the wiki-analyzer wiki! + public readonly inc: FlowrAnalyzerIncrementalAnalysisContext; private _analyzer: FlowrAnalyzer | undefined; public readonly config: FlowrConfig; @@ -88,9 +100,10 @@ export class FlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerContext { constructor(config: FlowrConfig, plugins: ReadonlyMap) { this.config = config; const loadingOrder = new FlowrAnalyzerLoadingOrderContext(this, plugins.get(PluginType.LoadingOrder) as FlowrAnalyzerLoadingOrderPlugin[]); - this.files = new FlowrAnalyzerFilesContext(loadingOrder, (plugins.get(PluginType.ProjectDiscovery) ?? []) as FlowrAnalyzerProjectDiscoveryPlugin[], + this.files = new FlowrAnalyzerFilesContext(this, loadingOrder, (plugins.get(PluginType.ProjectDiscovery) ?? []) as FlowrAnalyzerProjectDiscoveryPlugin[], (plugins.get(PluginType.FileLoad) ?? []) as FlowrAnalyzerFilePlugin[]); this.env = new FlowrAnalyzerEnvironmentContext(this); + this.inc = new FlowrAnalyzerIncrementalAnalysisContext(); const functions = new FlowrAnalyzerFunctionsContext(this); this.deps = new FlowrAnalyzerDependenciesContext(functions, (plugins.get(PluginType.DependencyIdentification) ?? 
[]) as FlowrAnalyzerPackageVersionsPlugin[]); this.meta = new FlowrAnalyzerMetaContext(); @@ -144,6 +157,7 @@ export class FlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerContext { this.files.reset(); this.deps.reset(); this.meta.reset(); + this.inc.reset(); } } diff --git a/src/project/context/flowr-analyzer-files-context.ts b/src/project/context/flowr-analyzer-files-context.ts index 0476ab141c6..c75def0efa4 100644 --- a/src/project/context/flowr-analyzer-files-context.ts +++ b/src/project/context/flowr-analyzer-files-context.ts @@ -20,6 +20,8 @@ import fs from 'fs'; import path from 'path'; import type { FlowrNewsFile } from '../plugins/file-plugins/files/flowr-news-file'; import type { FlowrNamespaceFile } from '../plugins/file-plugins/files/flowr-namespace-file'; +import { FlowrAnalyzer } from '../flowr-analyzer'; +import type { FlowrAnalyzerContext } from './flowr-analyzer-context'; const fileLog = log.getSubLogger({ name: 'flowr-analyzer-files-context' }); @@ -129,6 +131,7 @@ export class FlowrAnalyzerFilesContext extends AbstractFlowrAnalyzerContext = new Map(); private inlineFiles: FlowrFileProvider[] = []; private readonly fileLoaders: readonly FlowrAnalyzerFilePlugin[]; + private readonly context: FlowrAnalyzerContext; /** these are all the paths of files that have been considered by the dataflow graph (even if not added) */ private readonly consideredFiles: string[] = []; @@ -136,11 +139,13 @@ export class FlowrAnalyzerFilesContext extends AbstractFlowrAnalyzerContext(Object.values(FileRole).map(k => [k, []])) as RoleBasedFiles; constructor( + context: FlowrAnalyzerContext, loadingOrder: FlowrAnalyzerLoadingOrderContext, plugins: readonly FlowrAnalyzerProjectDiscoveryPlugin[], fileLoaders: readonly FlowrAnalyzerFilePlugin[] ) { super(loadingOrder.getAttachedContext(), FlowrAnalyzerProjectDiscoveryPlugin.defaultPlugin(), plugins); + this.context = context; this.fileLoaders = [...fileLoaders, FlowrAnalyzerFilePlugin.defaultPlugin()]; this.loadingOrder 
= loadingOrder; } @@ -211,6 +216,10 @@ export class FlowrAnalyzerFilesContext extends AbstractFlowrAnalyzerContext { + this.context.analyzer?.receive(c); + }); + if(f.path() === FlowrFile.INLINE_PATH) { this.inlineFiles.push(f); } else { diff --git a/src/project/context/flowr-analyzer-incremental-analysis-context.ts b/src/project/context/flowr-analyzer-incremental-analysis-context.ts new file mode 100644 index 00000000000..fd906c46e90 --- /dev/null +++ b/src/project/context/flowr-analyzer-incremental-analysis-context.ts @@ -0,0 +1,42 @@ +import type { ParseStepOutput } from '../../r-bridge/parser'; +import type { Tree } from 'web-tree-sitter'; +import type { ReparseAction } from '../incremental/incremental-parse/incremental-parse'; + +export interface ReadOnlyFlowrAnalyzerIncrementalAnalysisContext { + /** + * The name of this context. + */ + readonly name: string; + + getParse(): ParseStepOutput | undefined; + getReparseActions(): readonly ReparseAction[] | undefined; +} + +/** + * Information to carry over for future incremental builds + */ +export class FlowrAnalyzerIncrementalAnalysisContext implements ReadOnlyFlowrAnalyzerIncrementalAnalysisContext { + public readonly name = 'flowr-analyzer-incremental-analysis-context'; + + private parseStepOutput: ParseStepOutput | undefined; + private reparseActions: readonly ReparseAction[] | undefined; + + public reset(): void { + this.parseStepOutput = undefined; + this.reparseActions = undefined; + } + + public storeParse(parse: ParseStepOutput | undefined, reparseAction: readonly ReparseAction[] | undefined): void { + this.parseStepOutput = parse; + this.reparseActions = reparseAction; + } + + public getParse(): ParseStepOutput | undefined { + return this.parseStepOutput; + } + + public getReparseActions(): readonly ReparseAction[] | undefined { + return this.reparseActions; + } + +} diff --git a/src/project/context/flowr-file.ts b/src/project/context/flowr-file.ts index bd0aacb5fdb..7f6a0569ac9 100644 --- 
a/src/project/context/flowr-file.ts +++ b/src/project/context/flowr-file.ts @@ -1,6 +1,8 @@ import type { PathLike } from 'fs'; import fs from 'fs'; import type { RParseRequest } from '../../r-bridge/retriever'; +import type { InvalidationEventHandler } from '../cache/flowr-cache'; +import { InvalidationEventType } from '../cache/flowr-cache'; /** * Just a readable alias for file paths, mostly for documentation purposes. @@ -41,6 +43,8 @@ export enum FileRole { export type StringableContent = { toString(): string }; + + /** * This is the basic interface for all files known to the FlowrAnalyzer. * You can implement this interface to provide custom file loading mechanisms. @@ -79,6 +83,21 @@ export interface FlowrFileProvider): void; + + /** + * Remove a previously added callback + */ + removeOnInvalidate(callback: InvalidationEventHandler): void; + + /** + * Reload the file content because something has changed. + */ + invalidate(): void; } /** @@ -90,6 +109,7 @@ export abstract class FlowrFile[] = []; public static readonly INLINE_PATH = '@inline'; public constructor(filePath: PathLike, roles?: readonly FileRole[]) { @@ -145,6 +165,22 @@ export abstract class FlowrFile): void { + this.onInvalidate.push(callback); + } + + public removeOnInvalidate(callback: InvalidationEventHandler): void { + this.onInvalidate = this.onInvalidate.filter(cb => cb !== callback); + } + + public invalidate(): void { + const oldContent = this.contentCache; + this.contentCache = undefined; + for(const invalidator of this.onInvalidate) { + invalidator({ type: InvalidationEventType.FileInvalidate, oldContent, file: this }); + } + } } /** @@ -162,7 +198,7 @@ export class FlowrTextFile extends FlowrFile { * These will be handled by the {@link FlowrAnalyzerDescriptionFilePlugin} (e.g., by using the {@link FlowrDescriptionFile#from} method decorator). 
*/ export class FlowrInlineTextFile extends FlowrFile { - private readonly contentStr: string; + private contentStr: string; constructor(path: PathLike, content: string) { super(path); @@ -172,4 +208,13 @@ export class FlowrInlineTextFile extends FlowrFile { protected loadContent(): string { return this.contentStr; } + + /** + * Update the content of this inline file and invalidate the cache to trigger updates in the analysis. + * @see {@link FlowrFile#invalidate} + */ + public updateInlineContent(newContent: string): void { + this.contentStr = newContent; + this.invalidate(); + } } diff --git a/src/project/flowr-analyzer.ts b/src/project/flowr-analyzer.ts index a5536824274..672454334b6 100644 --- a/src/project/flowr-analyzer.ts +++ b/src/project/flowr-analyzer.ts @@ -21,6 +21,7 @@ import type { CallGraph } from '../dataflow/graph/call-graph'; import type { Tree } from 'web-tree-sitter'; import { normalizeTreeSitterTreeToAst } from '../r-bridge/lang-4.x/tree-sitter/tree-sitter-normalize'; import { TreeSitterExecutor } from '../r-bridge/lang-4.x/tree-sitter/tree-sitter-executor'; +import type { InvalidationEvent } from './cache/flowr-cache'; /** * Extends the {@link ReadonlyFlowrAnalysisProvider} with methods that allow modifying the analyzer state. @@ -49,6 +50,11 @@ export interface FlowrAnalysisProvider * Reset the analyzer state, including the context and the cache. */ reset(): void; + + /** + * Receive cache invalidation events from the cache and propagate them to the context and other relevant components. + */ + receive(event: InvalidationEvent): void } /** @@ -203,13 +209,18 @@ export class FlowrAnalyzer implements this.cache.reset(); } + public receive(event: InvalidationEvent): void { + // TODO: ctx + this.cache.receive(event); + } + public parseStandalone(data: `${typeof fileProtocol}${string}` | string | RParseRequest): Tree { const request = isParseRequest(data) ? 
data : requestFromInput(data); if(this.parser.name === 'tree-sitter') { - return this.parser.parse(request); + return this.parser.parse(request, undefined); } else { const ts = new TreeSitterExecutor(); - return ts.parse(request); + return ts.parse(request, undefined); } } diff --git a/src/project/incremental/incremental-parse/incremental-parse.ts b/src/project/incremental/incremental-parse/incremental-parse.ts new file mode 100644 index 00000000000..3daa1874186 --- /dev/null +++ b/src/project/incremental/incremental-parse/incremental-parse.ts @@ -0,0 +1,60 @@ +import type { FlowrAnalyzerContext } from '../../context/flowr-analyzer-context'; +import type { InvalidationEvent } from '../../cache/flowr-cache'; +import { InvalidationEventType } from '../../cache/flowr-cache'; +import type { FlowrFileProvider } from '../../context/flowr-file'; + +/** + * Is this file even relevant to us? + */ +export function coarseCheckWhetherToInvalidate(ctx: FlowrAnalyzerContext, event: InvalidationEvent): boolean { + if(event?.type === InvalidationEventType.Full) { + return true; + } + // const path = event.file.path(); + // if the file has not been considered by the analysis we do not have to continue + // TODO: make sure this also works for descriptions/other meta files maybe we have to check for the role + /* + if(event.file.roles?.includes(FileRole.Source) && !ctx.files.consideredFilesList().includes(path)) { + return false; + } + */ + const newContent = event.file.content(); + if(event.oldContent === newContent) { + // TODO: maybe we want to allow a 'force' flag? 
+ // nothing changed + console.debug('File content did not change, skipping invalidation'); + return false; + } + + return true; +} + +interface LineRange { + line: number; +} +export interface ReparseAction { + file: FlowrFileProvider, + range: 'full' | LineRange[]; +} + +/** + * Decides what to reparse for the given event: `'full'` for a full invalidation, otherwise a single {@link ReparseAction} for the event's file that lists every zero-based line index at which the old and the new content differ (compared position by position). + */ +export function shouldWeReparse(ctx: FlowrAnalyzerContext, event: InvalidationEvent): 'full' | ReparseAction[] { + if(event?.type === InvalidationEventType.Full) { + return 'full'; + } + + const changedLines: LineRange[] = []; + const oldContent = (event.oldContent?.toString() ?? '').split('\n'); + const newContent = (event.file.content().toString()).split('\n'); + for(let line = 0; line < Math.max(oldContent.length, newContent.length); line++) { + if(oldContent[line] !== newContent[line]) { + changedLines.push({ line }); + } + } + return [{ + file: event.file, + range: changedLines + }]; +} \ No newline at end of file diff --git a/src/project/plugins/file-plugins/files/flowr-jupyter-file.ts b/src/project/plugins/file-plugins/files/flowr-jupyter-file.ts index 57f04dbf9b8..fb31b6ce38a 100644 --- a/src/project/plugins/file-plugins/files/flowr-jupyter-file.ts +++ b/src/project/plugins/file-plugins/files/flowr-jupyter-file.ts @@ -7,7 +7,7 @@ import { FileRole, FlowrFile } from '../../../context/flowr-file'; * This decorates a text file and parses its contents as a Jupyter file. * Finnaly, it provides access to the single cells, and all cells fused together as one R file. 
*/ -export class FlowrJupyterFile extends FlowrFile { +export class FlowrJupyterFile extends FlowrFile { private readonly wrapped: FlowrFileProvider; /** diff --git a/src/project/plugins/file-plugins/files/flowr-rmarkdown-file.ts b/src/project/plugins/file-plugins/files/flowr-rmarkdown-file.ts index f201afcef6f..531e5566aae 100644 --- a/src/project/plugins/file-plugins/files/flowr-rmarkdown-file.ts +++ b/src/project/plugins/file-plugins/files/flowr-rmarkdown-file.ts @@ -10,7 +10,7 @@ import { log } from '../../../../util/log'; * This decorates a text file and parses its contents as a R Markdown file. * Finnaly, it provides access to the single cells, and all cells fused together as one R file. */ -export class FlowrRMarkdownFile extends FlowrFile { +export class FlowrRMarkdownFile extends FlowrFile { private data?: RmdInfo; private readonly wrapped: FlowrFileProvider; diff --git a/src/project/plugins/file-plugins/files/flowr-sweave-file.ts b/src/project/plugins/file-plugins/files/flowr-sweave-file.ts index 9f03459a83e..4af0d895a47 100644 --- a/src/project/plugins/file-plugins/files/flowr-sweave-file.ts +++ b/src/project/plugins/file-plugins/files/flowr-sweave-file.ts @@ -7,7 +7,7 @@ import { guard } from '../../../../util/assert'; * Finally, it provides access to the single cells, and all cells fused together as one R file. * So far, this does *not* support `\Sexpr` calls. 
*/ -export class FlowrSweaveFile extends FlowrFile { +export class FlowrSweaveFile extends FlowrFile { private readonly wrapped: FlowrFileProvider; private data?: SweaveInfo; diff --git a/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts b/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts index 946150a52c3..14963d64b46 100644 --- a/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts +++ b/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts @@ -6,6 +6,9 @@ import type { TreeSitterEngineConfig } from '../../../config'; import { log } from '../../../util/log'; import fs from 'fs'; import type { ReadonlyFlowrAnalysisProvider } from '../../../project/flowr-analyzer'; +import type { + FlowrAnalyzerIncrementalAnalysisContext +} from '../../../project/context/flowr-analyzer-incremental-analysis-context'; export const DEFAULT_TREE_SITTER_R_WASM_PATH = './node_modules/@eagleoutice/tree-sitter-r/tree-sitter-r.wasm'; export const DEFAULT_TREE_SITTER_WASM_PATH = './node_modules/web-tree-sitter/tree-sitter.wasm'; @@ -16,10 +19,10 @@ const wasmLog = log.getSubLogger({ name: 'tree-sitter-wasm' }); * Synchronous and (way) faster alternative to the {@link RShell} using tree-sitter. */ export class TreeSitterExecutor implements SyncParser { - public readonly name = 'tree-sitter'; private readonly parser: Parser; private static language: Parser.Language; + public incremental = true; /** * Initializes the underlying tree-sitter parser. This only needs to be called once globally. 
@@ -69,13 +72,32 @@ export class TreeSitterExecutor implements SyncParser { return this.parser.getLanguage().version; } - public parse(request: RParseRequest): Parser.Tree { + public parse(request: RParseRequest & { filePath?: string }, inc: FlowrAnalyzerIncrementalAnalysisContext | undefined): Parser.Tree { let sourceCode: string; if(request.request === 'file') { sourceCode = fs.readFileSync(request.content, 'utf8'); } else { sourceCode = request.content; } + const parseActions = inc?.getReparseActions(); + console.log(request); + + if(request.filePath !== undefined && parseActions && parseActions.length > 0) { + const previosParse = inc?.getParse(); + const previousFile = previosParse?.files.find(f => f.filePath === request.filePath); + if(previousFile && typeof previousFile.parsed !== 'string') { + const previous = previousFile.parsed; + previous.edit({ + startIndex: 0, + oldEndIndex: 3, + newEndIndex: 5, + startPosition: { row: 0, column: 0 }, + oldEndPosition: { row: 0, column: 3 }, + newEndPosition: { row: 0, column: 5 }, + }); + return this.parser.parse(sourceCode, previous); + } + } return this.parser.parse(sourceCode); } diff --git a/src/r-bridge/parser.ts b/src/r-bridge/parser.ts index 1750e437800..28e2cb7a808 100644 --- a/src/r-bridge/parser.ts +++ b/src/r-bridge/parser.ts @@ -5,11 +5,21 @@ import type { TreeSitterExecutor } from './lang-4.x/tree-sitter/tree-sitter-exec import type { Query, QueryCapture, SyntaxNode } from 'web-tree-sitter'; import type { FlowrAnalysisProvider } from '../project/flowr-analyzer'; import type { FlowrAnalyzerContext } from '../project/context/flowr-analyzer-context'; +import type { FlowrAnalyzerIncrementalAnalysisContext } from '../project/context/flowr-analyzer-incremental-analysis-context'; interface ParserContent { - readonly name: string; + readonly name: string; + /** + * Whether the parser has incremental parsing capabilities + */ + readonly incremental: boolean; information(analyzer: FlowrAnalysisProvider): 
BaseParserInformation; - parse(request: RParseRequestFromText): T; + + /** + * Parses the given request and uses the provided incremental context (only if the parser + * itself supports incrementality {@link ParserContent#incremental}). + */ + parse(request: RParseRequestFromText & { filePath?: string }, inc: FlowrAnalyzerIncrementalAnalysisContext | undefined): T; close(): void; } @@ -93,7 +103,8 @@ function countChildren(node: SyntaxNode): number { */ export async function parseRequests(_results: unknown, input: Partial>): Promise> { - const loadingOrder = (input.context as FlowrAnalyzerContext).files.loadingOrder.getLoadingOrder(); + const ctx = input.context as FlowrAnalyzerContext; + const loadingOrder = ctx.files.loadingOrder.getLoadingOrder(); /* in the future, we want to expose all cases */ const translatedRequests = loadingOrder.map(r => (input.context as FlowrAnalyzerContext).files.resolveRequest(r)); @@ -101,7 +112,7 @@ Promise> { /* sadly we cannot Promise.all with the Rshell as it has to process commands in order and is not thread safe */ const files: ParseStepOutputSingleFile[] = []; for(const req of translatedRequests) { - const parsed = await (input.parser).parse(req.r); + const parsed = await (input.parser).parse(req.r, ctx.inc); files.push({ parsed, filePath: req.path, @@ -115,7 +126,9 @@ Promise> { const p = input.parser as SyncParser; return { files: translatedRequests.map(r => { - const parsed = p.parse(r.r); + const withPath: RParseRequestFromText & { filePath?: string } = r.r; + withPath.filePath = r.path; + const parsed = p.parse(withPath, ctx.inc); return { parsed, filePath: r.path, diff --git a/src/r-bridge/shell-executor.ts b/src/r-bridge/shell-executor.ts index 625c352f2d9..d19824b45b1 100644 --- a/src/r-bridge/shell-executor.ts +++ b/src/r-bridge/shell-executor.ts @@ -23,6 +23,7 @@ const executorLog = log.getSubLogger({ name: 'RShellExecutor' }); */ export class RShellExecutor implements SyncParser { public readonly name = 'r-shell'; + 
public readonly incremental = false; public readonly options: Readonly; private readonly prerequisites: string[]; diff --git a/src/r-bridge/shell.ts b/src/r-bridge/shell.ts index 46fe5352d45..e2864253d15 100644 --- a/src/r-bridge/shell.ts +++ b/src/r-bridge/shell.ts @@ -141,10 +141,10 @@ export function getDefaultRShellOptions(config?: RShellEngineConfig): RShellOpti * (leaving this as a legacy mode :D) */ export class RShell implements AsyncParser { - public readonly name = 'r-shell'; public readonly async = true; public readonly options: Readonly; + public readonly incremental = false; private session: RShellSession; private readonly log: Logger; private versionCache: SemVer | null = null; diff --git a/test/functionality/incremental/incremental-parsing.test.ts b/test/functionality/incremental/incremental-parsing.test.ts new file mode 100644 index 00000000000..ba07f5609cc --- /dev/null +++ b/test/functionality/incremental/incremental-parsing.test.ts @@ -0,0 +1,21 @@ +import { describe, test } from 'vitest'; +import { FlowrAnalyzerBuilder } from '../../../src/project/flowr-analyzer-builder'; +import { FlowrInlineTextFile } from '../../../src/project/context/flowr-file'; +import { RProject } from '../../../src/r-bridge/lang-4.x/ast/model/nodes/r-project'; + +describe('Incremental Parsing', () => { + test('should ', async() => { + const analyzer = await new FlowrAnalyzerBuilder() + .setEngine('tree-sitter') + .build(); + const f = new FlowrInlineTextFile('a.R', 'x <- 42\nprint(x)'); + analyzer.addFile(f); + analyzer.addRequest({ request: 'file', content: 'a.R' }); + + console.log(RProject.collectAllIds((await analyzer.normalize()).ast)); + + f.updateInlineContent('x <- 42\ny <- 32\nprint(x)'); + + console.log(RProject.collectAllIds((await analyzer.normalize()).ast)); + }); +}); \ No newline at end of file From b877c4c13639b80e7b0d0b08061ca461ce7368bd Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Tue, 24 Mar 2026 13:35:07 +0100 Subject: [PATCH 04/20] 
feat-fix: implement real incremental parsing with TreeSitter --- src/project/cache/flowr-analyzer-cache.ts | 16 ++-- ...r-analyzer-incremental-analysis-context.ts | 28 +++---- src/project/flowr-analyzer.ts | 5 +- .../incremental-parse/incremental-parse.ts | 83 ++++++++++++++----- .../tree-sitter/tree-sitter-executor.ts | 22 ++--- 5 files changed, 93 insertions(+), 61 deletions(-) diff --git a/src/project/cache/flowr-analyzer-cache.ts b/src/project/cache/flowr-analyzer-cache.ts index 3964265bae4..b19413c5531 100644 --- a/src/project/cache/flowr-analyzer-cache.ts +++ b/src/project/cache/flowr-analyzer-cache.ts @@ -22,7 +22,7 @@ import type { ReparseAction } from '../incremental/incremental-parse/incremental-parse'; import { coarseCheckWhetherToInvalidate, - shouldWeReparse + computeReparseAction } from '../incremental/incremental-parse/incremental-parse'; interface FlowrAnalyzerCacheOptions { @@ -55,12 +55,12 @@ export class FlowrAnalyzerCache extends FlowrCache extends FlowrCache | undefined; + nextReparseAction: ReparseAction | undefined; +} + export interface ReadOnlyFlowrAnalyzerIncrementalAnalysisContext { /** * The name of this context. 
*/ readonly name: string; - getParse(): ParseStepOutput | undefined; - getReparseActions(): readonly ReparseAction[] | undefined; + getParseInfo(): ParseInfo | undefined; } /** @@ -18,25 +22,17 @@ export interface ReadOnlyFlowrAnalyzerIncrementalAnalysisContext { export class FlowrAnalyzerIncrementalAnalysisContext implements ReadOnlyFlowrAnalyzerIncrementalAnalysisContext { public readonly name = 'flowr-analyzer-incremental-analysis-context'; - private parseStepOutput: ParseStepOutput | undefined; - private reparseActions: readonly ReparseAction[] | undefined; + private parseInfo?: ParseInfo; public reset(): void { - this.parseStepOutput = undefined; - this.reparseActions = undefined; + this.parseInfo = undefined; } - public storeParse(parse: ParseStepOutput | undefined, reparseAction: readonly ReparseAction[] | undefined): void { - this.parseStepOutput = parse; - this.reparseActions = reparseAction; + public storeParseInfo(parseInfo?: ParseInfo): void { + this.parseInfo = parseInfo; } - public getParse(): ParseStepOutput | undefined { - return this.parseStepOutput; + public getParseInfo(): ParseInfo | undefined { + return this.parseInfo; } - - public getReparseActions(): readonly ReparseAction[] | undefined { - return this.reparseActions; - } - } diff --git a/src/project/flowr-analyzer.ts b/src/project/flowr-analyzer.ts index 672454334b6..fe36ecd75c0 100644 --- a/src/project/flowr-analyzer.ts +++ b/src/project/flowr-analyzer.ts @@ -215,12 +215,13 @@ export class FlowrAnalyzer implements } public parseStandalone(data: `${typeof fileProtocol}${string}` | string | RParseRequest): Tree { + console.log('parseStandalone'); const request = isParseRequest(data) ? 
data : requestFromInput(data); if(this.parser.name === 'tree-sitter') { - return this.parser.parse(request, undefined); + return this.parser.parse(request, this.ctx.inc); } else { const ts = new TreeSitterExecutor(); - return ts.parse(request, undefined); + return ts.parse(request, this.ctx.inc); } } diff --git a/src/project/incremental/incremental-parse/incremental-parse.ts b/src/project/incremental/incremental-parse/incremental-parse.ts index 3daa1874186..bb1d98459db 100644 --- a/src/project/incremental/incremental-parse/incremental-parse.ts +++ b/src/project/incremental/incremental-parse/incremental-parse.ts @@ -1,7 +1,8 @@ import type { FlowrAnalyzerContext } from '../../context/flowr-analyzer-context'; -import type { InvalidationEvent } from '../../cache/flowr-cache'; +import type { FileContentInvalidateEvent, InvalidationEvent } from '../../cache/flowr-cache'; import { InvalidationEventType } from '../../cache/flowr-cache'; import type { FlowrFileProvider } from '../../context/flowr-file'; +import type Parser from 'web-tree-sitter'; /** * Is this file even relevant to us? @@ -29,32 +30,74 @@ export function coarseCheckWhetherToInvalidate(ctx: FlowrAnalyzerContext, event: return true; } -interface LineRange { - line: number; -} export interface ReparseAction { - file: FlowrFileProvider, - range: 'full' | LineRange[]; + file: FlowrFileProvider, + edit: Parser.Edit; } /** - * + * Compute the reparse action for the given file. + * @param event - The invalidation event. + * @returns The reparse action. */ -export function shouldWeReparse(ctx: FlowrAnalyzerContext, event: InvalidationEvent): 'full' | ReparseAction[] { - if(event?.type === InvalidationEventType.Full) { - return 'full'; +export function computeReparseAction(event: FileContentInvalidateEvent): ReparseAction { + const oldContent = event.oldContent?.toString() ?? 
''; + const newContent = event.file.content().toString(); + + const oldLen = oldContent.length; + const newLen = newContent.length; + + // 1) Longest common prefix + let startIndex = 0; + while( + startIndex < oldLen && + startIndex < newLen && + oldContent[startIndex] === newContent[startIndex] + ) { + startIndex++; } - const changedLines: LineRange[] = []; - const oldContent = (event.oldContent?.toString() ?? '').split('\n'); - const newContent = (event.file.content().toString()).split('\n'); - for(let line = 0; line < Math.max(oldContent.length, newContent.length); line++) { - if(oldContent[line] !== newContent[line]) { - changedLines.push({ line }); + // 2) Longest common suffix, without overlapping the prefix + let oldSuffixIndex = oldLen; + let newSuffixIndex = newLen; + while( + oldSuffixIndex > startIndex && + newSuffixIndex > startIndex && + oldContent[oldSuffixIndex - 1] === newContent[newSuffixIndex - 1] + ) { + oldSuffixIndex--; + newSuffixIndex--; + } + + const oldEndIndex = oldSuffixIndex; + const newEndIndex = newSuffixIndex; + + return { + file: event.file, + edit: { + startIndex, + oldEndIndex, + newEndIndex, + startPosition: indexToPoint(oldContent, startIndex), + oldEndPosition: indexToPoint(oldContent, oldEndIndex), + newEndPosition: indexToPoint(newContent, newEndIndex), + } + }; +} + + +function indexToPoint(text: string, index: number): Parser.Point { + let row = 0; + let column = 0; + + for(let i = 0; i < index; i++) { + if(text[i] === '\n') { + row++; + column = 0; + } else { + column++; } } - return [{ - file: event.file, - range: changedLines - }]; + + return { row, column }; } \ No newline at end of file diff --git a/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts b/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts index 14963d64b46..74b6e9aff06 100644 --- a/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts +++ b/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts @@ -79,23 +79,15 @@ export class 
TreeSitterExecutor implements SyncParser { } else { sourceCode = request.content; } - const parseActions = inc?.getReparseActions(); - console.log(request); - if(request.filePath !== undefined && parseActions && parseActions.length > 0) { - const previosParse = inc?.getParse(); - const previousFile = previosParse?.files.find(f => f.filePath === request.filePath); + const parseInfo = inc?.getParseInfo(); + const nextReparseAction = parseInfo?.nextReparseAction; + if(request.filePath !== undefined && nextReparseAction) { + const previousFile = parseInfo.lastParseStepOutput?.files.find(f => f.filePath === request.filePath); if(previousFile && typeof previousFile.parsed !== 'string') { - const previous = previousFile.parsed; - previous.edit({ - startIndex: 0, - oldEndIndex: 3, - newEndIndex: 5, - startPosition: { row: 0, column: 0 }, - oldEndPosition: { row: 0, column: 3 }, - newEndPosition: { row: 0, column: 5 }, - }); - return this.parser.parse(sourceCode, previous); + const previousTree = previousFile.parsed; + previousTree.edit(nextReparseAction.edit); + return this.parser.parse(sourceCode, previousTree); } } return this.parser.parse(sourceCode); From 1857dad8e938ab4b9567ebdaf47e74e829b74bdb Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Tue, 24 Mar 2026 13:36:11 +0100 Subject: [PATCH 05/20] test: add incremental parsing tests --- .../incremental/incremental-parsing.test.ts | 865 +++++++++++++++++- 1 file changed, 852 insertions(+), 13 deletions(-) diff --git a/test/functionality/incremental/incremental-parsing.test.ts b/test/functionality/incremental/incremental-parsing.test.ts index ba07f5609cc..19c5a7f8d2c 100644 --- a/test/functionality/incremental/incremental-parsing.test.ts +++ b/test/functionality/incremental/incremental-parsing.test.ts @@ -1,21 +1,860 @@ -import { describe, test } from 'vitest'; +import { assert, describe, it } from 'vitest'; import { FlowrAnalyzerBuilder } from '../../../src/project/flowr-analyzer-builder'; import { 
FlowrInlineTextFile } from '../../../src/project/context/flowr-file'; -import { RProject } from '../../../src/r-bridge/lang-4.x/ast/model/nodes/r-project'; - -describe('Incremental Parsing', () => { - test('should ', async() => { - const analyzer = await new FlowrAnalyzerBuilder() - .setEngine('tree-sitter') - .build(); - const f = new FlowrInlineTextFile('a.R', 'x <- 42\nprint(x)'); +import type { NormalizedAst } from '../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'; +import { printNormalizedAstToMermaid } from '../../../src/core/print/normalize-printer'; + + +interface IncrementalParsingTestInput { + path: string; + originalContent: string; + updatedContent: string; +} + +interface SingleFileCase { + name: string; + input: IncrementalParsingTestInput; +} + +interface MultiFileCase { + name: string; + inputs: IncrementalParsingTestInput[]; +} + + +async function executeFullParse(inputs: readonly IncrementalParsingTestInput[]): Promise { + const analyzer = await new FlowrAnalyzerBuilder() + .setEngine('tree-sitter') + .build(); + for(const input of inputs) { + const f = new FlowrInlineTextFile(input.path, input.updatedContent); analyzer.addFile(f); - analyzer.addRequest({ request: 'file', content: 'a.R' }); + analyzer.addRequest({ request: 'file', content: input.path }); + } + return (await analyzer.normalize()); +} + + +async function executeIncrementalParse(inputs: readonly IncrementalParsingTestInput[]): Promise { + const analyzer = await new FlowrAnalyzerBuilder() + .setEngine('tree-sitter') + .build(); + const files = new Map(); + for(const input of inputs) { + const f = new FlowrInlineTextFile(input.path, input.originalContent); + analyzer.addFile(f); + analyzer.addRequest({ request: 'file', content: input.path }); + files.set(input.path, f); + } + await analyzer.normalize(); + + for(const input of inputs) { + const f = files.get(input.path); + f?.updateInlineContent(input.updatedContent); + } + + return (await analyzer.normalize()); +} + + 
+async function executeAndCompareResults(inputs: readonly IncrementalParsingTestInput[]): Promise { + const fullParse = await executeFullParse(inputs); + const incrementalParse = await executeIncrementalParse(inputs); + + const fullParseMermaid = printNormalizedAstToMermaid(fullParse); + const incrementalParseMermaid = printNormalizedAstToMermaid(incrementalParse); + + assert.equal(fullParseMermaid, incrementalParseMermaid, 'The incremental parse result does not match the full parse result'); +} + +const lines = (...xs: string[]): string => xs.join('\n'); + +const file = ( + path: string, + originalContent: string, + updatedContent: string +): IncrementalParsingTestInput => ({ + path, + originalContent, + updatedContent +}); + +const singleFileCase = ( + name: string, + originalContent: string, + updatedContent: string +): SingleFileCase => ({ + name, + input: { + path: 'a.R', + originalContent, + updatedContent + } +}); + +const singleFileNoOpCases: SingleFileCase[] = [ + singleFileCase('empty file', '', ''), + singleFileCase('file with top-level content', 'x <- 42', 'x <- 42'), + singleFileCase( + 'file with nested content', + lines( + 'f <- function(x) {', + '\ty <- x + 1', + '\tprint(y)', + '}' + ), + lines( + 'f <- function(x) {', + '\ty <- x + 1', + '\tprint(y)', + '}' + ) + ), + singleFileCase('syntactically invalid file', 'print(', 'print(') +]; + +const singleFileInsertCases: SingleFileCase[] = [ + singleFileCase('one full line into an empty file', '', 'x <- 42'), + singleFileCase( + 'one full line at the start of a file', + 'x <- 42', + lines('y <- 21', 'x <- 42') + ), + singleFileCase( + 'one full line in the middle of a file', + lines('x <- 42', 'print(x)'), + lines('x <- 42', 'x <- 2 * x', 'print(x)') + ), + singleFileCase( + 'one full line at the end of a file', + 'x <- 42', + lines('x <- 42', 'print(x)') + ), + singleFileCase( + 'multiple lines into an empty file', + '', + lines('x <- 42', 'y <- 21', 'z <- 10') + ), + singleFileCase( + 'multiple 
lines at different positions', + lines('x <- 42', 'print(x)'), + lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)') + ), + singleFileCase('a single character inside a number', 'x <- 42', 'x <- 420'), + singleFileCase('a single character inside an identifier', 'x <- 42', 'xy <- 42'), + singleFileCase('a token inside an expression', 'x <- 1 + 2', 'x <- 1 + 2 + 3'), + singleFileCase( + 'a token inside a nested argument list', + 'print(sum(1, 3))', + 'print(sum(1, 2, 3))' + ), + singleFileCase('a trailing newline at end of file', 'x <- 42', 'x <- 42\n') +]; + +const singleFileRemoveCases: SingleFileCase[] = [ + singleFileCase('one full line such that the file becomes empty', 'x <- 42', ''), + singleFileCase( + 'one full line at the start of a file', + lines('y <- 21', 'x <- 42'), + 'x <- 42' + ), + singleFileCase( + 'one full line in the middle of a file', + lines('x <- 42', 'x <- 2 * x', 'print(x)'), + lines('x <- 42', 'print(x)') + ), + singleFileCase( + 'one full line at the end of a file', + lines('x <- 42', 'print(x)'), + 'x <- 42' + ), + singleFileCase( + 'multiple lines such that the file becomes empty', + lines('x <- 42', 'y <- 21', 'z <- 10'), + '' + ), + singleFileCase( + 'multiple lines at different positions', + lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'), + lines('x <- 42', 'print(x)') + ), + singleFileCase('a single character from a number', 'x <- 420', 'x <- 42'), + singleFileCase('a single character from an identifier', 'xy <- 42', 'x <- 42'), + singleFileCase('a token from an expression', 'x <- 1 + 2 + 3', 'x <- 1 + 2'), + singleFileCase( + 'a token from a nested argument list', + 'print(sum(1, 2, 3))', + 'print(sum(1, 3))' + ), + singleFileCase('a trailing newline at end of file', 'x <- 42\n', 'x <- 42') +]; + +const singleFileReplaceCases: SingleFileCase[] = [ + singleFileCase( + 'one full line at the start of a file', + lines('y <- 21', 'x <- 42'), + lines('x <- 84', 'x <- 42') + ), + singleFileCase( + 'one full 
line in the middle of a file', + lines('x <- 42', 'x <- 2 * x', 'print(x)'), + lines('x <- 42', 'y <- 21', 'print(x)') + ), + singleFileCase( + 'one full line at the end of a file', + lines('x <- 42', 'print(x)'), + lines('x <- 42', 'x <- x * x') + ), + singleFileCase( + 'a partially replaced multi-line region', + lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'), + lines('y <- 21', 'x <- 21', 'y <- y * y', 'print(x)', 'print(y)') + ), + singleFileCase( + 'a fully replaced content', + lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'), + lines('z <- 10', 'z <- z + 32', 'print(z)') + ), + singleFileCase('a single character in a number', 'x <- 42', 'x <- 43'), + singleFileCase('an operator token', 'x <- 1 + 2', 'x <- 1 * 2'), + singleFileCase( + 'an identifier token', + lines('x <- 42', 'print(x)'), + lines('value <- 42', 'print(value)') + ), + singleFileCase( + 'part of a single line expression', + 'x <- (1 + 2) * 3', + 'x <- (1 + 20) * 3' + ), + singleFileCase('whitespace only on a single line', 'x <- 42', 'x <- 42'), + singleFileCase( + 'whitespace only across multiple lines', + lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'), + lines('f <- function(x) {', '\t', '\ty <- x + 1', '\tprint(y)', '}') + ), + singleFileCase('comment text', 'x <- 42 # old comment', 'x <- 42 # new comment'), + singleFileCase('a string literal', 'msg <- "abc"', 'msg <- "abcd"'), + singleFileCase('a UTF-8 string literal', 'msg <- "äöü"', 'msg <- "äöü€"'), + singleFileCase('a UTF-8 comment', 'x <- 42 # gruß', 'x <- 42 # grüße €') +]; + +const singleFileSyntaxTransitionCases: SingleFileCase[] = [ + singleFileCase( + 'valid to invalid by removing the right-hand side of an assignment', + 'x <- 42', + 'x <-' + ), + singleFileCase( + 'valid to invalid by removing a closing brace', + lines( + 'f <- function(x) {', + '\tprint(x)', + '}' + ), + lines( + 'f <- function(x) {', + '\tprint(x)' + ) + ), + singleFileCase( + 'valid to invalid by removing a 
closing parenthesis', + 'print(sum(1, 2))', + 'print(sum(1, 2)' + ), + singleFileCase( + 'invalid to valid by completing an assignment', + 'x <-', + 'x <- 42' + ), + singleFileCase( + 'invalid to valid by restoring a closing brace', + lines( + 'f <- function(x) {', + '\tprint(x)' + ), + lines( + 'f <- function(x) {', + '\tprint(x)', + '}' + ) + ), + singleFileCase( + 'invalid to valid by restoring a closing parenthesis', + 'print(sum(1, 2)', + 'print(sum(1, 2))' + ), + singleFileCase( + 'invalid to invalid across different incomplete forms', + 'print(', + 'function(,' + ) +]; + +const singleFileNestedStructureCases: SingleFileCase[] = [ + singleFileCase( + 'inside a function body', + lines( + 'f <- function(x) {', + '\ty <- x + 1', + '\tprint(y)', + '}' + ), + lines( + 'f <- function(x) {', + '\ty <- x * 2', + '\tprint(y)', + '}' + ) + ), + singleFileCase( + 'inside an if branch', + lines( + 'if (x > 0) {', + '\ty <- 1', + '}' + ), + lines( + 'if (x > 0) {', + '\ty <- 1', + '\tz <- 2', + '}' + ) + ), + singleFileCase( + 'inside a for loop body', + lines( + 'for (i in 1:3) {', + '\tprint(i)', + '}' + ), + lines( + 'for (i in 1:3) {', + '\ttotal <- i + 1', + '\tprint(total)', + '}' + ) + ), + singleFileCase( + 'inside a nested argument list', + 'print(sum(1, 2, 3))', + 'print(sum(1, 20, 3))' + ), + singleFileCase( + 'inside nested brackets and subexpressions', + 'x <- list(a = list(b = 1))', + 'x <- list(a = list(b = 2))' + ) +]; + +const repeatedUpdatePairwiseCases: SingleFileCase[] = [ + singleFileCase( + 'sequence step 1: empty file to initial assignment', + '', + 'x <- 1' + ), + singleFileCase( + 'sequence step 2: initial assignment to character-level update', + 'x <- 1', + 'x <- 10' + ), + singleFileCase( + 'sequence step 3: character-level update to added statement', + 'x <- 10', + lines('x <- 10', 'print(x)') + ), + singleFileCase( + 'sequence step 4: added statement to nested function', + lines('x <- 10', 'print(x)'), + lines( + 'f <- function() {', + 
'\tprint(x)', + '}', + 'f()' + ) + ), + singleFileCase( + 'sequence step 5: nested function to temporarily invalid syntax', + lines( + 'f <- function() {', + '\tprint(x)', + '}', + 'f()' + ), + lines( + 'f <- function() {', + '\tprint(x)', + 'f()' + ) + ), + singleFileCase( + 'sequence step 6: temporarily invalid syntax back to valid syntax', + lines( + 'f <- function() {', + '\tprint(x)', + 'f()' + ), + lines( + 'f <- function() {', + '\tprint(x)', + '}', + 'f()' + ) + ) +]; + +const multiFileCases: MultiFileCase[] = [ + { + name: 'editing only the first file while the second file stays unchanged', + inputs: [ + file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'x <- x + 1', 'print(x)')), + file('b.R', lines('y <- 21', 'print(y)'), lines('y <- 21', 'print(y)')) + ] + }, + { + name: 'editing only the second file while the first file stays unchanged', + inputs: [ + file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')), + file('b.R', lines('y <- 21', 'print(y)'), lines('y <- 21', 'y <- y * 2', 'print(y)')) + ] + }, + { + name: 'editing both files independently in the same run', + inputs: [ + file('a.R', lines('x <- 1', 'print(x)'), lines('x <- 2', 'x <- x * 3', 'print(x)')), + file('b.R', lines('y <- 10', 'print(y)'), lines('z <- 10', 'print(z + 1)')) + ] + }, + { + name: 'adding a new file while another file stays unchanged', + inputs: [ + file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')), + file('b.R', '', lines('helper <- function(x) {', '\tx * 2', '}', 'print(helper(21))')) + ] + }, + { + name: 'removing one file while another file stays unchanged', + inputs: [ + file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')), + file('b.R', lines('tmp <- 1', 'print(tmp)'), '') + ] + }, + { + name: 'mixing file modification, file addition, and file removal in one run', + inputs: [ + file('a.R', lines('x <- 1', 'print(x)'), lines('x <- 1', 'x <- x + 1', 'print(x)')), + file('b.R', '', lines('y <- 21', 
'print(y)')), + file('c.R', lines('obsolete <- TRUE', 'print(obsolete)'), '') + ] + }, + { + name: 'making one file invalid while another file remains unchanged and valid', + inputs: [ + file( + 'a.R', + lines('f <- function(x) {', '\tprint(x)', '}'), + lines('f <- function(x) {', '\tprint(x)') + ), + file('b.R', lines('y <- 21', 'print(y)'), lines('y <- 21', 'print(y)')) + ] + }, + { + name: 'editing UTF-8 content in one file while another file stays unchanged', + inputs: [ + file('a.R', lines('msg <- "äöü"', 'print(msg)'), lines('msg <- "äöü€"', 'print(msg)')), + file('b.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')) + ] + }, + { + name: 'editing inside a nested construct in one file and at top level in another', + inputs: [ + file( + 'a.R', + lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'), + lines('f <- function(x) {', '\ty <- x * 2', '\tprint(y)', '}') + ), + file('b.R', lines('z <- 3', 'print(z)'), lines('z <- 3', 'z <- z + 1', 'print(z)')) + ] + }, + { + name: 'editing only one of two syntactically invalid files', + inputs: [ + file('a.R', 'print(', 'print(1)'), + file('b.R', 'x <-', 'x <-') + ] + } +]; + +const repeatedMultiFilePairwiseCases: MultiFileCase[] = [ + { + name: 'pairwise sequence step 1 across files', + inputs: [ + file('a.R', '', 'x <- 1'), + file('b.R', '', 'y <- 2') + ] + }, + { + name: 'pairwise sequence step 2 across files', + inputs: [ + file('a.R', 'x <- 1', lines('x <- 1', 'print(x)')), + file('b.R', 'y <- 2', 'y <- 20') + ] + }, + { + name: 'pairwise sequence step 3 across files with temporary invalidity', + inputs: [ + file('a.R', lines('x <- 1', 'print(x)'), 'x <-'), + file('b.R', 'y <- 20', lines('f <- function() {', '\tprint(y)', '}', 'f()')) + ] + }, + { + name: 'pairwise sequence step 4 across files returning to valid syntax', + inputs: [ + file('a.R', 'x <-', lines('x <- 1', 'print(x)')), + file('b.R', lines('f <- function() {', '\tprint(y)', '}', 'f()'), lines('f <- function() {', '\tprint(y + 
1)', '}', 'f()')) + ] + } +]; + + +describe('Incremental Parsing produces same results as Full Parsing', () => { + describe('single-file', () => { + describe('no-op', () => { + it.each(singleFileNoOpCases)('$name', async({ input }) => { + await executeAndCompareResults([input]); + }); + }); + + describe('insert', () => { + it.each(singleFileInsertCases)('$name', async({ input }) => { + await executeAndCompareResults([input]); + }); + }); - console.log(RProject.collectAllIds((await analyzer.normalize()).ast)); + describe('remove', () => { + it.each(singleFileRemoveCases)('$name', async({ input }) => { + await executeAndCompareResults([input]); + }); + }); - f.updateInlineContent('x <- 42\ny <- 32\nprint(x)'); + describe('replace', () => { + it.each(singleFileReplaceCases)('$name', async({ input }) => { + await executeAndCompareResults([input]); + }); + }); + + describe('syntax transitions', () => { + it.each(singleFileSyntaxTransitionCases)('$name', async({ input }) => { + await executeAndCompareResults([input]); + }); + }); + + describe('nested structures', () => { + it.each(singleFileNestedStructureCases)('$name', async({ input }) => { + await executeAndCompareResults([input]); + }); + }); + + describe('pairwise successive states', () => { + it.each(repeatedUpdatePairwiseCases)('$name', async({ input }) => { + await executeAndCompareResults([input]); + }); + }); + }); + + describe('multi-file', () => { + it.each(multiFileCases)('$name', async({ inputs }) => { + await executeAndCompareResults(inputs); + }); + + describe('pairwise successive states across files', () => { + it.each(repeatedMultiFilePairwiseCases)('$name', async({ inputs }) => { + await executeAndCompareResults(inputs); + }); + }); + }); +}); + + + + + + + + +describe('Incremental Parsing produces same results as Full Parsing for one file for', () => { + describe('no change', () => { + describe('to an empty file', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: '', + 
updatedContent: '', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + describe('to a file with content', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'x <- 42', + updatedContent: 'x <- 42', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + }); + describe('one line', () => { + describe('inserted', () => { + describe('into an empty file', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: '', + updatedContent: 'x <- 42', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + describe('at the start', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'x <- 42', + updatedContent: 'y <- 21\nx <- 42', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + describe('at the end', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'x <- 42', + updatedContent: 'x <- 42\nprint(x)', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + describe('in the middle', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'x <- 42\nprint(x)', + updatedContent: 'x <- 42\nx <- 2 * x\nprint(x)', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + }); + describe('removed', () => { + describe('such that the file becomes empty', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'x <- 42', + updatedContent: '', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + describe('at the start', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'y <- 21\nx <- 42', + updatedContent: 'x <- 42', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + describe('at the end', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'x <- 42\nprint(x)', + updatedContent: 'x <- 42', + path: 'a.R' + }]; + await 
executeAndCompareResults(inputs); + }); + describe('in the middle', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'x <- 42\nx <- 2 * x\nprint(x)', + updatedContent: 'x <- 42\nprint(x)', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + }); + describe('replaced', () => { + describe('at the start', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'y <- 21\nx <- 42', + updatedContent: 'x <- 21\nx <- 42', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + describe('at the end', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'x <- 42\nprint(x)', + updatedContent: 'x <- 42\nx <- x * x', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + describe('in the middle', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'x <- 42\nx <- 2 * x\nprint(x)', + updatedContent: 'x <- 42\nx <- 21\nprint(x)', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + }); + }); + describe('multiple lines', () => { + describe('inserted', () => { + describe('into an empty file', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: '', + updatedContent: 'x <- 42\ny <- 21\nz <- 10', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + describe('at different positions', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'x <- 42\nprint(x)', + updatedContent: 'y <- 21\nx <- 42\ny <- y * 2\nprint(x)\nprint(y)', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + }); + describe('removed', () => { + describe('such that the file becomes empty', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'x <- 42\ny <- 21\nz <- 10', + updatedContent: '', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + describe('at different positions', async() => { + const 
inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'y <- 21\nx <- 42\ny <- y * 2\nprint(x)\nprint(y)', + updatedContent: 'x <- 42\nprint(x)', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + }); + describe('replaced', () => { + describe('partially', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'y <- 21\nx <- 42\ny <- y * 2\nprint(x)\nprint(y)', + updatedContent: 'y <- 21\nx <- 21\ny <- y * y\nprint(x)\nprint(y + 2)', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + describe('fully', async() => { + const inputs: IncrementalParsingTestInput[] = [{ + originalContent: 'y <- 21\nx <- 42\ny <- y * 2\nprint(x)\nprint(y)', + updatedContent: 'z <- 10\nz <- z + 32\nprint(z)', + path: 'a.R' + }]; + await executeAndCompareResults(inputs); + }); + }); + }); +}); + + +describe('Incremental Parsing produces same results as Full Parsing across multiple files for', () => { + it('editing only the first file while the second file stays unchanged', async() => { + const inputs: IncrementalParsingTestInput[] = [ + { + originalContent: 'x <- 42\nprint(x)', + updatedContent: 'x <- 42\nx <- x + 1\nprint(x)', + path: 'a.R' + }, + { + originalContent: 'y <- 21\nprint(y)', + updatedContent: 'y <- 21\nprint(y)', + path: 'b.R' + } + ]; + await executeAndCompareResults(inputs); + }); + + it('editing only the second file while the first file stays unchanged', async() => { + const inputs: IncrementalParsingTestInput[] = [ + { + originalContent: 'x <- 42\nprint(x)', + updatedContent: 'x <- 42\nprint(x)', + path: 'a.R' + }, + { + originalContent: 'y <- 21\nprint(y)', + updatedContent: 'y <- 21\ny <- y * 2\nprint(y)', + path: 'b.R' + } + ]; + await executeAndCompareResults(inputs); + }); + + it('editing both files independently in the same run', async() => { + const inputs: IncrementalParsingTestInput[] = [ + { + originalContent: 'x <- 1\nprint(x)', + updatedContent: 'x <- 2\nx <- x * 3\nprint(x)', + path: 
'a.R' + }, + { + originalContent: 'y <- 10\nprint(y)', + updatedContent: 'z <- 10\nprint(z + 1)', + path: 'b.R' + } + ]; + await executeAndCompareResults(inputs); + }); + + it('adding a new file while another file stays unchanged', async() => { + const inputs: IncrementalParsingTestInput[] = [ + { + originalContent: 'x <- 42\nprint(x)', + updatedContent: 'x <- 42\nprint(x)', + path: 'a.R' + }, + { + originalContent: '', + updatedContent: 'foo <- function(x) x * 2\nprint(foo(21))', + path: 'b.R' + } + ]; + await executeAndCompareResults(inputs); + }); + + it('removing one file while another file stays unchanged', async() => { + const inputs: IncrementalParsingTestInput[] = [ + { + originalContent: 'x <- 42\nprint(x)', + updatedContent: 'x <- 42\nprint(x)', + path: 'a.R' + }, + { + originalContent: 'tmp <- 1\nprint(tmp)', + updatedContent: '', + path: 'b.R' + } + ]; + await executeAndCompareResults(inputs); + }); - console.log(RProject.collectAllIds((await analyzer.normalize()).ast)); + it('mixing file modification, file addition, and file removal in one run', async() => { + const inputs: IncrementalParsingTestInput[] = [ + { + originalContent: 'x <- 1\nprint(x)', + updatedContent: 'x <- 1\nx <- x + 1\nprint(x)', + path: 'a.R' + }, + { + originalContent: '', + updatedContent: 'y <- 21\nprint(y)', + path: 'b.R' + }, + { + originalContent: 'obsolete <- TRUE\nprint(obsolete)', + updatedContent: '', + path: 'c.R' + } + ]; + await executeAndCompareResults(inputs); }); }); \ No newline at end of file From 984c3b15ed5ad639852355cd867efbac63e2362b Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Wed, 1 Apr 2026 11:58:32 +0200 Subject: [PATCH 06/20] refactor: edit computation into separate file --- .../incremental-parse/edit-computation.ts | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 src/project/incremental/incremental-parse/edit-computation.ts diff --git a/src/project/incremental/incremental-parse/edit-computation.ts 
b/src/project/incremental/incremental-parse/edit-computation.ts new file mode 100644 index 00000000000..099ad9bc82b --- /dev/null +++ b/src/project/incremental/incremental-parse/edit-computation.ts @@ -0,0 +1,63 @@ +import type Parser from 'web-tree-sitter'; + + +/** + * Computes a single minimal change region ({@link Parser.Edit}) that contains all modifications. + * @param oldContent - The original content. + * @param newContent - The changed content. + */ +export function computeEditRegion(oldContent: string, newContent: string): Parser.Edit { + const oldLen = oldContent.length; + const newLen = newContent.length; + + // 1) Longest common prefix + let startIndex = 0; + while( + startIndex < oldLen && + startIndex < newLen && + oldContent[startIndex] === newContent[startIndex] + ) { + startIndex++; + } + + // 2) Longest common suffix, without overlapping the prefix + let oldSuffixIndex = oldLen; + let newSuffixIndex = newLen; + while( + oldSuffixIndex > startIndex && + newSuffixIndex > startIndex && + oldContent[oldSuffixIndex - 1] === newContent[newSuffixIndex - 1] + ) { + oldSuffixIndex--; + newSuffixIndex--; + } + + const oldEndIndex = oldSuffixIndex; + const newEndIndex = newSuffixIndex; + + return { + startIndex, + oldEndIndex, + newEndIndex, + startPosition: indexToPoint(oldContent, startIndex), + oldEndPosition: indexToPoint(oldContent, oldEndIndex), + newEndPosition: indexToPoint(newContent, newEndIndex), + }; +} + + +function indexToPoint(text: string, index: number): Parser.Point { + let row = 0; + let column = 0; + + for(let i = 0; i < index; i++) { + if(text[i] === '\n') { + row++; + column = 0; + } else { + column++; + } + } + + return { row, column }; +} \ No newline at end of file From e2780450494c1449ad0a8d7c64374266daa7d349 Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Wed, 1 Apr 2026 11:59:13 +0200 Subject: [PATCH 07/20] test: edit computation --- .../incremental/edit-computation.test.ts | 91 +++++++++++++++++++ 1 file changed, 91 
insertions(+) create mode 100644 test/functionality/incremental/edit-computation.test.ts diff --git a/test/functionality/incremental/edit-computation.test.ts b/test/functionality/incremental/edit-computation.test.ts new file mode 100644 index 00000000000..167343a4372 --- /dev/null +++ b/test/functionality/incremental/edit-computation.test.ts @@ -0,0 +1,91 @@ +import { describe, expect, it } from 'vitest'; +import { computeEditRegion } from '../../../src/project/incremental/incremental-parse/edit-computation'; + +describe('computeEdit', () => { + it('returns a no-op edit for identical content', () => { + const result = computeEditRegion('abc', 'abc'); + + expect(result.startIndex).toEqual(result.oldEndIndex); + expect(result.startIndex).toEqual(result.newEndIndex); + + expect(result.startPosition).toEqual(result.oldEndPosition); + expect(result.startPosition).toEqual(result.newEndPosition); + }); + + it('detects an insertion in the middle', () => { + expect(computeEditRegion('abef', 'abcdef')).toEqual({ + startIndex: 2, + oldEndIndex: 2, + newEndIndex: 4, + startPosition: { row: 0, column: 2 }, + oldEndPosition: { row: 0, column: 2 }, + newEndPosition: { row: 0, column: 4 }, + }); + }); + + it('detects a deletion in the middle', () => { + expect(computeEditRegion('abcdef', 'abef')).toEqual({ + startIndex: 2, + oldEndIndex: 4, + newEndIndex: 2, + startPosition: { row: 0, column: 2 }, + oldEndPosition: { row: 0, column: 4 }, + newEndPosition: { row: 0, column: 2 }, + }); + }); + + it('detects a replacement in the middle', () => { + expect(computeEditRegion('abcdef', 'abXYef')).toEqual({ + startIndex: 2, + oldEndIndex: 4, + newEndIndex: 4, + startPosition: { row: 0, column: 2 }, + oldEndPosition: { row: 0, column: 4 }, + newEndPosition: { row: 0, column: 4 }, + }); + }); + + it('detects an insertion at the beginning', () => { + expect(computeEditRegion('world', 'hello world')).toEqual({ + startIndex: 0, + oldEndIndex: 0, + newEndIndex: 6, + startPosition: { row: 0, 
column: 0 }, + oldEndPosition: { row: 0, column: 0 }, + newEndPosition: { row: 0, column: 6 }, + }); + }); + + it('detects a replacement of the whole content', () => { + expect(computeEditRegion('abc', 'xyz')).toEqual({ + startIndex: 0, + oldEndIndex: 3, + newEndIndex: 3, + startPosition: { row: 0, column: 0 }, + oldEndPosition: { row: 0, column: 3 }, + newEndPosition: { row: 0, column: 3 }, + }); + }); + + it('computes row/column positions correctly for multi-line edits', () => { + expect(computeEditRegion('a\nbc\ndef', 'a\nXY\ndef')).toEqual({ + startIndex: 2, + oldEndIndex: 4, + newEndIndex: 4, + startPosition: { row: 1, column: 0 }, + oldEndPosition: { row: 1, column: 2 }, + newEndPosition: { row: 1, column: 2 }, + }); + }); + + it('does not let suffix matching overlap with the prefix', () => { + expect(computeEditRegion('aaa', 'aa')).toEqual({ + startIndex: 2, + oldEndIndex: 3, + newEndIndex: 2, + startPosition: { row: 0, column: 2 }, + oldEndPosition: { row: 0, column: 3 }, + newEndPosition: { row: 0, column: 2 }, + }); + }); +}); \ No newline at end of file From 68502d55ebc105b3942e461c1d3d7b3fd5f58891 Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Wed, 1 Apr 2026 12:03:49 +0200 Subject: [PATCH 08/20] feat-fix: inc context now stores multiple ReparseInfo --- src/project/cache/flowr-analyzer-cache.ts | 30 +++++++++++-------- ...r-analyzer-incremental-analysis-context.ts | 29 ++++++++++-------- .../tree-sitter/tree-sitter-executor.ts | 12 ++++---- 3 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/project/cache/flowr-analyzer-cache.ts b/src/project/cache/flowr-analyzer-cache.ts index b19413c5531..38b4706f1ed 100644 --- a/src/project/cache/flowr-analyzer-cache.ts +++ b/src/project/cache/flowr-analyzer-cache.ts @@ -18,12 +18,10 @@ import type { FlowrAnalyzerContext } from '../context/flowr-analyzer-context'; import { FlowrAnalyzerControlFlowCache } from './flowr-analyzer-controlflow-cache'; import type { CallGraph } from 
'../../dataflow/graph/call-graph'; import { computeCallGraph } from '../../dataflow/graph/call-graph'; -import type { - ReparseAction } from '../incremental/incremental-parse/incremental-parse'; import { - coarseCheckWhetherToInvalidate, - computeReparseAction + coarseCheckWhetherToInvalidate } from '../incremental/incremental-parse/incremental-parse'; +import { computeEditRegion } from '../incremental/incremental-parse/edit-computation'; interface FlowrAnalyzerCacheOptions { parser: Parser; @@ -55,12 +53,10 @@ export class FlowrAnalyzerCache extends FlowrCache extends FlowrCache f.filePath === event.file.path())?.parsed; + const reparseInfo = { + previousTree, + editRegion + }; + + this.args.context.inc.storeReparseInfo(event.file.path(), reparseInfo); + this.initCacheProviders(false); break; } default: diff --git a/src/project/context/flowr-analyzer-incremental-analysis-context.ts b/src/project/context/flowr-analyzer-incremental-analysis-context.ts index 5a8f86c6941..226db5cd9df 100644 --- a/src/project/context/flowr-analyzer-incremental-analysis-context.ts +++ b/src/project/context/flowr-analyzer-incremental-analysis-context.ts @@ -1,10 +1,9 @@ -import type { ParseStepOutput } from '../../r-bridge/parser'; -import type { Tree } from 'web-tree-sitter'; -import type { ReparseAction } from '../incremental/incremental-parse/incremental-parse'; +import type Parser from 'web-tree-sitter'; -export interface ParseInfo { - lastParseStepOutput: ParseStepOutput | undefined; - nextReparseAction: ReparseAction | undefined; + +export interface ReparseInfo { + previousTree: string | Parser.Tree | undefined; + editRegion: Parser.Edit; } export interface ReadOnlyFlowrAnalyzerIncrementalAnalysisContext { @@ -13,7 +12,7 @@ export interface ReadOnlyFlowrAnalyzerIncrementalAnalysisContext { */ readonly name: string; - getParseInfo(): ParseInfo | undefined; + getAndRemoveParseInfo(filePath: string): ReparseInfo | undefined; } /** @@ -22,17 +21,21 @@ export interface 
ReadOnlyFlowrAnalyzerIncrementalAnalysisContext { export class FlowrAnalyzerIncrementalAnalysisContext implements ReadOnlyFlowrAnalyzerIncrementalAnalysisContext { public readonly name = 'flowr-analyzer-incremental-analysis-context'; - private parseInfo?: ParseInfo; + private reparseInfoMap: Map = new Map(); public reset(): void { - this.parseInfo = undefined; + this.reparseInfoMap = new Map(); } - public storeParseInfo(parseInfo?: ParseInfo): void { - this.parseInfo = parseInfo; + public storeReparseInfo(filePath: string, reparseInfo?: ReparseInfo): void { + if(reparseInfo) { + this.reparseInfoMap.set(filePath, reparseInfo); + } } - public getParseInfo(): ParseInfo | undefined { - return this.parseInfo; + public getAndRemoveParseInfo(filePath: string): ReparseInfo | undefined { + const reparseInfo = this.reparseInfoMap.get(filePath); + this.reparseInfoMap.delete(filePath); + return reparseInfo; } } diff --git a/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts b/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts index 74b6e9aff06..874b10866aa 100644 --- a/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts +++ b/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts @@ -80,13 +80,11 @@ export class TreeSitterExecutor implements SyncParser { sourceCode = request.content; } - const parseInfo = inc?.getParseInfo(); - const nextReparseAction = parseInfo?.nextReparseAction; - if(request.filePath !== undefined && nextReparseAction) { - const previousFile = parseInfo.lastParseStepOutput?.files.find(f => f.filePath === request.filePath); - if(previousFile && typeof previousFile.parsed !== 'string') { - const previousTree = previousFile.parsed; - previousTree.edit(nextReparseAction.edit); + if(inc && request.filePath !== undefined) { + const reparseInfo = inc.getAndRemoveParseInfo(request.filePath); + if(reparseInfo && reparseInfo.previousTree && typeof reparseInfo.previousTree !== 'string') { + const previousTree = reparseInfo.previousTree; + 
previousTree.edit(reparseInfo.editRegion); return this.parser.parse(sourceCode, previousTree); } } From 7ea08812d00727464ad8efbb31415f8cf3fe0b9b Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Wed, 1 Apr 2026 12:04:44 +0200 Subject: [PATCH 09/20] feat: extend coarseCheckWhetherToInvalidate --- .../incremental-parse/incremental-parse.ts | 94 ++----------------- 1 file changed, 7 insertions(+), 87 deletions(-) diff --git a/src/project/incremental/incremental-parse/incremental-parse.ts b/src/project/incremental/incremental-parse/incremental-parse.ts index bb1d98459db..8b7700b04b3 100644 --- a/src/project/incremental/incremental-parse/incremental-parse.ts +++ b/src/project/incremental/incremental-parse/incremental-parse.ts @@ -1,8 +1,7 @@ import type { FlowrAnalyzerContext } from '../../context/flowr-analyzer-context'; -import type { FileContentInvalidateEvent, InvalidationEvent } from '../../cache/flowr-cache'; +import type { InvalidationEvent } from '../../cache/flowr-cache'; import { InvalidationEventType } from '../../cache/flowr-cache'; -import type { FlowrFileProvider } from '../../context/flowr-file'; -import type Parser from 'web-tree-sitter'; +import { FileRole } from '../../context/flowr-file'; /** * Is this file even relevant to us? @@ -11,93 +10,14 @@ export function coarseCheckWhetherToInvalidate(ctx: FlowrAnalyzerContext, event: if(event?.type === InvalidationEventType.Full) { return true; } - // const path = event.file.path(); - // if the file has not been considered by the analysis we do not have to continue - // TODO: make sure this also works for descriptions/other meta files maybe we have to check for the role - /* - if(event.file.roles?.includes(FileRole.Source) && !ctx.files.consideredFilesList().includes(path)) { - return false; - } - */ - const newContent = event.file.content(); - if(event.oldContent === newContent) { - // TODO: maybe we want to allow a 'force' flag? 
- // nothing changed - console.debug('File content did not change, skipping invalidation'); - return false; - } - - return true; -} - -export interface ReparseAction { - file: FlowrFileProvider, - edit: Parser.Edit; -} - -/** - * Compute the reparse action for the given file. - * @param event - The invalidation event. - * @returns The reparse action. - */ -export function computeReparseAction(event: FileContentInvalidateEvent): ReparseAction { - const oldContent = event.oldContent?.toString() ?? ''; - const newContent = event.file.content().toString(); - const oldLen = oldContent.length; - const newLen = newContent.length; - - // 1) Longest common prefix - let startIndex = 0; - while( - startIndex < oldLen && - startIndex < newLen && - oldContent[startIndex] === newContent[startIndex] - ) { - startIndex++; - } - - // 2) Longest common suffix, without overlapping the prefix - let oldSuffixIndex = oldLen; - let newSuffixIndex = newLen; - while( - oldSuffixIndex > startIndex && - newSuffixIndex > startIndex && - oldContent[oldSuffixIndex - 1] === newContent[newSuffixIndex - 1] - ) { - oldSuffixIndex--; - newSuffixIndex--; + if(!event.file.roles?.includes(FileRole.Source) && !event.file.roles?.includes(FileRole.Description)) { + return false; } - const oldEndIndex = oldSuffixIndex; - const newEndIndex = newSuffixIndex; - - return { - file: event.file, - edit: { - startIndex, - oldEndIndex, - newEndIndex, - startPosition: indexToPoint(oldContent, startIndex), - oldEndPosition: indexToPoint(oldContent, oldEndIndex), - newEndPosition: indexToPoint(newContent, newEndIndex), - } - }; -} - - -function indexToPoint(text: string, index: number): Parser.Point { - let row = 0; - let column = 0; - - for(let i = 0; i < index; i++) { - if(text[i] === '\n') { - row++; - column = 0; - } else { - column++; - } + if(!ctx.files.consideredFilesList().includes(event.file.path())) { + return false; } - return { row, column }; + return event.oldContent !== event.file.content(); } \ No 
newline at end of file From c7ad0bc1c6bdcc87b857abd38a5f79d00202e305 Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Wed, 1 Apr 2026 12:06:25 +0200 Subject: [PATCH 10/20] feat: reset() in FlowrAnalyzerContext fires InvalidationEventType.Full --- src/project/context/flowr-analyzer-context.ts | 35 ++++++++++++++----- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/src/project/context/flowr-analyzer-context.ts b/src/project/context/flowr-analyzer-context.ts index e004517683a..46b95982d61 100644 --- a/src/project/context/flowr-analyzer-context.ts +++ b/src/project/context/flowr-analyzer-context.ts @@ -37,6 +37,13 @@ import type { import { FlowrAnalyzerIncrementalAnalysisContext } from './flowr-analyzer-incremental-analysis-context'; +import type { + InvalidationEvent, + InvalidationEventReceiver } from '../cache/flowr-cache'; +import { + InvalidationEventType +} from '../cache/flowr-cache'; +import { assertUnreachable } from '../../util/assert'; /** * This is a read-only interface to the {@link FlowrAnalyzerContext}. @@ -86,7 +93,7 @@ export interface ReadOnlyFlowrAnalyzerContext { * {@link deps.getDependency}. * If you are just interested in inspecting the context, you can use {@link ReadOnlyFlowrAnalyzerContext} instead (e.g., via {@link inspect}). */ -export class FlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerContext { +export class FlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerContext, InvalidationEventReceiver { public readonly meta: FlowrAnalyzerMetaContext; public readonly files: FlowrAnalyzerFilesContext; public readonly deps: FlowrAnalyzerDependenciesContext; @@ -102,10 +109,10 @@ export class FlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerContext { const loadingOrder = new FlowrAnalyzerLoadingOrderContext(this, plugins.get(PluginType.LoadingOrder) as FlowrAnalyzerLoadingOrderPlugin[]); this.files = new FlowrAnalyzerFilesContext(this, loadingOrder, (plugins.get(PluginType.ProjectDiscovery) ?? 
[]) as FlowrAnalyzerProjectDiscoveryPlugin[], (plugins.get(PluginType.FileLoad) ?? []) as FlowrAnalyzerFilePlugin[]); - this.env = new FlowrAnalyzerEnvironmentContext(this); - this.inc = new FlowrAnalyzerIncrementalAnalysisContext(); + this.env = new FlowrAnalyzerEnvironmentContext(this); + this.inc = new FlowrAnalyzerIncrementalAnalysisContext(); const functions = new FlowrAnalyzerFunctionsContext(this); - this.deps = new FlowrAnalyzerDependenciesContext(functions, (plugins.get(PluginType.DependencyIdentification) ?? []) as FlowrAnalyzerPackageVersionsPlugin[]); + this.deps = new FlowrAnalyzerDependenciesContext(functions, (plugins.get(PluginType.DependencyIdentification) ?? []) as FlowrAnalyzerPackageVersionsPlugin[]); this.meta = new FlowrAnalyzerMetaContext(); } @@ -154,10 +161,22 @@ export class FlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerContext { * Reset the context to its initial state, e.g., removing all files, dependencies, and loading orders. */ public reset(): void { - this.files.reset(); - this.deps.reset(); - this.meta.reset(); - this.inc.reset(); + this.receive( { type: InvalidationEventType.Full }); + } + + receive(event: InvalidationEvent): void { + const type = event.type; + switch(type) { + case InvalidationEventType.Full: + case InvalidationEventType.FileInvalidate: + this.files.reset(); + this.deps.reset(); + this.meta.reset(); + this.inc.reset(); + break; + default: + assertUnreachable(type); + } } } From 889bc91c54efbaf399e40402ba29d2819352f7d7 Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Wed, 1 Apr 2026 12:07:49 +0200 Subject: [PATCH 11/20] test-fix: check if incremental parse was attempted --- .../incremental/incremental-parsing.test.ts | 305 ++---------------- 1 file changed, 19 insertions(+), 286 deletions(-) diff --git a/test/functionality/incremental/incremental-parsing.test.ts b/test/functionality/incremental/incremental-parsing.test.ts index 19c5a7f8d2c..1f5075e384b 100644 --- 
a/test/functionality/incremental/incremental-parsing.test.ts +++ b/test/functionality/incremental/incremental-parsing.test.ts @@ -1,8 +1,9 @@ -import { assert, describe, it } from 'vitest'; +import { assert, describe, expect, it, vi } from 'vitest'; import { FlowrAnalyzerBuilder } from '../../../src/project/flowr-analyzer-builder'; import { FlowrInlineTextFile } from '../../../src/project/context/flowr-file'; import type { NormalizedAst } from '../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'; import { printNormalizedAstToMermaid } from '../../../src/core/print/normalize-printer'; +import type { TreeSitterExecutor } from '../../../src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor'; interface IncrementalParsingTestInput { @@ -53,7 +54,23 @@ async function executeIncrementalParse(inputs: readonly IncrementalParsingTestIn f?.updateInlineContent(input.updatedContent); } - return (await analyzer.normalize()); + const parser = analyzer['parser'] as TreeSitterExecutor; + const parseSpy = vi.spyOn(parser, 'parse'); + const result = await analyzer.normalize(); + + const parseCalls = parseSpy.mock.calls; + let callIdx = 0; + for(const input of inputs ?? 
[]) { + if(input.originalContent.trim() !== '' && input.updatedContent !== input.originalContent) { + // check if incremental parse was attempted, i.e., if a previousTree was provided when parsing again + const [, previousTreeArg] = parseCalls[callIdx]; + expect(previousTreeArg, `file ${input.path}: expected incremental parse`).toBeDefined(); + + callIdx++; + } + } + + return result; } @@ -573,288 +590,4 @@ describe('Incremental Parsing produces same results as Full Parsing', () => { }); }); }); -}); - - - - - - - - -describe('Incremental Parsing produces same results as Full Parsing for one file for', () => { - describe('no change', () => { - describe('to an empty file', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: '', - updatedContent: '', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - describe('to a file with content', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'x <- 42', - updatedContent: 'x <- 42', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - }); - describe('one line', () => { - describe('inserted', () => { - describe('into an empty file', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: '', - updatedContent: 'x <- 42', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - describe('at the start', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'x <- 42', - updatedContent: 'y <- 21\nx <- 42', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - describe('at the end', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'x <- 42', - updatedContent: 'x <- 42\nprint(x)', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - describe('in the middle', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'x <- 42\nprint(x)', - updatedContent: 'x <- 42\nx <- 2 * 
x\nprint(x)', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - }); - describe('removed', () => { - describe('such that the file becomes empty', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'x <- 42', - updatedContent: '', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - describe('at the start', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'y <- 21\nx <- 42', - updatedContent: 'x <- 42', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - describe('at the end', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'x <- 42\nprint(x)', - updatedContent: 'x <- 42', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - describe('in the middle', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'x <- 42\nx <- 2 * x\nprint(x)', - updatedContent: 'x <- 42\nprint(x)', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - }); - describe('replaced', () => { - describe('at the start', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'y <- 21\nx <- 42', - updatedContent: 'x <- 21\nx <- 42', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - describe('at the end', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'x <- 42\nprint(x)', - updatedContent: 'x <- 42\nx <- x * x', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - describe('in the middle', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'x <- 42\nx <- 2 * x\nprint(x)', - updatedContent: 'x <- 42\nx <- 21\nprint(x)', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - }); - }); - describe('multiple lines', () => { - describe('inserted', () => { - describe('into an empty file', async() => { - const inputs: IncrementalParsingTestInput[] = 
[{ - originalContent: '', - updatedContent: 'x <- 42\ny <- 21\nz <- 10', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - describe('at different positions', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'x <- 42\nprint(x)', - updatedContent: 'y <- 21\nx <- 42\ny <- y * 2\nprint(x)\nprint(y)', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - }); - describe('removed', () => { - describe('such that the file becomes empty', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'x <- 42\ny <- 21\nz <- 10', - updatedContent: '', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - describe('at different positions', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'y <- 21\nx <- 42\ny <- y * 2\nprint(x)\nprint(y)', - updatedContent: 'x <- 42\nprint(x)', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - }); - describe('replaced', () => { - describe('partially', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'y <- 21\nx <- 42\ny <- y * 2\nprint(x)\nprint(y)', - updatedContent: 'y <- 21\nx <- 21\ny <- y * y\nprint(x)\nprint(y + 2)', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - describe('fully', async() => { - const inputs: IncrementalParsingTestInput[] = [{ - originalContent: 'y <- 21\nx <- 42\ny <- y * 2\nprint(x)\nprint(y)', - updatedContent: 'z <- 10\nz <- z + 32\nprint(z)', - path: 'a.R' - }]; - await executeAndCompareResults(inputs); - }); - }); - }); -}); - - -describe('Incremental Parsing produces same results as Full Parsing across multiple files for', () => { - it('editing only the first file while the second file stays unchanged', async() => { - const inputs: IncrementalParsingTestInput[] = [ - { - originalContent: 'x <- 42\nprint(x)', - updatedContent: 'x <- 42\nx <- x + 1\nprint(x)', - path: 'a.R' - }, - { - originalContent: 'y 
<- 21\nprint(y)', - updatedContent: 'y <- 21\nprint(y)', - path: 'b.R' - } - ]; - await executeAndCompareResults(inputs); - }); - - it('editing only the second file while the first file stays unchanged', async() => { - const inputs: IncrementalParsingTestInput[] = [ - { - originalContent: 'x <- 42\nprint(x)', - updatedContent: 'x <- 42\nprint(x)', - path: 'a.R' - }, - { - originalContent: 'y <- 21\nprint(y)', - updatedContent: 'y <- 21\ny <- y * 2\nprint(y)', - path: 'b.R' - } - ]; - await executeAndCompareResults(inputs); - }); - - it('editing both files independently in the same run', async() => { - const inputs: IncrementalParsingTestInput[] = [ - { - originalContent: 'x <- 1\nprint(x)', - updatedContent: 'x <- 2\nx <- x * 3\nprint(x)', - path: 'a.R' - }, - { - originalContent: 'y <- 10\nprint(y)', - updatedContent: 'z <- 10\nprint(z + 1)', - path: 'b.R' - } - ]; - await executeAndCompareResults(inputs); - }); - - it('adding a new file while another file stays unchanged', async() => { - const inputs: IncrementalParsingTestInput[] = [ - { - originalContent: 'x <- 42\nprint(x)', - updatedContent: 'x <- 42\nprint(x)', - path: 'a.R' - }, - { - originalContent: '', - updatedContent: 'foo <- function(x) x * 2\nprint(foo(21))', - path: 'b.R' - } - ]; - await executeAndCompareResults(inputs); - }); - - it('removing one file while another file stays unchanged', async() => { - const inputs: IncrementalParsingTestInput[] = [ - { - originalContent: 'x <- 42\nprint(x)', - updatedContent: 'x <- 42\nprint(x)', - path: 'a.R' - }, - { - originalContent: 'tmp <- 1\nprint(tmp)', - updatedContent: '', - path: 'b.R' - } - ]; - await executeAndCompareResults(inputs); - }); - - it('mixing file modification, file addition, and file removal in one run', async() => { - const inputs: IncrementalParsingTestInput[] = [ - { - originalContent: 'x <- 1\nprint(x)', - updatedContent: 'x <- 1\nx <- x + 1\nprint(x)', - path: 'a.R' - }, - { - originalContent: '', - updatedContent: 'y <- 
21\nprint(y)', - path: 'b.R' - }, - { - originalContent: 'obsolete <- TRUE\nprint(obsolete)', - updatedContent: '', - path: 'c.R' - } - ]; - await executeAndCompareResults(inputs); - }); }); \ No newline at end of file From 22413ae35e10b828ec708993bda118544082de35 Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Wed, 1 Apr 2026 12:09:28 +0200 Subject: [PATCH 12/20] doc(wiki): add section for FlowrAnalyzerIncrementalAnalysisContext --- src/documentation/wiki-analyzer.ts | 50 +++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/src/documentation/wiki-analyzer.ts b/src/documentation/wiki-analyzer.ts index 160fe363cf3..edb21b3394e 100644 --- a/src/documentation/wiki-analyzer.ts +++ b/src/documentation/wiki-analyzer.ts @@ -39,7 +39,9 @@ import { FlowrAnalyzerPlugin } from '../project/plugins/flowr-analyzer-plugin'; import { FlowrAnalyzerEnvironmentContext } from '../project/context/flowr-analyzer-environment-context'; import { FlowrAnalyzerFunctionsContext } from '../project/context/flowr-analyzer-functions-context'; import { FlowrAnalyzerMetaContext } from '../project/context/flowr-analyzer-meta-context'; +import { FlowrAnalyzerIncrementalAnalysisContext } from '../project/context/flowr-analyzer-incremental-analysis-context'; import { FlowrConfig } from '../config'; +import { FlowrInlineTextFile } from '../project/context/flowr-file'; async function analyzerQuickExample() { const analyzer = await new FlowrAnalyzerBuilder() @@ -99,11 +101,12 @@ ${ 'How to add a new plugin': undefined, }, 'Context Information': { - 'Files Context': undefined, - 'Loading Order Context': undefined, - 'Dependencies Context': undefined, - 'Environment Context': undefined, - 'Meta Context': undefined, + 'Files Context': undefined, + 'Loading Order Context': undefined, + 'Dependencies Context': undefined, + 'Environment Context': undefined, + 'Meta Context': undefined, + 'Incremental Analysis Context': undefined, }, 'Caching': undefined }) @@ 
-478,6 +481,43 @@ and the project namespace via ${ctx.linkM(FlowrAnalyzerMetaContext, 'getNamespace', { codeFont: true, realNameWrapper: 'i' })}. +${section('Incremental Analysis Context', 3)} + +The ${ctx.link(FlowrAnalyzerIncrementalAnalysisContext)} is a context that stores analysis information needed for making the next analysis run incremental by reusing the previous analysis results: + +${ctx.hierarchy(FlowrAnalyzerIncrementalAnalysisContext, { showImplSnippet: false })} + +This context is not an analysis-result cache by itself. +Instead, it carries forward the minimal state needed by future incremental phases after an invalidation happened. +At the moment, it is used for incremental parsing with Tree-sitter, but it is intended to become the shared context for additional incremental analysis stages as well. + +If the analyzer or context is reset, the incremental information is discarded via +${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'reset', { codeFont: true, realNameWrapper: 'i' })}. +Likewise, a full cache rebuild resets this context before recreating the analysis pipeline. +In other words, this context only transports incremental handoff state between analysis runs. + +${section('Incremental Parsing', 4)} + +Currently, the implemented use of this context is Tree-sitter's incremental parsing support. +When a file is represented by a mutable file provider such as ${ctx.link('FlowrInlineTextFile')} and its content is invalidated via +${ctx.linkM(FlowrInlineTextFile, 'invalidate', { codeFont: true, realNameWrapper: 'i' })}, +the analyzer cache receives a file invalidation event. 
+For relevant source-like files, the cache compares the old and new file contents, computes a minimal edit region, +and stores two pieces of information in this context under the file path: + +* the previous Tree-sitter parse tree +* the ${ctx.link('Parser.Edit')} describing the changed source region + +On the next parse run, the Tree-sitter parser consumes this information via +${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'getAndRemoveParseInfo', { codeFont: true, realNameWrapper: 'i' })}, +applies the edit to the old tree, and reparses incrementally instead of starting from scratch. +The stored entry is removed as soon as it is consumed, so the context only carries information across a single invalidation boundary. + +${section('Incremental Dataflow', 4)} + +This context is planned to also support future incremental dataflow graph computation. + + ${section('Caching', 2)} To speed up analyses, flowR provides a caching mechanism that stores intermediate results of the analysis. 
From 5b6ee1415b2c7717e8e7558ece460eab5fa5636f Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Wed, 1 Apr 2026 12:12:08 +0200 Subject: [PATCH 13/20] feat-fix: add receive(event) call for ctx --- src/project/flowr-analyzer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/project/flowr-analyzer.ts b/src/project/flowr-analyzer.ts index fe36ecd75c0..7770cfe7468 100644 --- a/src/project/flowr-analyzer.ts +++ b/src/project/flowr-analyzer.ts @@ -210,7 +210,7 @@ export class FlowrAnalyzer implements } public receive(event: InvalidationEvent): void { - // TODO: ctx + this.ctx.receive(event); this.cache.receive(event); } From af3d39338a8720180d15318dbb8ff17f953cc2b2 Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Wed, 1 Apr 2026 12:13:18 +0200 Subject: [PATCH 14/20] lint-fix: remove implemented TODO --- src/project/context/flowr-analyzer-context.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/project/context/flowr-analyzer-context.ts b/src/project/context/flowr-analyzer-context.ts index 46b95982d61..6b9f6af4dbc 100644 --- a/src/project/context/flowr-analyzer-context.ts +++ b/src/project/context/flowr-analyzer-context.ts @@ -98,7 +98,6 @@ export class FlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerContext, Inval public readonly files: FlowrAnalyzerFilesContext; public readonly deps: FlowrAnalyzerDependenciesContext; public readonly env: FlowrAnalyzerEnvironmentContext; - // TODO: docment this in the wiki-analyzer wiki! 
public readonly inc: FlowrAnalyzerIncrementalAnalysisContext; private _analyzer: FlowrAnalyzer | undefined; From 26bcdce239613088bd46df6c8641efbb1fd71a0c Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Wed, 1 Apr 2026 12:33:19 +0200 Subject: [PATCH 15/20] feat-fix: only reset contexts of analyzer on InvalidationEventType.Full --- src/project/context/flowr-analyzer-context.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/project/context/flowr-analyzer-context.ts b/src/project/context/flowr-analyzer-context.ts index 6b9f6af4dbc..6b988a11d82 100644 --- a/src/project/context/flowr-analyzer-context.ts +++ b/src/project/context/flowr-analyzer-context.ts @@ -167,12 +167,13 @@ export class FlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerContext, Inval const type = event.type; switch(type) { case InvalidationEventType.Full: - case InvalidationEventType.FileInvalidate: + this.meta.reset(); this.files.reset(); this.deps.reset(); - this.meta.reset(); this.inc.reset(); break; + case InvalidationEventType.FileInvalidate: + break; default: assertUnreachable(type); } From 817f912259cb3c1005d2f9cf456a0571e0a8c965 Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Wed, 1 Apr 2026 12:36:38 +0200 Subject: [PATCH 16/20] feat-fix: coarseCheckWhetherToInvalidate Remove condition that the file has to be in the consideredFilesList of files context as only parsing a file does not add it to consideredFilesList --- src/project/incremental/incremental-parse/incremental-parse.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/project/incremental/incremental-parse/incremental-parse.ts b/src/project/incremental/incremental-parse/incremental-parse.ts index 8b7700b04b3..b10bbb826b6 100644 --- a/src/project/incremental/incremental-parse/incremental-parse.ts +++ b/src/project/incremental/incremental-parse/incremental-parse.ts @@ -15,9 +15,11 @@ export function coarseCheckWhetherToInvalidate(ctx: FlowrAnalyzerContext, event: return false; } + /* 
if(!ctx.files.consideredFilesList().includes(event.file.path())) { return false; } + */ return event.oldContent !== event.file.content(); } \ No newline at end of file From 7928a592db64d9b843edbb4fbef123103e54dc27 Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Sat, 4 Apr 2026 14:50:16 +0200 Subject: [PATCH 17/20] feat-fix: reuse unchanged parse trees during incremental parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rework the incremental parsing handoff so (1) invalidation no longer only prepares incremental reparses for changed files while forcing unchanged files through a full parse again and (2) invalidation no longer computes eager reparse metadata inside FlowrAnalyzerCache. Previously, a mixed update such as “A changed, B unchanged” behaved like this: - reparse info was generated for A - no reparse info was generated for B - the cache pipeline was rebuilt - A was reparsed incrementally - B was parsed from scratch The new architecture fixes that by treating the previous successful parse run as the baseline for the next one: - File invalidation events now record the file path together with the old source text in the incremental analysis context. - FlowrAnalyzerCache snapshots the latest completed Tree-sitter parse results after successful parse-oriented runs. - On the next parse request, TreeSitterExecutor derives reparse info lazily from: - the old parse tree - the old source text, if the file was invalidated - the current file content - Changed files get a minimal edit region and are reparsed incrementally. - Unchanged files now reuse their previous parse tree directly instead of being parsed again from scratch. - Parser call sites, invalidation plumbing, and documentation were updated to support this context-driven flow. Net effect: incremental parsing now correctly handles mixed workloads by incrementally reparsing changed files while reusing old parse results for unchanged files. 
--- .../call/built-in/built-in-source.ts | 2 +- src/documentation/wiki-analyzer.ts | 27 ++++--- src/project/cache/flowr-analyzer-cache.ts | 48 +++++------- src/project/cache/flowr-cache.ts | 4 +- src/project/context/flowr-analyzer-context.ts | 20 ++--- .../flowr-analyzer-dependencies-context.ts | 19 ++++- .../context/flowr-analyzer-files-context.ts | 25 +++++- ...r-analyzer-incremental-analysis-context.ts | 77 +++++++++++++++---- .../context/flowr-analyzer-meta-context.ts | 19 ++++- src/project/context/flowr-file.ts | 2 +- src/project/flowr-analyzer.ts | 4 +- .../incremental-parse/edit-computation.ts | 8 +- .../incremental-parse/incremental-parse.ts | 50 ++++++++---- .../tree-sitter/tree-sitter-executor.ts | 31 +++++--- src/r-bridge/parser.ts | 9 +-- 15 files changed, 230 insertions(+), 115 deletions(-) diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts index 78907aa7eda..61ac760f984 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts @@ -246,7 +246,7 @@ export function sourceRequest(rootId: NodeId, request: RParseRequest } else { guard(textRequest !== undefined, `Expected text request to be defined for sourced file ${JSON.stringify(request)}`); } - const parsed = (!data.parser.async ? data.parser : new RShellExecutor()).parse(textRequest.r, data.ctx.inc); + const parsed = (!data.parser.async ? data.parser : new RShellExecutor()).parse(textRequest.r, data.ctx); const normalized = (typeof parsed !== 'string' ? 
normalizeTreeSitter({ files: [{ parsed, filePath: textRequest.path }] }, getId, data.ctx.config) : normalize({ files: [{ parsed, filePath: textRequest.path }] }, getId)) as NormalizedAst; diff --git a/src/documentation/wiki-analyzer.ts b/src/documentation/wiki-analyzer.ts index edb21b3394e..f1427c3e582 100644 --- a/src/documentation/wiki-analyzer.ts +++ b/src/documentation/wiki-analyzer.ts @@ -493,7 +493,6 @@ At the moment, it is used for incremental parsing with Tree-sitter, but it is in If the analyzer or context is reset, the incremental information is discarded via ${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'reset', { codeFont: true, realNameWrapper: 'i' })}. -Likewise, a full cache rebuild resets this context before recreating the analysis pipeline. In other words, this context only transports incremental handoff state between analysis runs. ${section('Incremental Parsing', 4)} @@ -501,17 +500,25 @@ ${section('Incremental Parsing', 4)} Currently, the implemented use of this context is Tree-sitter's incremental parsing support. When a file is represented by a mutable file provider such as ${ctx.link('FlowrInlineTextFile')} and its content is invalidated via ${ctx.linkM(FlowrInlineTextFile, 'invalidate', { codeFont: true, realNameWrapper: 'i' })}, -the analyzer cache receives a file invalidation event. -For relevant source-like files, the cache compares the old and new file contents, computes a minimal edit region, -and stores two pieces of information in this context under the file path: +the analyzer receives a file invalidation event. +At that point, the incremental context only records the file path together with the old source text. +No edit region is computed eagerly during invalidation. 
-* the previous Tree-sitter parse tree -* the ${ctx.link('Parser.Edit')} describing the changed source region +After a successful parse-oriented analysis run, the analyzer cache stores the latest Tree-sitter parse trees in this context via +${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'storeOldParseResults', { codeFont: true, realNameWrapper: 'i' })}. +This gives the next parse run access to the last completed parse snapshot for each file path. -On the next parse run, the Tree-sitter parser consumes this information via -${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'getAndRemoveParseInfo', { codeFont: true, realNameWrapper: 'i' })}, -applies the edit to the old tree, and reparses incrementally instead of starting from scratch. -The stored entry is removed as soon as it is consumed, so the context only carries information across a single invalidation boundary. +On the next parse run, Tree-sitter combines both pieces of information lazily: + +* the previous parse tree obtained from + ${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'getOldParseResultOf', { codeFont: true, realNameWrapper: 'i' })} +* the old source text obtained from + ${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'getAndRemoveOldContentOf', { codeFont: true, realNameWrapper: 'i' })} + +Using these together with the current file content, flowR computes a minimal ${ctx.link('Parser.Edit')} only when a new parse is actually requested. +If the file content did not change, the previous tree can be reused directly. +Otherwise, the edit is applied to the previous tree and Tree-sitter reparses incrementally instead of starting from scratch. +The stored old-content entry is consumed when it is used, so invalidation state only survives until the next relevant parse. 
${section('Incremental Dataflow', 4)} diff --git a/src/project/cache/flowr-analyzer-cache.ts b/src/project/cache/flowr-analyzer-cache.ts index 38b4706f1ed..34f0ddd06dc 100644 --- a/src/project/cache/flowr-analyzer-cache.ts +++ b/src/project/cache/flowr-analyzer-cache.ts @@ -1,4 +1,4 @@ -import type { KnownParser } from '../../r-bridge/parser'; +import type { KnownParser, ParseStepOutput } from '../../r-bridge/parser'; import { type InvalidationEvent, InvalidationEventType, FlowrCache } from './flowr-cache'; import { createDataflowPipeline, @@ -18,11 +18,7 @@ import type { FlowrAnalyzerContext } from '../context/flowr-analyzer-context'; import { FlowrAnalyzerControlFlowCache } from './flowr-analyzer-controlflow-cache'; import type { CallGraph } from '../../dataflow/graph/call-graph'; import { computeCallGraph } from '../../dataflow/graph/call-graph'; -import { - coarseCheckWhetherToInvalidate -} from '../incremental/incremental-parse/incremental-parse'; -import { computeEditRegion } from '../incremental/incremental-parse/edit-computation'; - +import type { Tree } from 'web-tree-sitter'; interface FlowrAnalyzerCacheOptions { parser: Parser; context: FlowrAnalyzerContext; @@ -53,10 +49,7 @@ export class FlowrAnalyzerCache extends FlowrCache extends FlowrCache f.filePath === event.file.path())?.parsed; - const reparseInfo = { - previousTree, - editRegion - }; - - this.args.context.inc.storeReparseInfo(event.file.path(), reparseInfo); - this.initCacheProviders(false); - break; - } default: assertUnreachable(type); } @@ -119,10 +95,26 @@ export class FlowrAnalyzerCache extends FlowrCache // cast needed because of TypeScript's limited narrowing capabilities + ); + } + } + /** * Get the parse output for the request, parsing if necessary. * @param force - Do not use the cache, instead force a new parse. 
diff --git a/src/project/cache/flowr-cache.ts b/src/project/cache/flowr-cache.ts index ca9313a7547..fdf58a5bbbf 100644 --- a/src/project/cache/flowr-cache.ts +++ b/src/project/cache/flowr-cache.ts @@ -1,5 +1,5 @@ import { assertUnreachable } from '../../util/assert'; -import type { FlowrFileProvider, StringableContent } from '../context/flowr-file'; +import type { StringableContent } from '../context/flowr-file'; export const enum InvalidationEventType { Full = 'full', @@ -9,7 +9,7 @@ export const enum InvalidationEventType { export interface FileContentInvalidateEvent { readonly type: InvalidationEventType.FileInvalidate; readonly oldContent: Content | undefined; - readonly file: FlowrFileProvider; + readonly filePath: string; } export type InvalidationEvent = diff --git a/src/project/context/flowr-analyzer-context.ts b/src/project/context/flowr-analyzer-context.ts index 6b988a11d82..922629fd765 100644 --- a/src/project/context/flowr-analyzer-context.ts +++ b/src/project/context/flowr-analyzer-context.ts @@ -43,7 +43,6 @@ import type { import { InvalidationEventType } from '../cache/flowr-cache'; -import { assertUnreachable } from '../../util/assert'; /** * This is a read-only interface to the {@link FlowrAnalyzerContext}. @@ -109,7 +108,7 @@ export class FlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerContext, Inval this.files = new FlowrAnalyzerFilesContext(this, loadingOrder, (plugins.get(PluginType.ProjectDiscovery) ?? []) as FlowrAnalyzerProjectDiscoveryPlugin[], (plugins.get(PluginType.FileLoad) ?? []) as FlowrAnalyzerFilePlugin[]); this.env = new FlowrAnalyzerEnvironmentContext(this); - this.inc = new FlowrAnalyzerIncrementalAnalysisContext(); + this.inc = new FlowrAnalyzerIncrementalAnalysisContext(this); const functions = new FlowrAnalyzerFunctionsContext(this); this.deps = new FlowrAnalyzerDependenciesContext(functions, (plugins.get(PluginType.DependencyIdentification) ?? 
[]) as FlowrAnalyzerPackageVersionsPlugin[]); this.meta = new FlowrAnalyzerMetaContext(); @@ -164,19 +163,10 @@ export class FlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerContext, Inval } receive(event: InvalidationEvent): void { - const type = event.type; - switch(type) { - case InvalidationEventType.Full: - this.meta.reset(); - this.files.reset(); - this.deps.reset(); - this.inc.reset(); - break; - case InvalidationEventType.FileInvalidate: - break; - default: - assertUnreachable(type); - } + this.meta.receive(event); + this.files.receive(event); + this.deps.receive(event); + this.inc.receive(event); } } diff --git a/src/project/context/flowr-analyzer-dependencies-context.ts b/src/project/context/flowr-analyzer-dependencies-context.ts index 6470b33e0de..34c9a0a647b 100644 --- a/src/project/context/flowr-analyzer-dependencies-context.ts +++ b/src/project/context/flowr-analyzer-dependencies-context.ts @@ -4,6 +4,9 @@ import { } from '../plugins/package-version-plugins/flowr-analyzer-package-versions-plugin'; import type { Package } from '../plugins/package-version-plugins/package'; import type { FlowrAnalyzerFunctionsContext, ReadOnlyFlowrAnalyzerFunctionsContext } from './flowr-analyzer-functions-context'; +import type { InvalidationEvent, InvalidationEventReceiver } from '../cache/flowr-cache'; +import { InvalidationEventType } from '../cache/flowr-cache'; +import { assertUnreachable } from '../../util/assert'; /** * This is a read-only interface to the {@link FlowrAnalyzerDependenciesContext}. @@ -39,7 +42,7 @@ export interface ReadOnlyFlowrAnalyzerDependenciesContext { * * If you are interested in inspecting these dependencies, refer to {@link ReadOnlyFlowrAnalyzerDependenciesContext}. 
*/ -export class FlowrAnalyzerDependenciesContext extends AbstractFlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerDependenciesContext { +export class FlowrAnalyzerDependenciesContext extends AbstractFlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerDependenciesContext, InvalidationEventReceiver { public readonly name = 'flowr-analyzer-dependencies-context'; public readonly functionsContext: FlowrAnalyzerFunctionsContext; @@ -52,6 +55,20 @@ export class FlowrAnalyzerDependenciesContext extends AbstractFlowrAnalyzerConte this.staticsLoaded = false; } + receive(event: InvalidationEvent): void { + const type = event.type; + switch(type) { + case InvalidationEventType.Full: + this.reset(); + break; + case InvalidationEventType.FileInvalidate: + // nothing to do + break; + default: + assertUnreachable(type); + } + } + public constructor(functionsContext: FlowrAnalyzerFunctionsContext, plugins?: readonly FlowrAnalyzerPackageVersionsPlugin[]) { super(functionsContext.getAttachedContext(), FlowrAnalyzerPackageVersionsPlugin.defaultPlugin(), plugins); this.functionsContext = functionsContext; diff --git a/src/project/context/flowr-analyzer-files-context.ts b/src/project/context/flowr-analyzer-files-context.ts index c75def0efa4..aae1af13fe2 100644 --- a/src/project/context/flowr-analyzer-files-context.ts +++ b/src/project/context/flowr-analyzer-files-context.ts @@ -4,7 +4,7 @@ import type { RParseRequest, RParseRequestFromFile } from '../../r-bridge/retriever'; import { isParseRequest } from '../../r-bridge/retriever'; -import { guard } from '../../util/assert'; +import { assertUnreachable, guard } from '../../util/assert'; import type { FlowrAnalyzerLoadingOrderContext, ReadOnlyFlowrAnalyzerLoadingOrderContext @@ -22,6 +22,9 @@ import type { FlowrNewsFile } from '../plugins/file-plugins/files/flowr-news-fil import type { FlowrNamespaceFile } from '../plugins/file-plugins/files/flowr-namespace-file'; import { FlowrAnalyzer } from '../flowr-analyzer'; import type { 
FlowrAnalyzerContext } from './flowr-analyzer-context'; +import type { InvalidationEvent, InvalidationEventReceiver } from '../cache/flowr-cache'; +import { InvalidationEventType } from '../cache/flowr-cache'; + const fileLog = log.getSubLogger({ name: 'flowr-analyzer-files-context' }); @@ -123,7 +126,7 @@ export interface ReadOnlyFlowrAnalyzerFilesContext { * If you are interested in inspecting these files, refer to {@link ReadOnlyFlowrAnalyzerFilesContext}. * Plugins, however, can use this context directly to modify files. */ -export class FlowrAnalyzerFilesContext extends AbstractFlowrAnalyzerContext)[], FlowrAnalyzerProjectDiscoveryPlugin> implements ReadOnlyFlowrAnalyzerFilesContext { +export class FlowrAnalyzerFilesContext extends AbstractFlowrAnalyzerContext)[], FlowrAnalyzerProjectDiscoveryPlugin> implements ReadOnlyFlowrAnalyzerFilesContext, InvalidationEventReceiver { public readonly name = 'flowr-analyzer-files-context'; public readonly loadingOrder: FlowrAnalyzerLoadingOrderContext; @@ -158,6 +161,20 @@ export class FlowrAnalyzerFilesContext extends AbstractFlowrAnalyzerContext(Object.values(FileRole).map(k => [k, []])) as RoleBasedFiles; } + receive(event: InvalidationEvent): void { + const type = event.type; + switch(type) { + case InvalidationEventType.Full: + this.reset(); + break; + case InvalidationEventType.FileInvalidate: + // nothing to do + break; + default: + assertUnreachable(type); + } + } + /** * Record that a file has been considered during dataflow analysis. */ @@ -357,4 +374,8 @@ export class FlowrAnalyzerFilesContext extends AbstractFlowrAnalyzerContext = new Map(); + private readonly context: FlowrAnalyzerContext; + /** + * The files that have been changed since the last analysis mapping to their old content. 
+ */ + private changedFilesWithOldContent: Map = new Map(); + private oldParseResults: Map = new Map(); + + + constructor(context: FlowrAnalyzerContext) { + this.context = context; + } public reset(): void { - this.reparseInfoMap = new Map(); + this.changedFilesWithOldContent = new Map(); + this.oldParseResults = new Map(); } - public storeReparseInfo(filePath: string, reparseInfo?: ReparseInfo): void { - if(reparseInfo) { - this.reparseInfoMap.set(filePath, reparseInfo); + handleFileInvalidate(filePath: FilePath, oldContent: string): void { + if(this.changedFilesWithOldContent.has(filePath)) { + // If a file is changed multiple times since the last analysis, we only want to store the original old content as the old analysis results were computed with that. + return; } + + this.changedFilesWithOldContent.set(filePath, oldContent); + } + + receive(event: InvalidationEvent): void { + const type = event.type; + switch(type) { + case InvalidationEventType.Full: + this.reset(); + break; + case InvalidationEventType.FileInvalidate: + this.handleFileInvalidate(event.filePath, event.oldContent?.toString() ?? 
''); + break; + default: + assertUnreachable(type); + } + } + + public storeOldParseResults(parseStepOutput: ParseStepOutput): void { + for(const parsedStepSingleOutput of parseStepOutput.files) { + if(parsedStepSingleOutput.filePath === undefined) { + // there could be multiple files without a file path, making a distinction impossible + continue; + } + + this.oldParseResults.set(parsedStepSingleOutput.filePath, parsedStepSingleOutput.parsed); + } + } + + public getOldParseResultOf(filePath: FilePath): Parser.Tree | undefined { + return this.oldParseResults.get(filePath); } - public getAndRemoveParseInfo(filePath: string): ReparseInfo | undefined { - const reparseInfo = this.reparseInfoMap.get(filePath); - this.reparseInfoMap.delete(filePath); - return reparseInfo; + public getAndRemoveOldContentOf(filePath: FilePath): string | undefined { + const oldContent = this.changedFilesWithOldContent.get(filePath); + this.changedFilesWithOldContent.delete(filePath); + return oldContent; } } diff --git a/src/project/context/flowr-analyzer-meta-context.ts b/src/project/context/flowr-analyzer-meta-context.ts index 01913b05a81..38eb9a4d0bb 100644 --- a/src/project/context/flowr-analyzer-meta-context.ts +++ b/src/project/context/flowr-analyzer-meta-context.ts @@ -1,4 +1,7 @@ import type { SemVer } from 'semver'; +import type { InvalidationEvent, InvalidationEventReceiver } from '../cache/flowr-cache'; +import { InvalidationEventType } from '../cache/flowr-cache'; +import { assertUnreachable } from '../../util/assert'; export interface ReadOnlyFlowrAnalyzerMetaContext { @@ -30,7 +33,7 @@ export interface ReadOnlyFlowrAnalyzerMetaContext { * * If you are interested in inspecting this metadata, refer to {@link ReadOnlyFlowrAnalyzerMetaContext}. 
*/ -export class FlowrAnalyzerMetaContext implements ReadOnlyFlowrAnalyzerMetaContext { +export class FlowrAnalyzerMetaContext implements ReadOnlyFlowrAnalyzerMetaContext, InvalidationEventReceiver { public readonly name = 'flowr-analyzer-meta-context'; private projectName: string | undefined; private projectTitle: string | undefined; @@ -44,6 +47,20 @@ export class FlowrAnalyzerMetaContext implements ReadOnlyFlowrAnalyzerMetaContex this.namespace = undefined; } + receive(event: InvalidationEvent): void { + const type = event.type; + switch(type) { + case InvalidationEventType.Full: + this.reset(); + break; + case InvalidationEventType.FileInvalidate: + // nothing to do + break; + default: + assertUnreachable(type); + } + } + public setProjectName(name: string): void { this.projectName = name; } diff --git a/src/project/context/flowr-file.ts b/src/project/context/flowr-file.ts index 7f6a0569ac9..137dc12473f 100644 --- a/src/project/context/flowr-file.ts +++ b/src/project/context/flowr-file.ts @@ -178,7 +178,7 @@ export abstract class FlowrFile implements console.log('parseStandalone'); const request = isParseRequest(data) ? 
data : requestFromInput(data); if(this.parser.name === 'tree-sitter') { - return this.parser.parse(request, this.ctx.inc); + return this.parser.parse(request, this.ctx); } else { const ts = new TreeSitterExecutor(); - return ts.parse(request, this.ctx.inc); + return ts.parse(request, this.ctx); } } diff --git a/src/project/incremental/incremental-parse/edit-computation.ts b/src/project/incremental/incremental-parse/edit-computation.ts index 099ad9bc82b..2ad92b406b7 100644 --- a/src/project/incremental/incremental-parse/edit-computation.ts +++ b/src/project/incremental/incremental-parse/edit-computation.ts @@ -14,8 +14,8 @@ export function computeEditRegion(oldContent: string, newContent: string): Parse let startIndex = 0; while( startIndex < oldLen && - startIndex < newLen && - oldContent[startIndex] === newContent[startIndex] + startIndex < newLen && + oldContent[startIndex] === newContent[startIndex] ) { startIndex++; } @@ -25,8 +25,8 @@ export function computeEditRegion(oldContent: string, newContent: string): Parse let newSuffixIndex = newLen; while( oldSuffixIndex > startIndex && - newSuffixIndex > startIndex && - oldContent[oldSuffixIndex - 1] === newContent[newSuffixIndex - 1] + newSuffixIndex > startIndex && + oldContent[oldSuffixIndex - 1] === newContent[newSuffixIndex - 1] ) { oldSuffixIndex--; newSuffixIndex--; diff --git a/src/project/incremental/incremental-parse/incremental-parse.ts b/src/project/incremental/incremental-parse/incremental-parse.ts index b10bbb826b6..2ca1738c418 100644 --- a/src/project/incremental/incremental-parse/incremental-parse.ts +++ b/src/project/incremental/incremental-parse/incremental-parse.ts @@ -1,25 +1,47 @@ import type { FlowrAnalyzerContext } from '../../context/flowr-analyzer-context'; -import type { InvalidationEvent } from '../../cache/flowr-cache'; -import { InvalidationEventType } from '../../cache/flowr-cache'; -import { FileRole } from '../../context/flowr-file'; +import type Parser from 'web-tree-sitter'; +import 
type { FilePath } from '../../context/flowr-file'; +import { computeEditRegion } from './edit-computation'; + + +export interface ReparseInfo { + readonly previousTree: Parser.Tree; + readonly editRegion: Parser.Edit | undefined; +} + /** - * Is this file even relevant to us? + * Computes the information needed to reparse a file incrementally with tree-sitter. + * Returns `undefined` if incremental reparsing is not possible. */ -export function coarseCheckWhetherToInvalidate(ctx: FlowrAnalyzerContext, event: InvalidationEvent): boolean { - if(event?.type === InvalidationEventType.Full) { - return true; +export function computeReparseInfo(ctx: FlowrAnalyzerContext, filePath: FilePath): ReparseInfo | undefined { + const previousTree = ctx.inc.getOldParseResultOf(filePath); + if(!previousTree) { + // this file was not parsed before + return undefined; } - if(!event.file.roles?.includes(FileRole.Source) && !event.file.roles?.includes(FileRole.Description)) { - return false; + const oldContent = ctx.inc.getAndRemoveOldContentOf(filePath); + if(oldContent === undefined) { + // this file has not been invalidated since the last parse, no reparse needed + return { + previousTree, + editRegion: undefined + }; } - /* - if(!ctx.files.consideredFilesList().includes(event.file.path())) { - return false; + const newContent = ctx.files.getFile(filePath)?.content().toString() ?? 
''; + if(newContent === oldContent) { + // this file was invalidated, but the content did not change, no reparse needed + return { + previousTree, + editRegion: undefined + }; } - */ - return event.oldContent !== event.file.content(); + const editRegion = computeEditRegion(oldContent, newContent); + return { + previousTree, + editRegion + }; } \ No newline at end of file diff --git a/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts b/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts index 874b10866aa..57aae3e13d1 100644 --- a/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts +++ b/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts @@ -6,9 +6,8 @@ import type { TreeSitterEngineConfig } from '../../../config'; import { log } from '../../../util/log'; import fs from 'fs'; import type { ReadonlyFlowrAnalysisProvider } from '../../../project/flowr-analyzer'; -import type { - FlowrAnalyzerIncrementalAnalysisContext -} from '../../../project/context/flowr-analyzer-incremental-analysis-context'; +import type { FlowrAnalyzerContext } from '../../../project/context/flowr-analyzer-context'; +import { computeReparseInfo } from '../../../project/incremental/incremental-parse/incremental-parse'; export const DEFAULT_TREE_SITTER_R_WASM_PATH = './node_modules/@eagleoutice/tree-sitter-r/tree-sitter-r.wasm'; export const DEFAULT_TREE_SITTER_WASM_PATH = './node_modules/web-tree-sitter/tree-sitter.wasm'; @@ -72,7 +71,7 @@ export class TreeSitterExecutor implements SyncParser { return this.parser.getLanguage().version; } - public parse(request: RParseRequest & { filePath?: string }, inc: FlowrAnalyzerIncrementalAnalysisContext | undefined): Parser.Tree { + public parse(request: RParseRequest & { filePath?: string }, ctx: FlowrAnalyzerContext): Parser.Tree { let sourceCode: string; if(request.request === 'file') { sourceCode = fs.readFileSync(request.content, 'utf8'); @@ -80,15 +79,23 @@ export class TreeSitterExecutor implements SyncParser { sourceCode = 
request.content; } - if(inc && request.filePath !== undefined) { - const reparseInfo = inc.getAndRemoveParseInfo(request.filePath); - if(reparseInfo && reparseInfo.previousTree && typeof reparseInfo.previousTree !== 'string') { - const previousTree = reparseInfo.previousTree; - previousTree.edit(reparseInfo.editRegion); - return this.parser.parse(sourceCode, previousTree); - } + if(request.filePath === undefined) { + return this.parser.parse(sourceCode); + } + + const reparseInfo = computeReparseInfo(ctx, request.filePath); + if(!reparseInfo) { + // incremental parsing not possible + return this.parser.parse(sourceCode); } - return this.parser.parse(sourceCode); + + if(!reparseInfo.editRegion) { + return reparseInfo.previousTree; + } + + const previousTree = reparseInfo.previousTree; + previousTree.edit(reparseInfo.editRegion); + return this.parser.parse(sourceCode, previousTree); } public createQuery(source: string): Query { diff --git a/src/r-bridge/parser.ts b/src/r-bridge/parser.ts index 28e2cb7a808..fa8f4733c6f 100644 --- a/src/r-bridge/parser.ts +++ b/src/r-bridge/parser.ts @@ -5,7 +5,6 @@ import type { TreeSitterExecutor } from './lang-4.x/tree-sitter/tree-sitter-exec import type { Query, QueryCapture, SyntaxNode } from 'web-tree-sitter'; import type { FlowrAnalysisProvider } from '../project/flowr-analyzer'; import type { FlowrAnalyzerContext } from '../project/context/flowr-analyzer-context'; -import type { FlowrAnalyzerIncrementalAnalysisContext } from '../project/context/flowr-analyzer-incremental-analysis-context'; interface ParserContent { readonly name: string; @@ -16,10 +15,10 @@ interface ParserContent { information(analyzer: FlowrAnalysisProvider): BaseParserInformation; /** - * Parses the given request and uses the provided incremental context (only if the parser + * Parses the given request and uses the provided context (only if the parser * itself supports incrementality {@link ParserContent#incremental}). 
*/ - parse(request: RParseRequestFromText & { filePath?: string }, inc: FlowrAnalyzerIncrementalAnalysisContext | undefined): T; + parse(request: RParseRequestFromText & { filePath?: string }, inc: FlowrAnalyzerContext | undefined): T; close(): void; } @@ -112,7 +111,7 @@ Promise> { /* sadly we cannot Promise.all with the Rshell as it has to process commands in order and is not thread safe */ const files: ParseStepOutputSingleFile[] = []; for(const req of translatedRequests) { - const parsed = await (input.parser).parse(req.r, ctx.inc); + const parsed = await (input.parser).parse(req.r, ctx); files.push({ parsed, filePath: req.path, @@ -128,7 +127,7 @@ Promise> { files: translatedRequests.map(r => { const withPath: RParseRequestFromText & { filePath?: string } = r.r; withPath.filePath = r.path; - const parsed = p.parse(withPath, ctx.inc); + const parsed = p.parse(withPath, ctx); return { parsed, filePath: r.path, From d6ea3520690057b15370a4f63080674fb276cb5e Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Sat, 4 Apr 2026 20:01:11 +0200 Subject: [PATCH 18/20] test-fix: cover direct reuse of unchanged parse trees Adapt the incremental parsing tests to the new architecture where incremental state is derived lazily from the previous successful parse run instead of from eagerly stored reparse info. The tests no longer inspect the removed reparseInfoMap. 
Instead they now: - capture the previous Tree-sitter trees after the first analysis run - invalidate files by updating their content - verify that invalidation clears the current parse pipeline - trace the second parse run to observe which previous tree is reused for which file - assert that changed files use their own previous tree for incremental reparsing - assert that unchanged files reuse their previous tree directly without reparsing This specifically covers the mixed case that motivated the refactor: when one file changes and another does not, the changed file must be reparsed incrementally and the unchanged file must keep its old parse tree rather than being parsed from scratch. --- .../incremental/incremental-parsing.test.ts | 1020 +++++++++-------- 1 file changed, 542 insertions(+), 478 deletions(-) diff --git a/test/functionality/incremental/incremental-parsing.test.ts b/test/functionality/incremental/incremental-parsing.test.ts index 1f5075e384b..04356645443 100644 --- a/test/functionality/incremental/incremental-parsing.test.ts +++ b/test/functionality/incremental/incremental-parsing.test.ts @@ -4,6 +4,9 @@ import { FlowrInlineTextFile } from '../../../src/project/context/flowr-file'; import type { NormalizedAst } from '../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'; import { printNormalizedAstToMermaid } from '../../../src/core/print/normalize-printer'; import type { TreeSitterExecutor } from '../../../src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor'; +import type { FlowrAnalyzer } from '../../../src/project/flowr-analyzer'; +import type { Tree } from 'web-tree-sitter'; +import type { ParseStepOutput, ParseStepOutputSingleFile } from '../../../src/r-bridge/parser'; interface IncrementalParsingTestInput { @@ -12,14 +15,99 @@ interface IncrementalParsingTestInput { updatedContent: string; } -interface SingleFileCase { - name: string; - input: IncrementalParsingTestInput; +interface IncrementalParseCall { + filePath: string; + 
previousTree: Tree | undefined; } -interface MultiFileCase { - name: string; - inputs: IncrementalParsingTestInput[]; +async function traceIncrementalParseCalls( + analyzer: FlowrAnalyzer, + run: () => Promise +): Promise<{ result: T; incrementalParseCalls: IncrementalParseCall[] }> { + const executor = analyzer['parser'] as TreeSitterExecutor; + const parser = executor['parser']; + const originalExecutorParse = executor.parse.bind(executor); + const originalParserParse = parser.parse.bind(parser); + let currentFilePath: string | undefined; + const incrementalParseCalls: IncrementalParseCall[] = []; + + // The outer executor still knows which file is being parsed, so we capture that path for the nested Tree-sitter call. + const executorSpy = vi.spyOn(executor, 'parse').mockImplementation((request, ctx) => { + currentFilePath = request.filePath; + try { + return originalExecutorParse(request, ctx); + } finally { + currentFilePath = undefined; + } + }); + + // The inner Tree-sitter parser sees the reused previous tree but not the file path, so we pair it with the path captured above. 
+ const parserSpy = vi.spyOn(parser, 'parse').mockImplementation((sourceCode, previousTree) => { + assert(currentFilePath !== undefined, 'inner Tree-sitter parse should only be called while handling a file-backed parse request'); + incrementalParseCalls.push({ + filePath: currentFilePath, + previousTree: previousTree + }); + return originalParserParse(sourceCode, previousTree); + }); + + try { + return { + result: await run(), + incrementalParseCalls + }; + } finally { + executorSpy.mockRestore(); + parserSpy.mockRestore(); + } +} + +function capturePreviousTrees(analyzer: FlowrAnalyzer): Map { + const initialParse = analyzer.peekParse(); + assert(initialParse !== undefined); + + const previousTrees = new Map(); + for(const parsedFile of initialParse.files) { + assert(parsedFile.filePath !== undefined); + previousTrees.set(parsedFile.filePath, parsedFile.parsed as Tree); + } + return previousTrees; +} + +function assertChangedFilesUseIncrementalParse( + inputs: readonly IncrementalParsingTestInput[], + previousTrees: ReadonlyMap, + incrementalParseCalls: readonly IncrementalParseCall[] +): void { + const changedInputs = inputs.filter(input => input.updatedContent !== input.originalContent); + expect(incrementalParseCalls).toHaveLength(changedInputs.length); + + for(const changedInput of changedInputs) { + const previousTree = previousTrees.get(changedInput.path); + assert(previousTree !== undefined, `Missing previous tree for ${changedInput.path}`); + + const parseCall = incrementalParseCalls.find(call => call.filePath === changedInput.path); + assert(parseCall !== undefined, `Missing incremental parse call for ${changedInput.path}`); + expect(parseCall.previousTree).toBe(previousTree); + } +} + +function assertUnchangedFilesReusePreviousTrees( + inputs: readonly IncrementalParsingTestInput[], + previousTrees: ReadonlyMap, + reparsedTrees: ParseStepOutputSingleFile[] +): void { + for(const input of inputs) { + if(input.updatedContent !== input.originalContent) { + 
continue; + } + + const previousTree = previousTrees.get(input.path); + const reparsedTree = reparsedTrees.find(file => file.filePath === input.path)?.parsed; + assert(previousTree !== undefined, `Missing previous tree for ${input.path}`); + assert(reparsedTree !== undefined, `Missing reparsed tree for ${input.path}`); + expect(reparsedTree, `no-op invalidation should reuse the previous tree for ${input.path}`).toBe(previousTree); + } } @@ -49,26 +137,21 @@ async function executeIncrementalParse(inputs: readonly IncrementalParsingTestIn } await analyzer.normalize(); + const previousTrees = capturePreviousTrees(analyzer); for(const input of inputs) { - const f = files.get(input.path); - f?.updateInlineContent(input.updatedContent); + files.get(input.path)?.updateInlineContent(input.updatedContent); } + assert(analyzer.peekParse() === undefined, 'changing the content of parsed files should reset the previous pipeline'); - const parser = analyzer['parser'] as TreeSitterExecutor; - const parseSpy = vi.spyOn(parser, 'parse'); - const result = await analyzer.normalize(); + const { result, incrementalParseCalls } = await traceIncrementalParseCalls( + analyzer, + async() => await analyzer.normalize() + ); + const reparsed = analyzer.peekParse() as ParseStepOutput; + assert(reparsed !== undefined, 'after parsing once more, the pipeline must contain the analysis results again'); - const parseCalls = parseSpy.mock.calls; - let callIdx = 0; - for(const input of inputs ?? 
[]) { - if(input.originalContent.trim() !== '' && input.updatedContent !== input.originalContent) { - // check if incremental parse was attempted, i.e., if a previousTree was provided when parsing again - const [, previousTreeArg] = parseCalls[callIdx]; - expect(previousTreeArg, `file ${input.path}: expected incremental parse`).toBeDefined(); - - callIdx++; - } - } + assertChangedFilesUseIncrementalParse(inputs, previousTrees, incrementalParseCalls); + assertUnchangedFilesReusePreviousTrees(inputs, previousTrees, reparsed.files); return result; } @@ -96,498 +179,479 @@ const file = ( updatedContent }); -const singleFileCase = ( - name: string, - originalContent: string, - updatedContent: string -): SingleFileCase => ({ - name, - input: { - path: 'a.R', - originalContent, - updatedContent - } -}); - -const singleFileNoOpCases: SingleFileCase[] = [ - singleFileCase('empty file', '', ''), - singleFileCase('file with top-level content', 'x <- 42', 'x <- 42'), - singleFileCase( - 'file with nested content', - lines( - 'f <- function(x) {', - '\ty <- x + 1', - '\tprint(y)', - '}' - ), - lines( - 'f <- function(x) {', - '\ty <- x + 1', - '\tprint(y)', - '}' - ) - ), - singleFileCase('syntactically invalid file', 'print(', 'print(') -]; - -const singleFileInsertCases: SingleFileCase[] = [ - singleFileCase('one full line into an empty file', '', 'x <- 42'), - singleFileCase( - 'one full line at the start of a file', - 'x <- 42', - lines('y <- 21', 'x <- 42') - ), - singleFileCase( - 'one full line in the middle of a file', - lines('x <- 42', 'print(x)'), - lines('x <- 42', 'x <- 2 * x', 'print(x)') - ), - singleFileCase( - 'one full line at the end of a file', - 'x <- 42', - lines('x <- 42', 'print(x)') - ), - singleFileCase( - 'multiple lines into an empty file', - '', - lines('x <- 42', 'y <- 21', 'z <- 10') - ), - singleFileCase( - 'multiple lines at different positions', - lines('x <- 42', 'print(x)'), - lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)') 
- ), - singleFileCase('a single character inside a number', 'x <- 42', 'x <- 420'), - singleFileCase('a single character inside an identifier', 'x <- 42', 'xy <- 42'), - singleFileCase('a token inside an expression', 'x <- 1 + 2', 'x <- 1 + 2 + 3'), - singleFileCase( - 'a token inside a nested argument list', - 'print(sum(1, 3))', - 'print(sum(1, 2, 3))' - ), - singleFileCase('a trailing newline at end of file', 'x <- 42', 'x <- 42\n') -]; - -const singleFileRemoveCases: SingleFileCase[] = [ - singleFileCase('one full line such that the file becomes empty', 'x <- 42', ''), - singleFileCase( - 'one full line at the start of a file', - lines('y <- 21', 'x <- 42'), - 'x <- 42' - ), - singleFileCase( - 'one full line in the middle of a file', - lines('x <- 42', 'x <- 2 * x', 'print(x)'), - lines('x <- 42', 'print(x)') - ), - singleFileCase( - 'one full line at the end of a file', - lines('x <- 42', 'print(x)'), - 'x <- 42' - ), - singleFileCase( - 'multiple lines such that the file becomes empty', - lines('x <- 42', 'y <- 21', 'z <- 10'), - '' - ), - singleFileCase( - 'multiple lines at different positions', - lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'), - lines('x <- 42', 'print(x)') - ), - singleFileCase('a single character from a number', 'x <- 420', 'x <- 42'), - singleFileCase('a single character from an identifier', 'xy <- 42', 'x <- 42'), - singleFileCase('a token from an expression', 'x <- 1 + 2 + 3', 'x <- 1 + 2'), - singleFileCase( - 'a token from a nested argument list', - 'print(sum(1, 2, 3))', - 'print(sum(1, 3))' - ), - singleFileCase('a trailing newline at end of file', 'x <- 42\n', 'x <- 42') -]; - -const singleFileReplaceCases: SingleFileCase[] = [ - singleFileCase( - 'one full line at the start of a file', - lines('y <- 21', 'x <- 42'), - lines('x <- 84', 'x <- 42') - ), - singleFileCase( - 'one full line in the middle of a file', - lines('x <- 42', 'x <- 2 * x', 'print(x)'), - lines('x <- 42', 'y <- 21', 'print(x)') - ), - 
singleFileCase( - 'one full line at the end of a file', - lines('x <- 42', 'print(x)'), - lines('x <- 42', 'x <- x * x') - ), - singleFileCase( - 'a partially replaced multi-line region', - lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'), - lines('y <- 21', 'x <- 21', 'y <- y * y', 'print(x)', 'print(y)') - ), - singleFileCase( - 'a fully replaced content', - lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'), - lines('z <- 10', 'z <- z + 32', 'print(z)') - ), - singleFileCase('a single character in a number', 'x <- 42', 'x <- 43'), - singleFileCase('an operator token', 'x <- 1 + 2', 'x <- 1 * 2'), - singleFileCase( - 'an identifier token', - lines('x <- 42', 'print(x)'), - lines('value <- 42', 'print(value)') - ), - singleFileCase( - 'part of a single line expression', - 'x <- (1 + 2) * 3', - 'x <- (1 + 20) * 3' - ), - singleFileCase('whitespace only on a single line', 'x <- 42', 'x <- 42'), - singleFileCase( - 'whitespace only across multiple lines', - lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'), - lines('f <- function(x) {', '\t', '\ty <- x + 1', '\tprint(y)', '}') - ), - singleFileCase('comment text', 'x <- 42 # old comment', 'x <- 42 # new comment'), - singleFileCase('a string literal', 'msg <- "abc"', 'msg <- "abcd"'), - singleFileCase('a UTF-8 string literal', 'msg <- "äöü"', 'msg <- "äöü€"'), - singleFileCase('a UTF-8 comment', 'x <- 42 # gruß', 'x <- 42 # grüße €') -]; - -const singleFileSyntaxTransitionCases: SingleFileCase[] = [ - singleFileCase( - 'valid to invalid by removing the right-hand side of an assignment', - 'x <- 42', - 'x <-' - ), - singleFileCase( - 'valid to invalid by removing a closing brace', - lines( - 'f <- function(x) {', - '\tprint(x)', - '}' - ), - lines( - 'f <- function(x) {', - '\tprint(x)' - ) - ), - singleFileCase( - 'valid to invalid by removing a closing parenthesis', - 'print(sum(1, 2))', - 'print(sum(1, 2)' - ), - singleFileCase( - 'invalid to valid by completing an 
assignment', - 'x <-', - 'x <- 42' - ), - singleFileCase( - 'invalid to valid by restoring a closing brace', - lines( - 'f <- function(x) {', - '\tprint(x)' - ), - lines( - 'f <- function(x) {', - '\tprint(x)', - '}' - ) - ), - singleFileCase( - 'invalid to valid by restoring a closing parenthesis', - 'print(sum(1, 2)', - 'print(sum(1, 2))' - ), - singleFileCase( - 'invalid to invalid across different incomplete forms', - 'print(', - 'function(,' - ) -]; - -const singleFileNestedStructureCases: SingleFileCase[] = [ - singleFileCase( - 'inside a function body', - lines( - 'f <- function(x) {', - '\ty <- x + 1', - '\tprint(y)', - '}' - ), - lines( - 'f <- function(x) {', - '\ty <- x * 2', - '\tprint(y)', - '}' - ) - ), - singleFileCase( - 'inside an if branch', - lines( - 'if (x > 0) {', - '\ty <- 1', - '}' - ), - lines( - 'if (x > 0) {', - '\ty <- 1', - '\tz <- 2', - '}' - ) - ), - singleFileCase( - 'inside a for loop body', - lines( - 'for (i in 1:3) {', - '\tprint(i)', - '}' - ), - lines( - 'for (i in 1:3) {', - '\ttotal <- i + 1', - '\tprint(total)', - '}' - ) - ), - singleFileCase( - 'inside a nested argument list', - 'print(sum(1, 2, 3))', - 'print(sum(1, 20, 3))' - ), - singleFileCase( - 'inside nested brackets and subexpressions', - 'x <- list(a = list(b = 1))', - 'x <- list(a = list(b = 2))' - ) -]; - -const repeatedUpdatePairwiseCases: SingleFileCase[] = [ - singleFileCase( - 'sequence step 1: empty file to initial assignment', - '', - 'x <- 1' - ), - singleFileCase( - 'sequence step 2: initial assignment to character-level update', - 'x <- 1', - 'x <- 10' - ), - singleFileCase( - 'sequence step 3: character-level update to added statement', - 'x <- 10', - lines('x <- 10', 'print(x)') - ), - singleFileCase( - 'sequence step 4: added statement to nested function', - lines('x <- 10', 'print(x)'), - lines( - 'f <- function() {', - '\tprint(x)', - '}', - 'f()' - ) - ), - singleFileCase( - 'sequence step 5: nested function to temporarily invalid syntax', - 
lines( - 'f <- function() {', - '\tprint(x)', - '}', - 'f()' - ), - lines( - 'f <- function() {', - '\tprint(x)', - 'f()' - ) - ), - singleFileCase( - 'sequence step 6: temporarily invalid syntax back to valid syntax', - lines( - 'f <- function() {', - '\tprint(x)', - 'f()' - ), - lines( - 'f <- function() {', - '\tprint(x)', - '}', - 'f()' - ) - ) -]; - -const multiFileCases: MultiFileCase[] = [ - { - name: 'editing only the first file while the second file stays unchanged', - inputs: [ - file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'x <- x + 1', 'print(x)')), - file('b.R', lines('y <- 21', 'print(y)'), lines('y <- 21', 'print(y)')) - ] - }, - { - name: 'editing only the second file while the first file stays unchanged', - inputs: [ - file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')), - file('b.R', lines('y <- 21', 'print(y)'), lines('y <- 21', 'y <- y * 2', 'print(y)')) - ] - }, - { - name: 'editing both files independently in the same run', - inputs: [ - file('a.R', lines('x <- 1', 'print(x)'), lines('x <- 2', 'x <- x * 3', 'print(x)')), - file('b.R', lines('y <- 10', 'print(y)'), lines('z <- 10', 'print(z + 1)')) - ] - }, - { - name: 'adding a new file while another file stays unchanged', - inputs: [ - file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')), - file('b.R', '', lines('helper <- function(x) {', '\tx * 2', '}', 'print(helper(21))')) - ] - }, - { - name: 'removing one file while another file stays unchanged', - inputs: [ - file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')), - file('b.R', lines('tmp <- 1', 'print(tmp)'), '') - ] - }, - { - name: 'mixing file modification, file addition, and file removal in one run', - inputs: [ - file('a.R', lines('x <- 1', 'print(x)'), lines('x <- 1', 'x <- x + 1', 'print(x)')), - file('b.R', '', lines('y <- 21', 'print(y)')), - file('c.R', lines('obsolete <- TRUE', 'print(obsolete)'), '') - ] - }, - { - name: 'making one file invalid while 
another file remains unchanged and valid', - inputs: [ - file( - 'a.R', - lines('f <- function(x) {', '\tprint(x)', '}'), - lines('f <- function(x) {', '\tprint(x)') - ), - file('b.R', lines('y <- 21', 'print(y)'), lines('y <- 21', 'print(y)')) - ] - }, - { - name: 'editing UTF-8 content in one file while another file stays unchanged', - inputs: [ - file('a.R', lines('msg <- "äöü"', 'print(msg)'), lines('msg <- "äöü€"', 'print(msg)')), - file('b.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')) - ] - }, - { - name: 'editing inside a nested construct in one file and at top level in another', - inputs: [ - file( - 'a.R', - lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'), - lines('f <- function(x) {', '\ty <- x * 2', '\tprint(y)', '}') - ), - file('b.R', lines('z <- 3', 'print(z)'), lines('z <- 3', 'z <- z + 1', 'print(z)')) - ] - }, - { - name: 'editing only one of two syntactically invalid files', - inputs: [ - file('a.R', 'print(', 'print(1)'), - file('b.R', 'x <-', 'x <-') - ] - } -]; - -const repeatedMultiFilePairwiseCases: MultiFileCase[] = [ - { - name: 'pairwise sequence step 1 across files', - inputs: [ - file('a.R', '', 'x <- 1'), - file('b.R', '', 'y <- 2') - ] - }, - { - name: 'pairwise sequence step 2 across files', - inputs: [ - file('a.R', 'x <- 1', lines('x <- 1', 'print(x)')), - file('b.R', 'y <- 2', 'y <- 20') - ] - }, - { - name: 'pairwise sequence step 3 across files with temporary invalidity', - inputs: [ - file('a.R', lines('x <- 1', 'print(x)'), 'x <-'), - file('b.R', 'y <- 20', lines('f <- function() {', '\tprint(y)', '}', 'f()')) - ] - }, - { - name: 'pairwise sequence step 4 across files returning to valid syntax', - inputs: [ - file('a.R', 'x <-', lines('x <- 1', 'print(x)')), - file('b.R', lines('f <- function() {', '\tprint(y)', '}', 'f()'), lines('f <- function() {', '\tprint(y + 1)', '}', 'f()')) - ] - } -]; - describe('Incremental Parsing produces same results as Full Parsing', () => { 
describe('single-file', () => { describe('no-op', () => { - it.each(singleFileNoOpCases)('$name', async({ input }) => { - await executeAndCompareResults([input]); + it('empty file', async() => { + await executeAndCompareResults([file('a.R', '', '')]); + }); + + it('file with top-level content', async() => { + await executeAndCompareResults([file('a.R', 'x <- 42', 'x <- 42')]); + }); + + it('file with nested content', async() => { + await executeAndCompareResults([file( + 'a.R', + lines( + 'f <- function(x) {', + '\ty <- x + 1', + '\tprint(y)', + '}' + ), + lines( + 'f <- function(x) {', + '\ty <- x + 1', + '\tprint(y)', + '}' + ) + )]); + }); + + it('syntactically invalid file', async() => { + await executeAndCompareResults([file('a.R', 'print(', 'print(')]); }); }); describe('insert', () => { - it.each(singleFileInsertCases)('$name', async({ input }) => { - await executeAndCompareResults([input]); + it('one full line into an empty file', async() => { + await executeAndCompareResults([file('a.R', '', 'x <- 42')]); + }); + + it('one full line at the start of a file', async() => { + await executeAndCompareResults([file('a.R', 'x <- 42', lines('y <- 21', 'x <- 42'))]); + }); + + it('one full line in the middle of a file', async() => { + await executeAndCompareResults([file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'x <- 2 * x', 'print(x)'))]); + }); + + it('one full line at the end of a file', async() => { + await executeAndCompareResults([file('a.R', 'x <- 42', lines('x <- 42', 'print(x)'))]); + }); + + it('multiple lines into an empty file', async() => { + await executeAndCompareResults([file('a.R', '', lines('x <- 42', 'y <- 21', 'z <- 10'))]); + }); + + it('multiple lines at different positions', async() => { + await executeAndCompareResults([file('a.R', lines('x <- 42', 'print(x)'), lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))]); + }); + + it('a single character inside a number', async() => { + await 
executeAndCompareResults([file('a.R', 'x <- 42', 'x <- 420')]); + }); + + it('a single character inside an identifier', async() => { + await executeAndCompareResults([file('a.R', 'x <- 42', 'xy <- 42')]); + }); + + it('a token inside an expression', async() => { + await executeAndCompareResults([file('a.R', 'x <- 1 + 2', 'x <- 1 + 2 + 3')]); + }); + + it('a token inside a nested argument list', async() => { + await executeAndCompareResults([file('a.R', 'print(sum(1, 3))', 'print(sum(1, 2, 3))')]); + }); + + it('a trailing newline at end of file', async() => { + await executeAndCompareResults([file('a.R', 'x <- 42', 'x <- 42\n')]); }); }); describe('remove', () => { - it.each(singleFileRemoveCases)('$name', async({ input }) => { - await executeAndCompareResults([input]); + it('one full line such that the file becomes empty', async() => { + await executeAndCompareResults([file('a.R', 'x <- 42', '')]); + }); + + it('one full line at the start of a file', async() => { + await executeAndCompareResults([file('a.R', lines('y <- 21', 'x <- 42'), 'x <- 42')]); + }); + + it('one full line in the middle of a file', async() => { + await executeAndCompareResults([file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'), lines('x <- 42', 'print(x)'))]); + }); + + it('one full line at the end of a file', async() => { + await executeAndCompareResults([file('a.R', lines('x <- 42', 'print(x)'), 'x <- 42')]); + }); + + it('multiple lines such that the file becomes empty', async() => { + await executeAndCompareResults([file('a.R', lines('x <- 42', 'y <- 21', 'z <- 10'), '')]); + }); + + it('multiple lines at different positions', async() => { + await executeAndCompareResults([file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'), lines('x <- 42', 'print(x)'))]); + }); + + it('a single character from a number', async() => { + await executeAndCompareResults([file('a.R', 'x <- 420', 'x <- 42')]); + }); + + it('a single character from an identifier', async() => { + 
await executeAndCompareResults([file('a.R', 'xy <- 42', 'x <- 42')]); + }); + + it('a token from an expression', async() => { + await executeAndCompareResults([file('a.R', 'x <- 1 + 2 + 3', 'x <- 1 + 2')]); + }); + + it('a token from a nested argument list', async() => { + await executeAndCompareResults([file('a.R', 'print(sum(1, 2, 3))', 'print(sum(1, 3))')]); + }); + + it('a trailing newline at end of file', async() => { + await executeAndCompareResults([file('a.R', 'x <- 42\n', 'x <- 42')]); }); }); describe('replace', () => { - it.each(singleFileReplaceCases)('$name', async({ input }) => { - await executeAndCompareResults([input]); + it('one full line at the start of a file', async() => { + await executeAndCompareResults([file('a.R', lines('y <- 21', 'x <- 42'), lines('x <- 84', 'x <- 42'))]); + }); + + it('one full line in the middle of a file', async() => { + await executeAndCompareResults([file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'), lines('x <- 42', 'y <- 21', 'print(x)'))]); + }); + + it('one full line at the end of a file', async() => { + await executeAndCompareResults([file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'x <- x * x'))]); + }); + + it('a partially replaced multi-line region', async() => { + await executeAndCompareResults([file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'), lines('y <- 21', 'x <- 21', 'y <- y * y', 'print(x)', 'print(y)'))]); + }); + + it('a fully replaced content', async() => { + await executeAndCompareResults([file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'), lines('z <- 10', 'z <- z + 32', 'print(z)'))]); + }); + + it('a single character in a number', async() => { + await executeAndCompareResults([file('a.R', 'x <- 42', 'x <- 43')]); + }); + + it('an operator token', async() => { + await executeAndCompareResults([file('a.R', 'x <- 1 + 2', 'x <- 1 * 2')]); + }); + + it('an identifier token', async() => { + await 
executeAndCompareResults([file('a.R', lines('x <- 42', 'print(x)'), lines('value <- 42', 'print(value)'))]); + }); + + it('part of a single line expression', async() => { + await executeAndCompareResults([file('a.R', 'x <- (1 + 2) * 3', 'x <- (1 + 20) * 3')]); + }); + + it('whitespace only on a single line', async() => { + await executeAndCompareResults([file('a.R', 'x <- 42', 'x <- 42')]); + }); + + it('whitespace only across multiple lines', async() => { + await executeAndCompareResults([file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'), lines('f <- function(x) {', '\t', '\ty <- x + 1', '\tprint(y)', '}'))]); + }); + + it('comment text', async() => { + await executeAndCompareResults([file('a.R', 'x <- 42 # old comment', 'x <- 42 # new comment')]); + }); + + it('a string literal', async() => { + await executeAndCompareResults([file('a.R', 'msg <- "abc"', 'msg <- "abcd"')]); + }); + + it('a UTF-8 string literal', async() => { + await executeAndCompareResults([file('a.R', 'msg <- "äöü"', 'msg <- "äöü€"')]); + }); + + it('a UTF-8 comment', async() => { + await executeAndCompareResults([file('a.R', 'x <- 42 # gruß', 'x <- 42 # grüße €')]); }); }); describe('syntax transitions', () => { - it.each(singleFileSyntaxTransitionCases)('$name', async({ input }) => { - await executeAndCompareResults([input]); + it('valid to invalid by removing the right-hand side of an assignment', async() => { + await executeAndCompareResults([file('a.R', 'x <- 42', 'x <-')]); + }); + + it('valid to invalid by removing a closing brace', async() => { + await executeAndCompareResults([file( + 'a.R', + lines( + 'f <- function(x) {', + '\tprint(x)', + '}' + ), + lines( + 'f <- function(x) {', + '\tprint(x)' + ) + )]); + }); + + it('valid to invalid by removing a closing parenthesis', async() => { + await executeAndCompareResults([file('a.R', 'print(sum(1, 2))', 'print(sum(1, 2)')]); + }); + + it('invalid to valid by completing an assignment', async() => { + await 
executeAndCompareResults([file('a.R', 'x <-', 'x <- 42')]); + }); + + it('invalid to valid by restoring a closing brace', async() => { + await executeAndCompareResults([file( + 'a.R', + lines( + 'f <- function(x) {', + '\tprint(x)' + ), + lines( + 'f <- function(x) {', + '\tprint(x)', + '}' + ) + )]); + }); + + it('invalid to valid by restoring a closing parenthesis', async() => { + await executeAndCompareResults([file('a.R', 'print(sum(1, 2)', 'print(sum(1, 2))')]); + }); + + it('invalid to invalid across different incomplete forms', async() => { + await executeAndCompareResults([file('a.R', 'print(', 'function(,')]); }); }); describe('nested structures', () => { - it.each(singleFileNestedStructureCases)('$name', async({ input }) => { - await executeAndCompareResults([input]); + it('inside a function body', async() => { + await executeAndCompareResults([file( + 'a.R', + lines( + 'f <- function(x) {', + '\ty <- x + 1', + '\tprint(y)', + '}' + ), + lines( + 'f <- function(x) {', + '\ty <- x * 2', + '\tprint(y)', + '}' + ) + )]); + }); + + it('inside an if branch', async() => { + await executeAndCompareResults([file( + 'a.R', + lines( + 'if (x > 0) {', + '\ty <- 1', + '}' + ), + lines( + 'if (x > 0) {', + '\ty <- 1', + '\tz <- 2', + '}' + ) + )]); + }); + + it('inside a for loop body', async() => { + await executeAndCompareResults([file( + 'a.R', + lines( + 'for (i in 1:3) {', + '\tprint(i)', + '}' + ), + lines( + 'for (i in 1:3) {', + '\ttotal <- i + 1', + '\tprint(total)', + '}' + ) + )]); + }); + + it('inside a nested argument list', async() => { + await executeAndCompareResults([file('a.R', 'print(sum(1, 2, 3))', 'print(sum(1, 20, 3))')]); + }); + + it('inside nested brackets and subexpressions', async() => { + await executeAndCompareResults([file('a.R', 'x <- list(a = list(b = 1))', 'x <- list(a = list(b = 2))')]); }); }); describe('pairwise successive states', () => { - it.each(repeatedUpdatePairwiseCases)('$name', async({ input }) => { - await 
executeAndCompareResults([input]); + it('sequence step 1: empty file to initial assignment', async() => { + await executeAndCompareResults([file('a.R', '', 'x <- 1')]); + }); + + it('sequence step 2: initial assignment to character-level update', async() => { + await executeAndCompareResults([file('a.R', 'x <- 1', 'x <- 10')]); + }); + + it('sequence step 3: character-level update to added statement', async() => { + await executeAndCompareResults([file('a.R', 'x <- 10', lines('x <- 10', 'print(x)'))]); + }); + + it('sequence step 4: added statement to nested function', async() => { + await executeAndCompareResults([file( + 'a.R', + lines('x <- 10', 'print(x)'), + lines( + 'f <- function() {', + '\tprint(x)', + '}', + 'f()' + ) + )]); + }); + + it('sequence step 5: nested function to temporarily invalid syntax', async() => { + await executeAndCompareResults([file( + 'a.R', + lines( + 'f <- function() {', + '\tprint(x)', + '}', + 'f()' + ), + lines( + 'f <- function() {', + '\tprint(x)', + 'f()' + ) + )]); + }); + + it('sequence step 6: temporarily invalid syntax back to valid syntax', async() => { + await executeAndCompareResults([file( + 'a.R', + lines( + 'f <- function() {', + '\tprint(x)', + 'f()' + ), + lines( + 'f <- function() {', + '\tprint(x)', + '}', + 'f()' + ) + )]); }); }); }); describe('multi-file', () => { - it.each(multiFileCases)('$name', async({ inputs }) => { - await executeAndCompareResults(inputs); + it('editing only the first file while the second file stays unchanged', async() => { + await executeAndCompareResults([ + file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'x <- x + 1', 'print(x)')), + file('b.R', lines('y <- 21', 'print(y)'), lines('y <- 21', 'print(y)')) + ]); + }); + + it('editing only the second file while the first file stays unchanged', async() => { + await executeAndCompareResults([ + file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')), + file('b.R', lines('y <- 21', 'print(y)'), lines('y <- 21', 
'y <- y * 2', 'print(y)')) + ]); + }); + + it('editing both files independently in the same run', async() => { + await executeAndCompareResults([ + file('a.R', lines('x <- 1', 'print(x)'), lines('x <- 2', 'x <- x * 3', 'print(x)')), + file('b.R', lines('y <- 10', 'print(y)'), lines('z <- 10', 'print(z + 1)')) + ]); + }); + + it('adding a new file while another file stays unchanged', async() => { + await executeAndCompareResults([ + file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')), + file('b.R', '', lines('helper <- function(x) {', '\tx * 2', '}', 'print(helper(21))')) + ]); + }); + + it('removing one file while another file stays unchanged', async() => { + await executeAndCompareResults([ + file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')), + file('b.R', lines('tmp <- 1', 'print(tmp)'), '') + ]); + }); + + it('mixing file modification, file addition, and file removal in one run', async() => { + await executeAndCompareResults([ + file('a.R', lines('x <- 1', 'print(x)'), lines('x <- 1', 'x <- x + 1', 'print(x)')), + file('b.R', '', lines('y <- 21', 'print(y)')), + file('c.R', lines('obsolete <- TRUE', 'print(obsolete)'), '') + ]); + }); + + it('making one file invalid while another file remains unchanged and valid', async() => { + await executeAndCompareResults([ + file( + 'a.R', + lines('f <- function(x) {', '\tprint(x)', '}'), + lines('f <- function(x) {', '\tprint(x)') + ), + file('b.R', lines('y <- 21', 'print(y)'), lines('y <- 21', 'print(y)')) + ]); + }); + + it('editing UTF-8 content in one file while another file stays unchanged', async() => { + await executeAndCompareResults([ + file('a.R', lines('msg <- "äöü"', 'print(msg)'), lines('msg <- "äöü€"', 'print(msg)')), + file('b.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')) + ]); + }); + + it('editing inside a nested construct in one file and at top level in another', async() => { + await executeAndCompareResults([ + file( + 'a.R', + lines('f <- 
function(x) {', '\ty <- x + 1', '\tprint(y)', '}'), + lines('f <- function(x) {', '\ty <- x * 2', '\tprint(y)', '}') + ), + file('b.R', lines('z <- 3', 'print(z)'), lines('z <- 3', 'z <- z + 1', 'print(z)')) + ]); + }); + + it('editing only one of two syntactically invalid files', async() => { + await executeAndCompareResults([ + file('a.R', 'print(', 'print(1)'), + file('b.R', 'x <-', 'x <-') + ]); }); describe('pairwise successive states across files', () => { - it.each(repeatedMultiFilePairwiseCases)('$name', async({ inputs }) => { - await executeAndCompareResults(inputs); + it('pairwise sequence step 1 across files', async() => { + await executeAndCompareResults([ + file('a.R', '', 'x <- 1'), + file('b.R', '', 'y <- 2') + ]); + }); + + it('pairwise sequence step 2 across files', async() => { + await executeAndCompareResults([ + file('a.R', 'x <- 1', lines('x <- 1', 'print(x)')), + file('b.R', 'y <- 2', 'y <- 20') + ]); + }); + + it('pairwise sequence step 3 across files with temporary invalidity', async() => { + await executeAndCompareResults([ + file('a.R', lines('x <- 1', 'print(x)'), 'x <-'), + file('b.R', 'y <- 20', lines('f <- function() {', '\tprint(y)', '}', 'f()')) + ]); + }); + + it('pairwise sequence step 4 across files returning to valid syntax', async() => { + await executeAndCompareResults([ + file('a.R', 'x <-', lines('x <- 1', 'print(x)')), + file('b.R', lines('f <- function() {', '\tprint(y)', '}', 'f()'), lines('f <- function() {', '\tprint(y + 1)', '}', 'f()')) + ]); }); }); }); -}); \ No newline at end of file +}); From 4b9d1afc1bdd921fc5ad40cd3231dd2bc83b9efe Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Sat, 4 Apr 2026 21:46:02 +0200 Subject: [PATCH 19/20] test-fix: restructure incremental parsing scenarios Group incremental parsing tests by one vs multiple update sets, remove the old broad successive-state cases, and add focused multi-step edge cases for single-file and multi-file analyzer reuse. 
--- .../incremental/incremental-parsing.test.ts | 1287 ++++++++++------- 1 file changed, 751 insertions(+), 536 deletions(-) diff --git a/test/functionality/incremental/incremental-parsing.test.ts b/test/functionality/incremental/incremental-parsing.test.ts index 04356645443..18689a4d8fc 100644 --- a/test/functionality/incremental/incremental-parsing.test.ts +++ b/test/functionality/incremental/incremental-parsing.test.ts @@ -9,10 +9,14 @@ import type { Tree } from 'web-tree-sitter'; import type { ParseStepOutput, ParseStepOutputSingleFile } from '../../../src/r-bridge/parser'; -interface IncrementalParsingTestInput { - path: string; - originalContent: string; - updatedContent: string; +interface FileState { + path: string; + content: string; +} + +interface IncrementalParsingScenario { + initialFiles: readonly FileState[]; + fileUpdates: readonly (readonly FileState[])[]; } interface IncrementalParseCall { @@ -20,10 +24,63 @@ interface IncrementalParseCall { previousTree: Tree | undefined; } -async function traceIncrementalParseCalls( - analyzer: FlowrAnalyzer, - run: () => Promise -): Promise<{ result: T; incrementalParseCalls: IncrementalParseCall[] }> { +function applyUpdateStepToFileStates( + fileStates: Map, + updateStep: readonly FileState[] +): void { + for(const update of updateStep) { + assert(fileStates.has(update.path), `All paths must be present in initialFiles, missing ${update.path}`); + fileStates.set(update.path, update.content); + } +} + +function changedPathsBetween( + beforeStep: ReadonlyMap, + afterStep: ReadonlyMap +): string[] { + return Array.from(beforeStep.keys()).filter(path => beforeStep.get(path) !== afterStep.get(path)); +} + +function unchangedPathsBetween( + beforeStep: ReadonlyMap, + afterStep: ReadonlyMap +): string[] { + return Array.from(beforeStep.keys()).filter(path => beforeStep.get(path) === afterStep.get(path)); +} + +async function createAnalyzerForFiles( + initialFiles: readonly FileState[] +): Promise<{ analyzer: 
FlowrAnalyzer; files: Map }> { + const analyzer = await new FlowrAnalyzerBuilder() + .setEngine('tree-sitter') + .build(); + const files = new Map(); + + for(const initialFile of initialFiles) { + const file = new FlowrInlineTextFile(initialFile.path, initialFile.content); + analyzer.addFile(file); + analyzer.addRequest({ request: 'file', content: initialFile.path }); + files.set(initialFile.path, file); + } + + return { analyzer, files }; +} + +function applyUpdateStepToAnalyzer( + files: ReadonlyMap, + updateStep: readonly FileState[] +): void { + for(const update of updateStep) { + const file = files.get(update.path); + assert(file !== undefined, `All paths must be present in initialFiles, missing ${update.path}`); + file.updateInlineContent(update.content); + } +} + +function createIncrementalParseTracer(analyzer: FlowrAnalyzer): { + trace(run: () => Promise): Promise<{ result: T; incrementalParseCalls: IncrementalParseCall[] }>; + restore(): void; +} { const executor = analyzer['parser'] as TreeSitterExecutor; const parser = executor['parser']; const originalExecutorParse = executor.parse.bind(executor); @@ -51,15 +108,19 @@ async function traceIncrementalParseCalls( return originalParserParse(sourceCode, previousTree); }); - try { - return { - result: await run(), - incrementalParseCalls - }; - } finally { - executorSpy.mockRestore(); - parserSpy.mockRestore(); - } + return { + async trace(run: () => Promise): Promise<{ result: T; incrementalParseCalls: IncrementalParseCall[] }> { + incrementalParseCalls.length = 0; + return { + result: await run(), + incrementalParseCalls: [...incrementalParseCalls] + }; + }, + restore(): void { + executorSpy.mockRestore(); + parserSpy.mockRestore(); + } + }; } function capturePreviousTrees(analyzer: FlowrAnalyzer): Map { @@ -75,582 +136,736 @@ function capturePreviousTrees(analyzer: FlowrAnalyzer): Map { } function assertChangedFilesUseIncrementalParse( - inputs: readonly IncrementalParsingTestInput[], + changedPaths: 
readonly string[], previousTrees: ReadonlyMap, incrementalParseCalls: readonly IncrementalParseCall[] ): void { - const changedInputs = inputs.filter(input => input.updatedContent !== input.originalContent); - expect(incrementalParseCalls).toHaveLength(changedInputs.length); + expect(incrementalParseCalls).toHaveLength(changedPaths.length); - for(const changedInput of changedInputs) { - const previousTree = previousTrees.get(changedInput.path); - assert(previousTree !== undefined, `Missing previous tree for ${changedInput.path}`); + for(const changedPath of changedPaths) { + const previousTree = previousTrees.get(changedPath); + assert(previousTree !== undefined, `Missing previous tree for ${changedPath}`); - const parseCall = incrementalParseCalls.find(call => call.filePath === changedInput.path); - assert(parseCall !== undefined, `Missing incremental parse call for ${changedInput.path}`); + const parseCall = incrementalParseCalls.find(call => call.filePath === changedPath); + assert(parseCall !== undefined, `Missing incremental parse call for ${changedPath}`); expect(parseCall.previousTree).toBe(previousTree); } } function assertUnchangedFilesReusePreviousTrees( - inputs: readonly IncrementalParsingTestInput[], + unchangedPaths: readonly string[], previousTrees: ReadonlyMap, reparsedTrees: ParseStepOutputSingleFile[] ): void { - for(const input of inputs) { - if(input.updatedContent !== input.originalContent) { - continue; - } - - const previousTree = previousTrees.get(input.path); - const reparsedTree = reparsedTrees.find(file => file.filePath === input.path)?.parsed; - assert(previousTree !== undefined, `Missing previous tree for ${input.path}`); - assert(reparsedTree !== undefined, `Missing reparsed tree for ${input.path}`); - expect(reparsedTree, `no-op invalidation should reuse the previous tree for ${input.path}`).toBe(previousTree); - } -} - - -async function executeFullParse(inputs: readonly IncrementalParsingTestInput[]): Promise { - const analyzer = 
await new FlowrAnalyzerBuilder() - .setEngine('tree-sitter') - .build(); - for(const input of inputs) { - const f = new FlowrInlineTextFile(input.path, input.updatedContent); - analyzer.addFile(f); - analyzer.addRequest({ request: 'file', content: input.path }); + for(const unchangedPath of unchangedPaths) { + const previousTree = previousTrees.get(unchangedPath); + const reparsedTree = reparsedTrees.find(file => file.filePath === unchangedPath)?.parsed; + assert(previousTree !== undefined, `Missing previous tree for ${unchangedPath}`); + assert(reparsedTree !== undefined, `Missing reparsed tree for ${unchangedPath}`); + expect(reparsedTree, `no-op invalidation should reuse the previous tree for ${unchangedPath}`).toBe(previousTree); } - return (await analyzer.normalize()); } - -async function executeIncrementalParse(inputs: readonly IncrementalParsingTestInput[]): Promise { - const analyzer = await new FlowrAnalyzerBuilder() - .setEngine('tree-sitter') - .build(); - const files = new Map(); - for(const input of inputs) { - const f = new FlowrInlineTextFile(input.path, input.originalContent); - analyzer.addFile(f); - analyzer.addRequest({ request: 'file', content: input.path }); - files.set(input.path, f); +function assertPipelineStateAfterUpdateStep( + analyzer: FlowrAnalyzer, + updateStep: readonly FileState[] +): void { + if(updateStep.length === 0) { + assert(analyzer.peekParse() !== undefined, 'without any file updates, the previous pipeline should remain available'); + return; } - await analyzer.normalize(); - const previousTrees = capturePreviousTrees(analyzer); - for(const input of inputs) { - files.get(input.path)?.updateInlineContent(input.updatedContent); - } assert(analyzer.peekParse() === undefined, 'changing the content of parsed files should reset the previous pipeline'); - - const { result, incrementalParseCalls } = await traceIncrementalParseCalls( - analyzer, - async() => await analyzer.normalize() - ); - const reparsed = analyzer.peekParse() as 
ParseStepOutput; - assert(reparsed !== undefined, 'after parsing once more, the pipeline must contain the analysis results again'); - - assertChangedFilesUseIncrementalParse(inputs, previousTrees, incrementalParseCalls); - assertUnchangedFilesReusePreviousTrees(inputs, previousTrees, reparsed.files); - - return result; } +async function executeFullParse(fileStates: readonly FileState[]): Promise { + const { analyzer } = await createAnalyzerForFiles(fileStates); + return await analyzer.normalize(); +} -async function executeAndCompareResults(inputs: readonly IncrementalParsingTestInput[]): Promise { - const fullParse = await executeFullParse(inputs); - const incrementalParse = await executeIncrementalParse(inputs); - - const fullParseMermaid = printNormalizedAstToMermaid(fullParse); - const incrementalParseMermaid = printNormalizedAstToMermaid(incrementalParse); +async function executeAndCompareScenario(scenario: IncrementalParsingScenario): Promise { + const { analyzer, files } = await createAnalyzerForFiles(scenario.initialFiles); + const incrementalParseTracer = createIncrementalParseTracer(analyzer); + await analyzer.normalize(); + const currentFileStates: Map = new Map(scenario.initialFiles.map(fileState => [fileState.path, fileState.content])); - assert.equal(fullParseMermaid, incrementalParseMermaid, 'The incremental parse result does not match the full parse result'); + try { + for(const updateStep of scenario.fileUpdates) { + const previousTrees = capturePreviousTrees(analyzer); + const previousFileStates = new Map(currentFileStates); + + applyUpdateStepToAnalyzer(files, updateStep); + applyUpdateStepToFileStates(currentFileStates, updateStep); + assertPipelineStateAfterUpdateStep(analyzer, updateStep); + + const changedPaths = changedPathsBetween(previousFileStates, currentFileStates); + const unchangedPaths = unchangedPathsBetween(previousFileStates, currentFileStates); + const { result: incrementalResult, incrementalParseCalls } = await 
incrementalParseTracer.trace( + async() => await analyzer.normalize() + ); + const reparsed = analyzer.peekParse() as ParseStepOutput; + assert(reparsed !== undefined, 'after parsing once more, the pipeline must contain the analysis results again'); + + assertChangedFilesUseIncrementalParse(changedPaths, previousTrees, incrementalParseCalls); + assertUnchangedFilesReusePreviousTrees(unchangedPaths, previousTrees, reparsed.files); + + const fileStatesFromMap = Array.from(currentFileStates, ([path, content]) => ({ path, content })); + const fullReparseResult = await executeFullParse(fileStatesFromMap); + assert.equal( + printNormalizedAstToMermaid(fullReparseResult), + printNormalizedAstToMermaid(incrementalResult), + 'The incremental parse result does not match the full parse result' + ); + } + } finally { + incrementalParseTracer.restore(); + } } const lines = (...xs: string[]): string => xs.join('\n'); const file = ( path: string, - originalContent: string, - updatedContent: string -): IncrementalParsingTestInput => ({ + content: string +): FileState => ({ path, - originalContent, - updatedContent + content }); +const step = (...files: FileState[]): readonly FileState[] => files; -describe('Incremental Parsing produces same results as Full Parsing', () => { - describe('single-file', () => { - describe('no-op', () => { - it('empty file', async() => { - await executeAndCompareResults([file('a.R', '', '')]); - }); - - it('file with top-level content', async() => { - await executeAndCompareResults([file('a.R', 'x <- 42', 'x <- 42')]); - }); - - it('file with nested content', async() => { - await executeAndCompareResults([file( - 'a.R', - lines( - 'f <- function(x) {', - '\ty <- x + 1', - '\tprint(y)', - '}' - ), - lines( - 'f <- function(x) {', - '\ty <- x + 1', - '\tprint(y)', - '}' - ) - )]); - }); - - it('syntactically invalid file', async() => { - await executeAndCompareResults([file('a.R', 'print(', 'print(')]); - }); - }); - - describe('insert', () => { - 
it('one full line into an empty file', async() => { - await executeAndCompareResults([file('a.R', '', 'x <- 42')]); - }); - - it('one full line at the start of a file', async() => { - await executeAndCompareResults([file('a.R', 'x <- 42', lines('y <- 21', 'x <- 42'))]); - }); - - it('one full line in the middle of a file', async() => { - await executeAndCompareResults([file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'x <- 2 * x', 'print(x)'))]); - }); - - it('one full line at the end of a file', async() => { - await executeAndCompareResults([file('a.R', 'x <- 42', lines('x <- 42', 'print(x)'))]); - }); - - it('multiple lines into an empty file', async() => { - await executeAndCompareResults([file('a.R', '', lines('x <- 42', 'y <- 21', 'z <- 10'))]); - }); - - it('multiple lines at different positions', async() => { - await executeAndCompareResults([file('a.R', lines('x <- 42', 'print(x)'), lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))]); - }); - - it('a single character inside a number', async() => { - await executeAndCompareResults([file('a.R', 'x <- 42', 'x <- 420')]); - }); - - it('a single character inside an identifier', async() => { - await executeAndCompareResults([file('a.R', 'x <- 42', 'xy <- 42')]); - }); - - it('a token inside an expression', async() => { - await executeAndCompareResults([file('a.R', 'x <- 1 + 2', 'x <- 1 + 2 + 3')]); - }); - - it('a token inside a nested argument list', async() => { - await executeAndCompareResults([file('a.R', 'print(sum(1, 3))', 'print(sum(1, 2, 3))')]); - }); - - it('a trailing newline at end of file', async() => { - await executeAndCompareResults([file('a.R', 'x <- 42', 'x <- 42\n')]); - }); - }); - - describe('remove', () => { - it('one full line such that the file becomes empty', async() => { - await executeAndCompareResults([file('a.R', 'x <- 42', '')]); - }); - - it('one full line at the start of a file', async() => { - await executeAndCompareResults([file('a.R', lines('y <- 21', 
'x <- 42'), 'x <- 42')]); - }); - - it('one full line in the middle of a file', async() => { - await executeAndCompareResults([file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'), lines('x <- 42', 'print(x)'))]); - }); - - it('one full line at the end of a file', async() => { - await executeAndCompareResults([file('a.R', lines('x <- 42', 'print(x)'), 'x <- 42')]); - }); - - it('multiple lines such that the file becomes empty', async() => { - await executeAndCompareResults([file('a.R', lines('x <- 42', 'y <- 21', 'z <- 10'), '')]); - }); - - it('multiple lines at different positions', async() => { - await executeAndCompareResults([file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'), lines('x <- 42', 'print(x)'))]); - }); - - it('a single character from a number', async() => { - await executeAndCompareResults([file('a.R', 'x <- 420', 'x <- 42')]); - }); - - it('a single character from an identifier', async() => { - await executeAndCompareResults([file('a.R', 'xy <- 42', 'x <- 42')]); - }); - - it('a token from an expression', async() => { - await executeAndCompareResults([file('a.R', 'x <- 1 + 2 + 3', 'x <- 1 + 2')]); - }); - - it('a token from a nested argument list', async() => { - await executeAndCompareResults([file('a.R', 'print(sum(1, 2, 3))', 'print(sum(1, 3))')]); - }); - - it('a trailing newline at end of file', async() => { - await executeAndCompareResults([file('a.R', 'x <- 42\n', 'x <- 42')]); - }); - }); - - describe('replace', () => { - it('one full line at the start of a file', async() => { - await executeAndCompareResults([file('a.R', lines('y <- 21', 'x <- 42'), lines('x <- 84', 'x <- 42'))]); - }); - - it('one full line in the middle of a file', async() => { - await executeAndCompareResults([file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'), lines('x <- 42', 'y <- 21', 'print(x)'))]); - }); - - it('one full line at the end of a file', async() => { - await executeAndCompareResults([file('a.R', lines('x <- 42', 
'print(x)'), lines('x <- 42', 'x <- x * x'))]); - }); - - it('a partially replaced multi-line region', async() => { - await executeAndCompareResults([file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'), lines('y <- 21', 'x <- 21', 'y <- y * y', 'print(x)', 'print(y)'))]); - }); - - it('a fully replaced content', async() => { - await executeAndCompareResults([file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'), lines('z <- 10', 'z <- z + 32', 'print(z)'))]); - }); - - it('a single character in a number', async() => { - await executeAndCompareResults([file('a.R', 'x <- 42', 'x <- 43')]); - }); - - it('an operator token', async() => { - await executeAndCompareResults([file('a.R', 'x <- 1 + 2', 'x <- 1 * 2')]); - }); - - it('an identifier token', async() => { - await executeAndCompareResults([file('a.R', lines('x <- 42', 'print(x)'), lines('value <- 42', 'print(value)'))]); - }); - - it('part of a single line expression', async() => { - await executeAndCompareResults([file('a.R', 'x <- (1 + 2) * 3', 'x <- (1 + 20) * 3')]); - }); - - it('whitespace only on a single line', async() => { - await executeAndCompareResults([file('a.R', 'x <- 42', 'x <- 42')]); - }); - - it('whitespace only across multiple lines', async() => { - await executeAndCompareResults([file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'), lines('f <- function(x) {', '\t', '\ty <- x + 1', '\tprint(y)', '}'))]); - }); +const scenario = ( + initialFiles: readonly FileState[], + ...fileUpdates: readonly (readonly FileState[])[] +): IncrementalParsingScenario => ({ + initialFiles, + fileUpdates +}); - it('comment text', async() => { - await executeAndCompareResults([file('a.R', 'x <- 42 # old comment', 'x <- 42 # new comment')]); - }); - it('a string literal', async() => { - await executeAndCompareResults([file('a.R', 'msg <- "abc"', 'msg <- "abcd"')]); - }); - - it('a UTF-8 string literal', async() => { - await 
executeAndCompareResults([file('a.R', 'msg <- "äöü"', 'msg <- "äöü€"')]); +describe('Incremental Parsing produces same results as Full Parsing', () => { + describe('one update set', () => { + describe('single-file', () => { + describe('no-op', () => { + it('empty file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', '')], + step(file('a.R', '')) + )); + }); + + it('file with top-level content', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <- 42')) + )); + }); + + it('file with nested content', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))], + step(file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))) + )); + }); + + it('syntactically invalid file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'print(')], + step(file('a.R', 'print(')) + )); + }); + }); + + describe('insert', () => { + it('one full line into an empty file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', '')], + step(file('a.R', 'x <- 42')) + )); + }); + + it('one full line at the start of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', lines('y <- 21', 'x <- 42'))) + )); + }); + + it('one full line in the middle of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'print(x)'))], + step(file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'))) + )); + }); + + it('one full line at the end of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', lines('x <- 42', 'print(x)'))) + )); + }); + + it('multiple lines into an empty file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', '')], + step(file('a.R', lines('x <- 42', 'y <- 21', 'z <- 10'))) + )); + }); + + 
it('multiple lines at different positions', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'print(x)'))], + step(file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))) + )); + }); + + it('a single character inside a number', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <- 420')) + )); + }); + + it('a single character inside an identifier', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', 'xy <- 42')) + )); + }); + + it('a token inside an expression', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 1 + 2')], + step(file('a.R', 'x <- 1 + 2 + 3')) + )); + }); + + it('a token inside a nested argument list', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'print(sum(1, 3))')], + step(file('a.R', 'print(sum(1, 2, 3))')) + )); + }); + + it('a trailing newline at end of file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <- 42\n')) + )); + }); + }); + + describe('remove', () => { + it('one full line such that the file becomes empty', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', '')) + )); + }); + + it('one full line at the start of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('y <- 21', 'x <- 42'))], + step(file('a.R', 'x <- 42')) + )); + }); + + it('one full line in the middle of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'))], + step(file('a.R', lines('x <- 42', 'print(x)'))) + )); + }); + + it('one full line at the end of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'print(x)'))], + step(file('a.R', 'x <- 42')) + )); + }); + + it('multiple lines 
such that the file becomes empty', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'y <- 21', 'z <- 10'))], + step(file('a.R', '')) + )); + }); + + it('multiple lines at different positions', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))], + step(file('a.R', lines('x <- 42', 'print(x)'))) + )); + }); + + it('a single character from a number', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 420')], + step(file('a.R', 'x <- 42')) + )); + }); + + it('a single character from an identifier', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'xy <- 42')], + step(file('a.R', 'x <- 42')) + )); + }); + + it('a token from an expression', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 1 + 2 + 3')], + step(file('a.R', 'x <- 1 + 2')) + )); + }); + + it('a token from a nested argument list', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'print(sum(1, 2, 3))')], + step(file('a.R', 'print(sum(1, 3))')) + )); + }); + + it('a trailing newline at end of file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42\n')], + step(file('a.R', 'x <- 42')) + )); + }); + }); + + describe('replace', () => { + it('one full line at the start of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('y <- 21', 'x <- 42'))], + step(file('a.R', lines('x <- 84', 'x <- 42'))) + )); + }); + + it('one full line in the middle of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'))], + step(file('a.R', lines('x <- 42', 'y <- 21', 'print(x)'))) + )); + }); + + it('one full line at the end of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'print(x)'))], + step(file('a.R', lines('x <- 42', 
'x <- x * x'))) + )); + }); + + it('a partially replaced multi-line region', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))], + step(file('a.R', lines('y <- 21', 'x <- 21', 'y <- y * y', 'print(x)', 'print(y)'))) + )); + }); + + it('a fully replaced content', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))], + step(file('a.R', lines('z <- 10', 'z <- z + 32', 'print(z)'))) + )); + }); + + it('a single character in a number', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <- 43')) + )); + }); + + it('an operator token', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 1 + 2')], + step(file('a.R', 'x <- 1 * 2')) + )); + }); + + it('an identifier token', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'print(x)'))], + step(file('a.R', lines('value <- 42', 'print(value)'))) + )); + }); + + it('part of a single line expression', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- (1 + 2) * 3')], + step(file('a.R', 'x <- (1 + 20) * 3')) + )); + }); + + it('whitespace only on a single line', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <- 42')) + )); + }); + + it('whitespace only across multiple lines', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))], + step(file('a.R', lines('f <- function(x) {', '\t', '\ty <- x + 1', '\tprint(y)', '}'))) + )); + }); + + it('comment text', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42 # old comment')], + step(file('a.R', 'x <- 42 # new comment')) + )); + }); + + it('a string literal', async() => { + await 
executeAndCompareScenario(scenario( + [file('a.R', 'msg <- "abc"')], + step(file('a.R', 'msg <- "abcd"')) + )); + }); + + it('a UTF-8 string literal', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'msg <- "äöü"')], + step(file('a.R', 'msg <- "äöü€"')) + )); + }); + + it('a UTF-8 comment', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42 # gruß')], + step(file('a.R', 'x <- 42 # grüße €')) + )); + }); + }); + + describe('syntax transitions', () => { + it('valid to invalid by removing the right-hand side of an assignment', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <-')) + )); + }); + + it('valid to invalid by removing a closing brace', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('f <- function(x) {', '\tprint(x)', '}'))], + step(file('a.R', lines('f <- function(x) {', '\tprint(x)'))) + )); + }); + + it('valid to invalid by removing a closing parenthesis', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'print(sum(1, 2))')], + step(file('a.R', 'print(sum(1, 2)')) + )); + }); + + it('invalid to valid by completing an assignment', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <-')], + step(file('a.R', 'x <- 42')) + )); + }); + + it('invalid to valid by restoring a closing brace', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('f <- function(x) {', '\tprint(x)'))], + step(file('a.R', lines('f <- function(x) {', '\tprint(x)', '}'))) + )); + }); + + it('invalid to valid by restoring a closing parenthesis', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'print(sum(1, 2)')], + step(file('a.R', 'print(sum(1, 2))')) + )); + }); + + it('invalid to invalid across different incomplete forms', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'print(')], + step(file('a.R', 'function(,')) + )); + }); 
+ }); + + describe('nested structures', () => { + it('inside a function body', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))], + step(file('a.R', lines('f <- function(x) {', '\ty <- x * 2', '\tprint(y)', '}'))) + )); + }); + + it('inside an if branch', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('if (x > 0) {', '\ty <- 1', '}'))], + step(file('a.R', lines('if (x > 0) {', '\ty <- 1', '\tz <- 2', '}'))) + )); + }); + + it('inside a for loop body', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('for (i in 1:3) {', '\tprint(i)', '}'))], + step(file('a.R', lines('for (i in 1:3) {', '\ttotal <- i + 1', '\tprint(total)', '}'))) + )); + }); + + it('inside a nested argument list', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'print(sum(1, 2, 3))')], + step(file('a.R', 'print(sum(1, 20, 3))')) + )); + }); + + it('inside nested brackets and subexpressions', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- list(a = list(b = 1))')], + step(file('a.R', 'x <- list(a = list(b = 2))')) + )); + }); }); - it('a UTF-8 comment', async() => { - await executeAndCompareResults([file('a.R', 'x <- 42 # gruß', 'x <- 42 # grüße €')]); - }); }); - describe('syntax transitions', () => { - it('valid to invalid by removing the right-hand side of an assignment', async() => { - await executeAndCompareResults([file('a.R', 'x <- 42', 'x <-')]); - }); - - it('valid to invalid by removing a closing brace', async() => { - await executeAndCompareResults([file( - 'a.R', - lines( - 'f <- function(x) {', - '\tprint(x)', - '}' - ), - lines( - 'f <- function(x) {', - '\tprint(x)' + describe('multi-file', () => { + it('editing only the first file while the second file stays unchanged', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', 
lines('y <- 21', 'print(y)')) + ], + step( + file('a.R', lines('x <- 42', 'x <- x + 1', 'print(x)')), + file('b.R', lines('y <- 21', 'print(y)')) ) - )]); - }); - - it('valid to invalid by removing a closing parenthesis', async() => { - await executeAndCompareResults([file('a.R', 'print(sum(1, 2))', 'print(sum(1, 2)')]); - }); - - it('invalid to valid by completing an assignment', async() => { - await executeAndCompareResults([file('a.R', 'x <-', 'x <- 42')]); - }); - - it('invalid to valid by restoring a closing brace', async() => { - await executeAndCompareResults([file( - 'a.R', - lines( - 'f <- function(x) {', - '\tprint(x)' - ), - lines( - 'f <- function(x) {', - '\tprint(x)', - '}' + )); + }); + + it('editing only the second file while the first file stays unchanged', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', lines('y <- 21', 'print(y)')) + ], + step( + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', lines('y <- 21', 'y <- y * 2', 'print(y)')) ) - )]); - }); - - it('invalid to valid by restoring a closing parenthesis', async() => { - await executeAndCompareResults([file('a.R', 'print(sum(1, 2)', 'print(sum(1, 2))')]); - }); - - it('invalid to invalid across different incomplete forms', async() => { - await executeAndCompareResults([file('a.R', 'print(', 'function(,')]); - }); - }); - - describe('nested structures', () => { - it('inside a function body', async() => { - await executeAndCompareResults([file( - 'a.R', - lines( - 'f <- function(x) {', - '\ty <- x + 1', - '\tprint(y)', - '}' - ), - lines( - 'f <- function(x) {', - '\ty <- x * 2', - '\tprint(y)', - '}' + )); + }); + + it('editing both files independently in the same run', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('x <- 1', 'print(x)')), + file('b.R', lines('y <- 10', 'print(y)')) + ], + step( + file('a.R', lines('x <- 2', 'x <- x * 3', 'print(x)')), + file('b.R', lines('z <- 
10', 'print(z + 1)')) ) - )]); - }); - - it('inside an if branch', async() => { - await executeAndCompareResults([file( - 'a.R', - lines( - 'if (x > 0) {', - '\ty <- 1', - '}' - ), - lines( - 'if (x > 0) {', - '\ty <- 1', - '\tz <- 2', - '}' + )); + }); + + it('adding a new file while another file stays unchanged', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', '') + ], + step( + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', lines('helper <- function(x) {', '\tx * 2', '}', 'print(helper(21))')) ) - )]); - }); - - it('inside a for loop body', async() => { - await executeAndCompareResults([file( - 'a.R', - lines( - 'for (i in 1:3) {', - '\tprint(i)', - '}' - ), - lines( - 'for (i in 1:3) {', - '\ttotal <- i + 1', - '\tprint(total)', - '}' + )); + }); + + it('removing one file while another file stays unchanged', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', lines('tmp <- 1', 'print(tmp)')) + ], + step( + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', '') ) - )]); - }); - - it('inside a nested argument list', async() => { - await executeAndCompareResults([file('a.R', 'print(sum(1, 2, 3))', 'print(sum(1, 20, 3))')]); + )); + }); + + it('mixing file modification, file addition, and file removal in one run', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('x <- 1', 'print(x)')), + file('b.R', ''), + file('c.R', lines('obsolete <- TRUE', 'print(obsolete)')) + ], + step( + file('a.R', lines('x <- 1', 'x <- x + 1', 'print(x)')), + file('b.R', lines('y <- 21', 'print(y)')), + file('c.R', '') + ) + )); + }); + + it('making one file invalid while another file remains unchanged and valid', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('f <- function(x) {', '\tprint(x)', '}')), + file('b.R', lines('y <- 21', 'print(y)')) + ], + step( + file('a.R', 
lines('f <- function(x) {', '\tprint(x)')), + file('b.R', lines('y <- 21', 'print(y)')) + ) + )); + }); + + it('editing UTF-8 content in one file while another file stays unchanged', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('msg <- "äöü"', 'print(msg)')), + file('b.R', lines('x <- 42', 'print(x)')) + ], + step( + file('a.R', lines('msg <- "äöü€"', 'print(msg)')), + file('b.R', lines('x <- 42', 'print(x)')) + ) + )); + }); + + it('editing inside a nested construct in one file and at top level in another', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}')), + file('b.R', lines('z <- 3', 'print(z)')) + ], + step( + file('a.R', lines('f <- function(x) {', '\ty <- x * 2', '\tprint(y)', '}')), + file('b.R', lines('z <- 3', 'z <- z + 1', 'print(z)')) + ) + )); + }); + + it('editing only one of two syntactically invalid files', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', 'print('), + file('b.R', 'x <-') + ], + step( + file('a.R', 'print(1)'), + file('b.R', 'x <-') + ) + )); }); - it('inside nested brackets and subexpressions', async() => { - await executeAndCompareResults([file('a.R', 'x <- list(a = list(b = 1))', 'x <- list(a = list(b = 2))')]); - }); }); + }); - describe('pairwise successive states', () => { - it('sequence step 1: empty file to initial assignment', async() => { - await executeAndCompareResults([file('a.R', '', 'x <- 1')]); - }); - - it('sequence step 2: initial assignment to character-level update', async() => { - await executeAndCompareResults([file('a.R', 'x <- 1', 'x <- 10')]); + describe('multiple update sets', () => { + describe('single-file', () => { + it('keeps the cached pipeline across an empty update step before a later real edit', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 1')], + step(), + step(file('a.R', 'x <- 2')) + )); + }); + + it('reuses the 
previous tree for a no-op invalidation after a prior real edit', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 1')], + step(file('a.R', 'x <- 10')), + step(file('a.R', 'x <- 10')) + )); + }); + + it('handles multiple updates in one step whose final content matches the original content', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 1')], + step( + file('a.R', 'x <- 10'), + file('a.R', 'x <- 1') + ) + )); }); - it('sequence step 3: character-level update to added statement', async() => { - await executeAndCompareResults([file('a.R', 'x <- 10', lines('x <- 10', 'print(x)'))]); + it('recovers across valid, invalid, cached, and valid states on the same analyzer instance', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 1')], + step(file('a.R', 'x <-')), + step(), + step(file('a.R', 'x <- 1')) + )); }); + }); - it('sequence step 4: added statement to nested function', async() => { - await executeAndCompareResults([file( - 'a.R', - lines('x <- 10', 'print(x)'), - lines( - 'f <- function() {', - '\tprint(x)', - '}', - 'f()' + describe('multi-file', () => { + it('keeps the cached pipeline on an empty step before changing only one file', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', 'x <- 1'), + file('b.R', 'y <- 2') + ], + step(), + step( + file('a.R', 'x <- 10'), + file('b.R', 'y <- 2') ) - )]); - }); - - it('sequence step 5: nested function to temporarily invalid syntax', async() => { - await executeAndCompareResults([file( - 'a.R', - lines( - 'f <- function() {', - '\tprint(x)', - '}', - 'f()' + )); + }); + + it('handles a no-op invalidation for one file while another file changes in the next step', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', 'x <- 1'), + file('b.R', 'y <- 2') + ], + step( + file('a.R', 'x <- 10'), + file('b.R', 'y <- 2') ), - lines( - 'f <- function() {', - '\tprint(x)', - 'f()' + step( + file('a.R', 
'x <- 10'), + file('b.R', 'y <- 20') ) - )]); - }); - - it('sequence step 6: temporarily invalid syntax back to valid syntax', async() => { - await executeAndCompareResults([file( - 'a.R', - lines( - 'f <- function() {', - '\tprint(x)', - 'f()' + )); + }); + + it('handles repeated updates to one file in a step while another file ends up truly changed', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', 'x <- 1'), + file('b.R', 'y <- 2') + ], + step( + file('a.R', 'x <- 10'), + file('a.R', 'x <- 1'), + file('b.R', 'y <- 20') + ) + )); + }); + + it('switches which file changes across successive steps while the other is reused', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('x <- 1', 'print(x)')), + file('b.R', lines('y <- 2', 'print(y)')) + ], + step( + file('a.R', lines('x <- 10', 'print(x)')), + file('b.R', lines('y <- 2', 'print(y)')) ), - lines( - 'f <- function() {', - '\tprint(x)', - '}', - 'f()' + step( + file('a.R', lines('x <- 10', 'print(x)')), + file('b.R', lines('y <- 20', 'print(y)')) ) - )]); - }); - }); - }); - - describe('multi-file', () => { - it('editing only the first file while the second file stays unchanged', async() => { - await executeAndCompareResults([ - file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'x <- x + 1', 'print(x)')), - file('b.R', lines('y <- 21', 'print(y)'), lines('y <- 21', 'print(y)')) - ]); - }); - - it('editing only the second file while the first file stays unchanged', async() => { - await executeAndCompareResults([ - file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')), - file('b.R', lines('y <- 21', 'print(y)'), lines('y <- 21', 'y <- y * 2', 'print(y)')) - ]); - }); - - it('editing both files independently in the same run', async() => { - await executeAndCompareResults([ - file('a.R', lines('x <- 1', 'print(x)'), lines('x <- 2', 'x <- x * 3', 'print(x)')), - file('b.R', lines('y <- 10', 'print(y)'), lines('z <- 10', 'print(z + 1)')) 
- ]); - }); - - it('adding a new file while another file stays unchanged', async() => { - await executeAndCompareResults([ - file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')), - file('b.R', '', lines('helper <- function(x) {', '\tx * 2', '}', 'print(helper(21))')) - ]); - }); - - it('removing one file while another file stays unchanged', async() => { - await executeAndCompareResults([ - file('a.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')), - file('b.R', lines('tmp <- 1', 'print(tmp)'), '') - ]); - }); - - it('mixing file modification, file addition, and file removal in one run', async() => { - await executeAndCompareResults([ - file('a.R', lines('x <- 1', 'print(x)'), lines('x <- 1', 'x <- x + 1', 'print(x)')), - file('b.R', '', lines('y <- 21', 'print(y)')), - file('c.R', lines('obsolete <- TRUE', 'print(obsolete)'), '') - ]); - }); - - it('making one file invalid while another file remains unchanged and valid', async() => { - await executeAndCompareResults([ - file( - 'a.R', - lines('f <- function(x) {', '\tprint(x)', '}'), - lines('f <- function(x) {', '\tprint(x)') - ), - file('b.R', lines('y <- 21', 'print(y)'), lines('y <- 21', 'print(y)')) - ]); - }); - - it('editing UTF-8 content in one file while another file stays unchanged', async() => { - await executeAndCompareResults([ - file('a.R', lines('msg <- "äöü"', 'print(msg)'), lines('msg <- "äöü€"', 'print(msg)')), - file('b.R', lines('x <- 42', 'print(x)'), lines('x <- 42', 'print(x)')) - ]); - }); - - it('editing inside a nested construct in one file and at top level in another', async() => { - await executeAndCompareResults([ - file( - 'a.R', - lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'), - lines('f <- function(x) {', '\ty <- x * 2', '\tprint(y)', '}') - ), - file('b.R', lines('z <- 3', 'print(z)'), lines('z <- 3', 'z <- z + 1', 'print(z)')) - ]); - }); - - it('editing only one of two syntactically invalid files', async() => { - await 
executeAndCompareResults([ - file('a.R', 'print(', 'print(1)'), - file('b.R', 'x <-', 'x <-') - ]); - }); - - describe('pairwise successive states across files', () => { - it('pairwise sequence step 1 across files', async() => { - await executeAndCompareResults([ - file('a.R', '', 'x <- 1'), - file('b.R', '', 'y <- 2') - ]); - }); - - it('pairwise sequence step 2 across files', async() => { - await executeAndCompareResults([ - file('a.R', 'x <- 1', lines('x <- 1', 'print(x)')), - file('b.R', 'y <- 2', 'y <- 20') - ]); - }); - - it('pairwise sequence step 3 across files with temporary invalidity', async() => { - await executeAndCompareResults([ - file('a.R', lines('x <- 1', 'print(x)'), 'x <-'), - file('b.R', 'y <- 20', lines('f <- function() {', '\tprint(y)', '}', 'f()')) - ]); - }); - - it('pairwise sequence step 4 across files returning to valid syntax', async() => { - await executeAndCompareResults([ - file('a.R', 'x <-', lines('x <- 1', 'print(x)')), - file('b.R', lines('f <- function() {', '\tprint(y)', '}', 'f()'), lines('f <- function() {', '\tprint(y + 1)', '}', 'f()')) - ]); + )); }); }); }); From ea96a77637be0059a6ba09863c10ee23917dd391 Mon Sep 17 00:00:00 2001 From: Jonathan Riesland Date: Thu, 21 May 2026 21:34:31 +0200 Subject: [PATCH 20/20] feat-fix: apply feedback from code review --- src/documentation/wiki-analyzer.ts | 46 +- src/project/cache/flowr-analyzer-cache.ts | 2 +- src/project/cache/flowr-cache.ts | 31 +- .../flowr-analyzer-dependencies-context.ts | 2 +- .../context/flowr-analyzer-files-context.ts | 6 +- ...r-analyzer-incremental-analysis-context.ts | 19 +- .../context/flowr-analyzer-meta-context.ts | 2 +- src/project/context/flowr-file.ts | 2 +- .../incremental-parse/edit-computation.ts | 31 +- .../incremental-parse/incremental-parse.ts | 4 +- .../tree-sitter/tree-sitter-executor.ts | 7 +- .../incremental/incremental-parsing.test.ts | 1142 ++++++++--------- 12 files changed, 632 insertions(+), 662 deletions(-) diff --git 
a/src/documentation/wiki-analyzer.ts b/src/documentation/wiki-analyzer.ts index f1427c3e582..39fe7d174c8 100644 --- a/src/documentation/wiki-analyzer.ts +++ b/src/documentation/wiki-analyzer.ts @@ -497,28 +497,30 @@ In other words, this context only transports incremental handoff state between a ${section('Incremental Parsing', 4)} -Currently, the implemented use of this context is Tree-sitter's incremental parsing support. -When a file is represented by a mutable file provider such as ${ctx.link('FlowrInlineTextFile')} and its content is invalidated via -${ctx.linkM(FlowrInlineTextFile, 'invalidate', { codeFont: true, realNameWrapper: 'i' })}, -the analyzer receives a file invalidation event. -At that point, the incremental context only records the file path together with the old source text. -No edit region is computed eagerly during invalidation. - -After a successful parse-oriented analysis run, the analyzer cache stores the latest Tree-sitter parse trees in this context via -${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'storeOldParseResults', { codeFont: true, realNameWrapper: 'i' })}. -This gives the next parse run access to the last completed parse snapshot for each file path. - -On the next parse run, Tree-sitter combines both pieces of information lazily: - -* the previous parse tree obtained from - ${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'getOldParseResultOf', { codeFont: true, realNameWrapper: 'i' })} -* the old source text obtained from - ${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'getAndRemoveOldContentOf', { codeFont: true, realNameWrapper: 'i' })} - -Using these together with the current file content, flowR computes a minimal ${ctx.link('Parser.Edit')} only when a new parse is actually requested. -If the file content did not change, the previous tree can be reused directly. -Otherwise, the edit is applied to the previous tree and Tree-sitter reparses incrementally instead of starting from scratch. 
-The stored old-content entry is consumed when it is used, so invalidation state only survives until the next relevant parse. +This context is used to exploit Tree-sitter's incremental parsing feature. +For one file, the incremental state follows a fixed lifecycle: + +1. After a successful parse-oriented analysis run, the analyzer cache stores the latest Tree-sitter parse tree via + ${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'storeOldParseResults', { codeFont: true, realNameWrapper: 'i' })}. + This tree is the baseline for the next incremental parse of that file. +2. When a mutable file provider such as ${ctx.link('FlowrInlineTextFile')} is invalidated via + ${ctx.linkM(FlowrInlineTextFile, 'invalidate', { codeFont: true, realNameWrapper: 'i' })}, + the analyzer receives a file invalidation event and stores the file path together with the old source text. + If the same file is invalidated again before the next parse, this stored old text is intentionally **not** replaced: + the stored parse tree still belongs to the version from before the first invalidation, so the incremental parse must keep that matching old-content baseline. +3. When parsing is requested again, flowR retrieves + * the previous parse tree from + ${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'getOldParseResultOf', { codeFont: true, realNameWrapper: 'i' })} + * the stored old source text from + ${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'getOldContentOf', { codeFont: true, realNameWrapper: 'i' })} + + Using these together with the current file content, flowR computes a minimal ${ctx.link('Parser.Edit')} only when a new parse is actually requested. + If the file content did not change, the previous tree can be reused directly. + Otherwise, the edit is applied to the previous tree and Tree-sitter reparses incrementally instead of starting from scratch. +4. The stored old-content entry is removed when it is used because it belongs only to that previous parse snapshot. 
+ After the new parse succeeds, the analyzer stores a new parse tree baseline. + A later invalidation must then be able to record a fresh old-content value that matches this new tree. + If the old-content entry were kept, later invalidations of the same file would not replace it, and the next incremental parse could compare the current file content against stale old text that no longer matches the stored previous tree. ${section('Incremental Dataflow', 4)} diff --git a/src/project/cache/flowr-analyzer-cache.ts b/src/project/cache/flowr-analyzer-cache.ts index 34f0ddd06dc..d540984b263 100644 --- a/src/project/cache/flowr-analyzer-cache.ts +++ b/src/project/cache/flowr-analyzer-cache.ts @@ -68,7 +68,7 @@ export class FlowrAnalyzerCache extends FlowrCache { - readonly type: InvalidationEventType.FileInvalidate; +/** + * Invalidation event for a single file identified by {@link filePath}. + * + * {@link oldContent} contains the file content from immediately before the + * change that triggered this event. In other words, it is the pre-change content + * for this invalidation, not necessarily the content from the last completed + * analysis run. 
+ */ +export interface SingleFileInvalidationEvent { + readonly type: InvalidationEventType.SingleFileInvalidate; readonly oldContent: Content | undefined; readonly filePath: string; } export type InvalidationEvent = { type: InvalidationEventType.Full } - | FileContentInvalidateEvent; + | SingleFileInvalidationEvent; export type InvalidationEventHandler = (event: InvalidationEvent) => void; @@ -42,7 +61,7 @@ export abstract class FlowrCache implements InvalidationEventReceiver { /* we will update this as soon as we support incremental update patterns */ switch(type) { case InvalidationEventType.Full: - case InvalidationEventType.FileInvalidate: + case InvalidationEventType.SingleFileInvalidate: this.value = undefined; break; default: @@ -72,4 +91,4 @@ export abstract class FlowrCache implements InvalidationEventReceiver { return this.value; } -} \ No newline at end of file +} diff --git a/src/project/context/flowr-analyzer-dependencies-context.ts b/src/project/context/flowr-analyzer-dependencies-context.ts index 34c9a0a647b..75fc8f3cc9e 100644 --- a/src/project/context/flowr-analyzer-dependencies-context.ts +++ b/src/project/context/flowr-analyzer-dependencies-context.ts @@ -61,7 +61,7 @@ export class FlowrAnalyzerDependenciesContext extends AbstractFlowrAnalyzerConte case InvalidationEventType.Full: this.reset(); break; - case InvalidationEventType.FileInvalidate: + case InvalidationEventType.SingleFileInvalidate: // nothing to do break; default: diff --git a/src/project/context/flowr-analyzer-files-context.ts b/src/project/context/flowr-analyzer-files-context.ts index aae1af13fe2..7c941abb51f 100644 --- a/src/project/context/flowr-analyzer-files-context.ts +++ b/src/project/context/flowr-analyzer-files-context.ts @@ -167,7 +167,7 @@ export class FlowrAnalyzerFilesContext extends AbstractFlowrAnalyzerContext = new Map(); + private changedFilesWithOldContent: Map = new Map(); private oldParseResults: Map = new Map(); @@ -38,7 +41,7 @@ export class 
FlowrAnalyzerIncrementalAnalysisContext implements ReadOnlyFlowrAna this.oldParseResults = new Map(); } - handleFileInvalidate(filePath: FilePath, oldContent: string): void { + handleFileInvalidate(filePath: FilePath, oldContent: string | undefined): void { if(this.changedFilesWithOldContent.has(filePath)) { // If a file is changed multiple times since the last analysis, we only want to store the original old content as the old analysis results were computed with that. return; @@ -53,8 +56,8 @@ export class FlowrAnalyzerIncrementalAnalysisContext implements ReadOnlyFlowrAna case InvalidationEventType.Full: this.reset(); break; - case InvalidationEventType.FileInvalidate: - this.handleFileInvalidate(event.filePath, event.oldContent?.toString() ?? ''); + case InvalidationEventType.SingleFileInvalidate: + this.handleFileInvalidate(event.filePath, event.oldContent?.toString()); break; default: assertUnreachable(type); @@ -76,9 +79,11 @@ export class FlowrAnalyzerIncrementalAnalysisContext implements ReadOnlyFlowrAna return this.oldParseResults.get(filePath); } - public getAndRemoveOldContentOf(filePath: FilePath): string | undefined { - const oldContent = this.changedFilesWithOldContent.get(filePath); + public getOldContentOf(filePath: FilePath): string | undefined { + return this.changedFilesWithOldContent.get(filePath); + } + + public deleteOldContentOf(filePath: FilePath): void { this.changedFilesWithOldContent.delete(filePath); - return oldContent; } } diff --git a/src/project/context/flowr-analyzer-meta-context.ts b/src/project/context/flowr-analyzer-meta-context.ts index 38eb9a4d0bb..a5f211a4c6d 100644 --- a/src/project/context/flowr-analyzer-meta-context.ts +++ b/src/project/context/flowr-analyzer-meta-context.ts @@ -53,7 +53,7 @@ export class FlowrAnalyzerMetaContext implements ReadOnlyFlowrAnalyzerMetaContex case InvalidationEventType.Full: this.reset(); break; - case InvalidationEventType.FileInvalidate: + case InvalidationEventType.SingleFileInvalidate: // 
nothing to do break; default: diff --git a/src/project/context/flowr-file.ts b/src/project/context/flowr-file.ts index 137dc12473f..109638948c8 100644 --- a/src/project/context/flowr-file.ts +++ b/src/project/context/flowr-file.ts @@ -178,7 +178,7 @@ export abstract class FlowrFile { public readonly name = 'tree-sitter'; private readonly parser: Parser; + public readonly incremental = true; private static language: Parser.Language; - public incremental = true; /** * Initializes the underlying tree-sitter parser. This only needs to be called once globally. @@ -84,6 +84,11 @@ export class TreeSitterExecutor implements SyncParser { } const reparseInfo = computeReparseInfo(ctx, request.filePath); + // `computeReparseInfo` needs the stored old content to compute the edit against the + // previous tree. Once that snapshot has been consumed, drop it so a later invalidation + // can record a fresh old-content baseline for the next stored tree. + ctx.inc.deleteOldContentOf(request.filePath); + if(!reparseInfo) { // incremental parsing not possible return this.parser.parse(sourceCode); diff --git a/test/functionality/incremental/incremental-parsing.test.ts b/test/functionality/incremental/incremental-parsing.test.ts index 18689a4d8fc..eeb25f8d956 100644 --- a/test/functionality/incremental/incremental-parsing.test.ts +++ b/test/functionality/incremental/incremental-parsing.test.ts @@ -15,6 +15,7 @@ interface FileState { } interface IncrementalParsingScenario { + testLabel: string; initialFiles: readonly FileState[]; fileUpdates: readonly (readonly FileState[])[]; } @@ -183,43 +184,45 @@ async function executeFullParse(fileStates: readonly FileState[]): Promise { - const { analyzer, files } = await createAnalyzerForFiles(scenario.initialFiles); - const incrementalParseTracer = createIncrementalParseTracer(analyzer); - await analyzer.normalize(); - const currentFileStates: Map = new Map(scenario.initialFiles.map(fileState => [fileState.path, fileState.content])); - - try { - 
for(const updateStep of scenario.fileUpdates) { - const previousTrees = capturePreviousTrees(analyzer); - const previousFileStates = new Map(currentFileStates); - - applyUpdateStepToAnalyzer(files, updateStep); - applyUpdateStepToFileStates(currentFileStates, updateStep); - assertPipelineStateAfterUpdateStep(analyzer, updateStep); - - const changedPaths = changedPathsBetween(previousFileStates, currentFileStates); - const unchangedPaths = unchangedPathsBetween(previousFileStates, currentFileStates); - const { result: incrementalResult, incrementalParseCalls } = await incrementalParseTracer.trace( - async() => await analyzer.normalize() - ); - const reparsed = analyzer.peekParse() as ParseStepOutput; - assert(reparsed !== undefined, 'after parsing once more, the pipeline must contain the analysis results again'); - - assertChangedFilesUseIncrementalParse(changedPaths, previousTrees, incrementalParseCalls); - assertUnchangedFilesReusePreviousTrees(unchangedPaths, previousTrees, reparsed.files); - - const fileStatesFromMap = Array.from(currentFileStates, ([path, content]) => ({ path, content })); - const fullReparseResult = await executeFullParse(fileStatesFromMap); - assert.equal( - printNormalizedAstToMermaid(fullReparseResult), - printNormalizedAstToMermaid(incrementalResult), - 'The incremental parse result does not match the full parse result' - ); +function executeAndCompareScenario(scenario: IncrementalParsingScenario): void { + it(scenario.testLabel, async() => { + const { analyzer, files } = await createAnalyzerForFiles(scenario.initialFiles); + const incrementalParseTracer = createIncrementalParseTracer(analyzer); + await analyzer.normalize(); + const currentFileStates: Map = new Map(scenario.initialFiles.map(fileState => [fileState.path, fileState.content])); + + try { + for(const updateStep of scenario.fileUpdates) { + const previousTrees = capturePreviousTrees(analyzer); + const previousFileStates = new Map(currentFileStates); + + 
applyUpdateStepToAnalyzer(files, updateStep); + applyUpdateStepToFileStates(currentFileStates, updateStep); + assertPipelineStateAfterUpdateStep(analyzer, updateStep); + + const changedPaths = changedPathsBetween(previousFileStates, currentFileStates); + const unchangedPaths = unchangedPathsBetween(previousFileStates, currentFileStates); + const { result: incrementalResult, incrementalParseCalls } = await incrementalParseTracer.trace( + async() => await analyzer.normalize() + ); + const reparsed = analyzer.peekParse() as ParseStepOutput; + assert(reparsed !== undefined, 'after parsing once more, the pipeline must contain the analysis results again'); + + assertChangedFilesUseIncrementalParse(changedPaths, previousTrees, incrementalParseCalls); + assertUnchangedFilesReusePreviousTrees(unchangedPaths, previousTrees, reparsed.files); + + const fileStatesFromMap = Array.from(currentFileStates, ([path, content]) => ({ path, content })); + const fullReparseResult = await executeFullParse(fileStatesFromMap); + assert.equal( + printNormalizedAstToMermaid(fullReparseResult), + printNormalizedAstToMermaid(incrementalResult), + 'The incremental parse result does not match the full parse result' + ); + } + } finally { + incrementalParseTracer.restore(); } - } finally { - incrementalParseTracer.restore(); - } + }); } const lines = (...xs: string[]): string => xs.join('\n'); @@ -235,9 +238,11 @@ const file = ( const step = (...files: FileState[]): readonly FileState[] => files; const scenario = ( + testLabel: string, initialFiles: readonly FileState[], ...fileUpdates: readonly (readonly FileState[])[] ): IncrementalParsingScenario => ({ + testLabel, initialFiles, fileUpdates }); @@ -247,626 +252,555 @@ describe('Incremental Parsing produces same results as Full Parsing', () => { describe('one update set', () => { describe('single-file', () => { describe('no-op', () => { - it('empty file', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', '')], - 
step(file('a.R', '')) - )); - }); - - it('file with top-level content', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 42')], - step(file('a.R', 'x <- 42')) - )); - }); - - it('file with nested content', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))], - step(file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))) - )); - }); - - it('syntactically invalid file', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'print(')], - step(file('a.R', 'print(')) - )); - }); + executeAndCompareScenario(scenario( + 'empty file', + [file('a.R', '')], + step(file('a.R', '')) + )); + + executeAndCompareScenario(scenario( + 'file with top-level content', + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <- 42')) + )); + + executeAndCompareScenario(scenario( + 'file with nested content', + [file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))], + step(file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))) + )); + + executeAndCompareScenario(scenario( + 'syntactically invalid file', + [file('a.R', 'print(')], + step(file('a.R', 'print(')) + )); }); describe('insert', () => { - it('one full line into an empty file', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', '')], - step(file('a.R', 'x <- 42')) - )); - }); - - it('one full line at the start of a file', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 42')], - step(file('a.R', lines('y <- 21', 'x <- 42'))) - )); - }); - - it('one full line in the middle of a file', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('x <- 42', 'print(x)'))], - step(file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'))) - )); - }); - - it('one full line at the end of a file', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 42')], - 
step(file('a.R', lines('x <- 42', 'print(x)'))) - )); - }); - - it('multiple lines into an empty file', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', '')], - step(file('a.R', lines('x <- 42', 'y <- 21', 'z <- 10'))) - )); - }); - - it('multiple lines at different positions', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('x <- 42', 'print(x)'))], - step(file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))) - )); - }); - - it('a single character inside a number', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 42')], - step(file('a.R', 'x <- 420')) - )); - }); - - it('a single character inside an identifier', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 42')], - step(file('a.R', 'xy <- 42')) - )); - }); - - it('a token inside an expression', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 1 + 2')], - step(file('a.R', 'x <- 1 + 2 + 3')) - )); - }); - - it('a token inside a nested argument list', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'print(sum(1, 3))')], - step(file('a.R', 'print(sum(1, 2, 3))')) - )); - }); - - it('a trailing newline at end of file', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 42')], - step(file('a.R', 'x <- 42\n')) - )); - }); + executeAndCompareScenario(scenario( + 'one full line into an empty file', + [file('a.R', '')], + step(file('a.R', 'x <- 42')) + )); + + executeAndCompareScenario(scenario( + 'one full line at the start of a file', + [file('a.R', 'x <- 42')], + step(file('a.R', lines('y <- 21', 'x <- 42'))) + )); + + executeAndCompareScenario(scenario( + 'one full line in the middle of a file', + [file('a.R', lines('x <- 42', 'print(x)'))], + step(file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'))) + )); + + executeAndCompareScenario(scenario( + 'one full line at the end of a file', + [file('a.R', 
'x <- 42')], + step(file('a.R', lines('x <- 42', 'print(x)'))) + )); + + executeAndCompareScenario(scenario( + 'multiple lines into an empty file', + [file('a.R', '')], + step(file('a.R', lines('x <- 42', 'y <- 21', 'z <- 10'))) + )); + + executeAndCompareScenario(scenario( + 'multiple lines at different positions', + [file('a.R', lines('x <- 42', 'print(x)'))], + step(file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))) + )); + + executeAndCompareScenario(scenario( + 'a single character inside a number', + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <- 420')) + )); + + executeAndCompareScenario(scenario( + 'a single character inside an identifier', + [file('a.R', 'x <- 42')], + step(file('a.R', 'xy <- 42')) + )); + + executeAndCompareScenario(scenario( + 'a token inside an expression', + [file('a.R', 'x <- 1 + 2')], + step(file('a.R', 'x <- 1 + 2 + 3')) + )); + + executeAndCompareScenario(scenario( + 'a token inside a nested argument list', + [file('a.R', 'print(sum(1, 3))')], + step(file('a.R', 'print(sum(1, 2, 3))')) + )); + + executeAndCompareScenario(scenario( + 'a trailing newline at end of file', + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <- 42\n')) + )); }); describe('remove', () => { - it('one full line such that the file becomes empty', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 42')], - step(file('a.R', '')) - )); - }); - - it('one full line at the start of a file', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('y <- 21', 'x <- 42'))], - step(file('a.R', 'x <- 42')) - )); - }); - - it('one full line in the middle of a file', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'))], - step(file('a.R', lines('x <- 42', 'print(x)'))) - )); - }); - - it('one full line at the end of a file', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('x <- 42', 'print(x)'))], - 
step(file('a.R', 'x <- 42')) - )); - }); - - it('multiple lines such that the file becomes empty', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('x <- 42', 'y <- 21', 'z <- 10'))], - step(file('a.R', '')) - )); - }); - - it('multiple lines at different positions', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))], - step(file('a.R', lines('x <- 42', 'print(x)'))) - )); - }); - - it('a single character from a number', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 420')], - step(file('a.R', 'x <- 42')) - )); - }); - - it('a single character from an identifier', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'xy <- 42')], - step(file('a.R', 'x <- 42')) - )); - }); - - it('a token from an expression', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 1 + 2 + 3')], - step(file('a.R', 'x <- 1 + 2')) - )); - }); - - it('a token from a nested argument list', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'print(sum(1, 2, 3))')], - step(file('a.R', 'print(sum(1, 3))')) - )); - }); - - it('a trailing newline at end of file', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 42\n')], - step(file('a.R', 'x <- 42')) - )); - }); + executeAndCompareScenario(scenario( + 'one full line such that the file becomes empty', + [file('a.R', 'x <- 42')], + step(file('a.R', '')) + )); + + executeAndCompareScenario(scenario( + 'one full line at the start of a file', + [file('a.R', lines('y <- 21', 'x <- 42'))], + step(file('a.R', 'x <- 42')) + )); + + executeAndCompareScenario(scenario( + 'one full line in the middle of a file', + [file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'))], + step(file('a.R', lines('x <- 42', 'print(x)'))) + )); + + executeAndCompareScenario(scenario( + 'one full line at the end of a file', + 
[file('a.R', lines('x <- 42', 'print(x)'))], + step(file('a.R', 'x <- 42')) + )); + + executeAndCompareScenario(scenario( + 'multiple lines such that the file becomes empty', + [file('a.R', lines('x <- 42', 'y <- 21', 'z <- 10'))], + step(file('a.R', '')) + )); + + executeAndCompareScenario(scenario( + 'multiple lines at different positions', + [file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))], + step(file('a.R', lines('x <- 42', 'print(x)'))) + )); + + executeAndCompareScenario(scenario( + 'a single character from a number', + [file('a.R', 'x <- 420')], + step(file('a.R', 'x <- 42')) + )); + + executeAndCompareScenario(scenario( + 'a single character from an identifier', + [file('a.R', 'xy <- 42')], + step(file('a.R', 'x <- 42')) + )); + + executeAndCompareScenario(scenario( + 'a token from an expression', + [file('a.R', 'x <- 1 + 2 + 3')], + step(file('a.R', 'x <- 1 + 2')) + )); + + executeAndCompareScenario(scenario( + 'a token from a nested argument list', + [file('a.R', 'print(sum(1, 2, 3))')], + step(file('a.R', 'print(sum(1, 3))')) + )); + + executeAndCompareScenario(scenario( + 'a trailing newline at end of file', + [file('a.R', 'x <- 42\n')], + step(file('a.R', 'x <- 42')) + )); }); describe('replace', () => { - it('one full line at the start of a file', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('y <- 21', 'x <- 42'))], - step(file('a.R', lines('x <- 84', 'x <- 42'))) - )); - }); - - it('one full line in the middle of a file', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'))], - step(file('a.R', lines('x <- 42', 'y <- 21', 'print(x)'))) - )); - }); - - it('one full line at the end of a file', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('x <- 42', 'print(x)'))], - step(file('a.R', lines('x <- 42', 'x <- x * x'))) - )); - }); - - it('a partially replaced multi-line region', async() => { - 
await executeAndCompareScenario(scenario( - [file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))], - step(file('a.R', lines('y <- 21', 'x <- 21', 'y <- y * y', 'print(x)', 'print(y)'))) - )); - }); - - it('a fully replaced content', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))], - step(file('a.R', lines('z <- 10', 'z <- z + 32', 'print(z)'))) - )); - }); - - it('a single character in a number', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 42')], - step(file('a.R', 'x <- 43')) - )); - }); - - it('an operator token', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 1 + 2')], - step(file('a.R', 'x <- 1 * 2')) - )); - }); - - it('an identifier token', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('x <- 42', 'print(x)'))], - step(file('a.R', lines('value <- 42', 'print(value)'))) - )); - }); - - it('part of a single line expression', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- (1 + 2) * 3')], - step(file('a.R', 'x <- (1 + 20) * 3')) - )); - }); - - it('whitespace only on a single line', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 42')], - step(file('a.R', 'x <- 42')) - )); - }); - - it('whitespace only across multiple lines', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))], - step(file('a.R', lines('f <- function(x) {', '\t', '\ty <- x + 1', '\tprint(y)', '}'))) - )); - }); - - it('comment text', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 42 # old comment')], - step(file('a.R', 'x <- 42 # new comment')) - )); - }); - - it('a string literal', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'msg <- "abc"')], - step(file('a.R', 'msg <- "abcd"')) - 
)); - }); - - it('a UTF-8 string literal', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'msg <- "äöü"')], - step(file('a.R', 'msg <- "äöü€"')) - )); - }); - - it('a UTF-8 comment', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 42 # gruß')], - step(file('a.R', 'x <- 42 # grüße €')) - )); - }); - }); + executeAndCompareScenario(scenario( + 'one full line at the start of a file', + [file('a.R', lines('y <- 21', 'x <- 42'))], + step(file('a.R', lines('x <- 84', 'x <- 42'))) + )); - describe('syntax transitions', () => { - it('valid to invalid by removing the right-hand side of an assignment', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 42')], - step(file('a.R', 'x <-')) - )); - }); - - it('valid to invalid by removing a closing brace', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('f <- function(x) {', '\tprint(x)', '}'))], - step(file('a.R', lines('f <- function(x) {', '\tprint(x)'))) - )); - }); - - it('valid to invalid by removing a closing parenthesis', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'print(sum(1, 2))')], - step(file('a.R', 'print(sum(1, 2)')) - )); - }); - - it('invalid to valid by completing an assignment', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <-')], - step(file('a.R', 'x <- 42')) - )); - }); - - it('invalid to valid by restoring a closing brace', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('f <- function(x) {', '\tprint(x)'))], - step(file('a.R', lines('f <- function(x) {', '\tprint(x)', '}'))) - )); - }); - - it('invalid to valid by restoring a closing parenthesis', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'print(sum(1, 2)')], - step(file('a.R', 'print(sum(1, 2))')) - )); - }); - - it('invalid to invalid across different incomplete forms', async() => { - await executeAndCompareScenario(scenario( - 
[file('a.R', 'print(')], - step(file('a.R', 'function(,')) - )); - }); - }); + executeAndCompareScenario(scenario( + 'one full line in the middle of a file', + [file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'))], + step(file('a.R', lines('x <- 42', 'y <- 21', 'print(x)'))) + )); - describe('nested structures', () => { - it('inside a function body', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))], - step(file('a.R', lines('f <- function(x) {', '\ty <- x * 2', '\tprint(y)', '}'))) - )); - }); - - it('inside an if branch', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('if (x > 0) {', '\ty <- 1', '}'))], - step(file('a.R', lines('if (x > 0) {', '\ty <- 1', '\tz <- 2', '}'))) - )); - }); - - it('inside a for loop body', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', lines('for (i in 1:3) {', '\tprint(i)', '}'))], - step(file('a.R', lines('for (i in 1:3) {', '\ttotal <- i + 1', '\tprint(total)', '}'))) - )); - }); - - it('inside a nested argument list', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'print(sum(1, 2, 3))')], - step(file('a.R', 'print(sum(1, 20, 3))')) - )); - }); - - it('inside nested brackets and subexpressions', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- list(a = list(b = 1))')], - step(file('a.R', 'x <- list(a = list(b = 2))')) - )); - }); - }); + executeAndCompareScenario(scenario( + 'one full line at the end of a file', + [file('a.R', lines('x <- 42', 'print(x)'))], + step(file('a.R', lines('x <- 42', 'x <- x * x'))) + )); - }); + executeAndCompareScenario(scenario( + 'a partially replaced multi-line region', + [file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))], + step(file('a.R', lines('y <- 21', 'x <- 21', 'y <- y * y', 'print(x)', 'print(y)'))) + )); - describe('multi-file', () => { - it('editing only the 
first file while the second file stays unchanged', async() => { - await executeAndCompareScenario(scenario( - [ - file('a.R', lines('x <- 42', 'print(x)')), - file('b.R', lines('y <- 21', 'print(y)')) - ], - step( - file('a.R', lines('x <- 42', 'x <- x + 1', 'print(x)')), - file('b.R', lines('y <- 21', 'print(y)')) - ) + executeAndCompareScenario(scenario( + 'a fully replaced content', + [file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))], + step(file('a.R', lines('z <- 10', 'z <- z + 32', 'print(z)'))) )); - }); - it('editing only the second file while the first file stays unchanged', async() => { - await executeAndCompareScenario(scenario( - [ - file('a.R', lines('x <- 42', 'print(x)')), - file('b.R', lines('y <- 21', 'print(y)')) - ], - step( - file('a.R', lines('x <- 42', 'print(x)')), - file('b.R', lines('y <- 21', 'y <- y * 2', 'print(y)')) - ) + executeAndCompareScenario(scenario( + 'a single character in a number', + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <- 43')) )); - }); - it('editing both files independently in the same run', async() => { - await executeAndCompareScenario(scenario( - [ - file('a.R', lines('x <- 1', 'print(x)')), - file('b.R', lines('y <- 10', 'print(y)')) - ], - step( - file('a.R', lines('x <- 2', 'x <- x * 3', 'print(x)')), - file('b.R', lines('z <- 10', 'print(z + 1)')) - ) + executeAndCompareScenario(scenario( + 'an operator token', + [file('a.R', 'x <- 1 + 2')], + step(file('a.R', 'x <- 1 * 2')) )); - }); - it('adding a new file while another file stays unchanged', async() => { - await executeAndCompareScenario(scenario( - [ - file('a.R', lines('x <- 42', 'print(x)')), - file('b.R', '') - ], - step( - file('a.R', lines('x <- 42', 'print(x)')), - file('b.R', lines('helper <- function(x) {', '\tx * 2', '}', 'print(helper(21))')) - ) + executeAndCompareScenario(scenario( + 'an identifier token', + [file('a.R', lines('x <- 42', 'print(x)'))], + step(file('a.R', lines('value <- 42', 'print(value)'))) 
)); - }); - it('removing one file while another file stays unchanged', async() => { - await executeAndCompareScenario(scenario( - [ - file('a.R', lines('x <- 42', 'print(x)')), - file('b.R', lines('tmp <- 1', 'print(tmp)')) - ], - step( - file('a.R', lines('x <- 42', 'print(x)')), - file('b.R', '') - ) + executeAndCompareScenario(scenario( + 'part of a single line expression', + [file('a.R', 'x <- (1 + 2) * 3')], + step(file('a.R', 'x <- (1 + 20) * 3')) )); - }); - it('mixing file modification, file addition, and file removal in one run', async() => { - await executeAndCompareScenario(scenario( - [ - file('a.R', lines('x <- 1', 'print(x)')), - file('b.R', ''), - file('c.R', lines('obsolete <- TRUE', 'print(obsolete)')) - ], - step( - file('a.R', lines('x <- 1', 'x <- x + 1', 'print(x)')), - file('b.R', lines('y <- 21', 'print(y)')), - file('c.R', '') - ) + executeAndCompareScenario(scenario( + 'whitespace only on a single line', + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <- 42')) )); - }); - it('making one file invalid while another file remains unchanged and valid', async() => { - await executeAndCompareScenario(scenario( - [ - file('a.R', lines('f <- function(x) {', '\tprint(x)', '}')), - file('b.R', lines('y <- 21', 'print(y)')) - ], - step( - file('a.R', lines('f <- function(x) {', '\tprint(x)')), - file('b.R', lines('y <- 21', 'print(y)')) - ) + executeAndCompareScenario(scenario( + 'whitespace only across multiple lines', + [file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))], + step(file('a.R', lines('f <- function(x) {', '\t', '\ty <- x + 1', '\tprint(y)', '}'))) )); - }); - it('editing UTF-8 content in one file while another file stays unchanged', async() => { - await executeAndCompareScenario(scenario( - [ - file('a.R', lines('msg <- "äöü"', 'print(msg)')), - file('b.R', lines('x <- 42', 'print(x)')) - ], - step( - file('a.R', lines('msg <- "äöü€"', 'print(msg)')), - file('b.R', lines('x <- 42', 'print(x)')) - ) + 
executeAndCompareScenario(scenario( + 'comment text', + [file('a.R', 'x <- 42 # old comment')], + step(file('a.R', 'x <- 42 # new comment')) )); - }); - it('editing inside a nested construct in one file and at top level in another', async() => { - await executeAndCompareScenario(scenario( - [ - file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}')), - file('b.R', lines('z <- 3', 'print(z)')) - ], - step( - file('a.R', lines('f <- function(x) {', '\ty <- x * 2', '\tprint(y)', '}')), - file('b.R', lines('z <- 3', 'z <- z + 1', 'print(z)')) - ) + executeAndCompareScenario(scenario( + 'a string literal', + [file('a.R', 'msg <- "abc"')], + step(file('a.R', 'msg <- "abcd"')) + )); + + executeAndCompareScenario(scenario( + 'a UTF-8 string literal', + [file('a.R', 'msg <- "äöü"')], + step(file('a.R', 'msg <- "äöü€"')) )); - }); - it('editing only one of two syntactically invalid files', async() => { - await executeAndCompareScenario(scenario( - [ - file('a.R', 'print('), - file('b.R', 'x <-') - ], - step( - file('a.R', 'print(1)'), - file('b.R', 'x <-') - ) + executeAndCompareScenario(scenario( + 'a UTF-8 comment', + [file('a.R', 'x <- 42 # gruß')], + step(file('a.R', 'x <- 42 # grüße €')) )); }); - }); - }); + describe('syntax transitions', () => { + executeAndCompareScenario(scenario( + 'valid to invalid by removing the right-hand side of an assignment', + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <-')) + )); - describe('multiple update sets', () => { - describe('single-file', () => { - it('keeps the cached pipeline across an empty update step before a later real edit', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 1')], - step(), - step(file('a.R', 'x <- 2')) + executeAndCompareScenario(scenario( + 'valid to invalid by removing a closing brace', + [file('a.R', lines('f <- function(x) {', '\tprint(x)', '}'))], + step(file('a.R', lines('f <- function(x) {', '\tprint(x)'))) )); - }); - it('reuses the previous tree 
for a no-op invalidation after a prior real edit', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 1')], - step(file('a.R', 'x <- 10')), - step(file('a.R', 'x <- 10')) + executeAndCompareScenario(scenario( + 'valid to invalid by removing a closing parenthesis', + [file('a.R', 'print(sum(1, 2))')], + step(file('a.R', 'print(sum(1, 2)')) )); - }); - it('handles multiple updates in one step whose final content matches the original content', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 1')], - step( - file('a.R', 'x <- 10'), - file('a.R', 'x <- 1') - ) + executeAndCompareScenario(scenario( + 'invalid to valid by completing an assignment', + [file('a.R', 'x <-')], + step(file('a.R', 'x <- 42')) )); - }); - it('recovers across valid, invalid, cached, and valid states on the same analyzer instance', async() => { - await executeAndCompareScenario(scenario( - [file('a.R', 'x <- 1')], - step(file('a.R', 'x <-')), - step(), - step(file('a.R', 'x <- 1')) + executeAndCompareScenario(scenario( + 'invalid to valid by restoring a closing brace', + [file('a.R', lines('f <- function(x) {', '\tprint(x)'))], + step(file('a.R', lines('f <- function(x) {', '\tprint(x)', '}'))) )); - }); - }); - describe('multi-file', () => { - it('keeps the cached pipeline on an empty step before changing only one file', async() => { - await executeAndCompareScenario(scenario( - [ - file('a.R', 'x <- 1'), - file('b.R', 'y <- 2') - ], - step(), - step( - file('a.R', 'x <- 10'), - file('b.R', 'y <- 2') - ) + executeAndCompareScenario(scenario( + 'invalid to valid by restoring a closing parenthesis', + [file('a.R', 'print(sum(1, 2)')], + step(file('a.R', 'print(sum(1, 2))')) )); - }); - it('handles a no-op invalidation for one file while another file changes in the next step', async() => { - await executeAndCompareScenario(scenario( - [ - file('a.R', 'x <- 1'), - file('b.R', 'y <- 2') - ], - step( - file('a.R', 'x <- 10'), - file('b.R', 'y <- 
2') - ), - step( - file('a.R', 'x <- 10'), - file('b.R', 'y <- 20') - ) + executeAndCompareScenario(scenario( + 'invalid to invalid across different incomplete forms', + [file('a.R', 'print(')], + step(file('a.R', 'function(,')) )); }); - it('handles repeated updates to one file in a step while another file ends up truly changed', async() => { - await executeAndCompareScenario(scenario( - [ - file('a.R', 'x <- 1'), - file('b.R', 'y <- 2') - ], - step( - file('a.R', 'x <- 10'), - file('a.R', 'x <- 1'), - file('b.R', 'y <- 20') - ) + describe('nested structures', () => { + executeAndCompareScenario(scenario( + 'inside a function body', + [file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))], + step(file('a.R', lines('f <- function(x) {', '\ty <- x * 2', '\tprint(y)', '}'))) + )); + + executeAndCompareScenario(scenario( + 'inside an if branch', + [file('a.R', lines('if (x > 0) {', '\ty <- 1', '}'))], + step(file('a.R', lines('if (x > 0) {', '\ty <- 1', '\tz <- 2', '}'))) + )); + + executeAndCompareScenario(scenario( + 'inside a for loop body', + [file('a.R', lines('for (i in 1:3) {', '\tprint(i)', '}'))], + step(file('a.R', lines('for (i in 1:3) {', '\ttotal <- i + 1', '\tprint(total)', '}'))) + )); + + executeAndCompareScenario(scenario( + 'inside a nested argument list', + [file('a.R', 'print(sum(1, 2, 3))')], + step(file('a.R', 'print(sum(1, 20, 3))')) )); - }); - it('switches which file changes across successive steps while the other is reused', async() => { - await executeAndCompareScenario(scenario( - [ - file('a.R', lines('x <- 1', 'print(x)')), - file('b.R', lines('y <- 2', 'print(y)')) - ], - step( - file('a.R', lines('x <- 10', 'print(x)')), - file('b.R', lines('y <- 2', 'print(y)')) - ), - step( - file('a.R', lines('x <- 10', 'print(x)')), - file('b.R', lines('y <- 20', 'print(y)')) - ) + executeAndCompareScenario(scenario( + 'inside nested brackets and subexpressions', + [file('a.R', 'x <- list(a = list(b = 1))')], + 
step(file('a.R', 'x <- list(a = list(b = 2))')) )); }); + + }); + + describe('multi-file', () => { + executeAndCompareScenario(scenario( + 'editing only the first file while the second file stays unchanged', + [ + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', lines('y <- 21', 'print(y)')) + ], + step( + file('a.R', lines('x <- 42', 'x <- x + 1', 'print(x)')), + file('b.R', lines('y <- 21', 'print(y)')) + ) + )); + + executeAndCompareScenario(scenario( + 'editing only the second file while the first file stays unchanged', + [ + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', lines('y <- 21', 'print(y)')) + ], + step( + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', lines('y <- 21', 'y <- y * 2', 'print(y)')) + ) + )); + + executeAndCompareScenario(scenario( + 'editing both files independently in the same run', + [ + file('a.R', lines('x <- 1', 'print(x)')), + file('b.R', lines('y <- 10', 'print(y)')) + ], + step( + file('a.R', lines('x <- 2', 'x <- x * 3', 'print(x)')), + file('b.R', lines('z <- 10', 'print(z + 1)')) + ) + )); + + executeAndCompareScenario(scenario( + 'adding a new file while another file stays unchanged', + [ + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', '') + ], + step( + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', lines('helper <- function(x) {', '\tx * 2', '}', 'print(helper(21))')) + ) + )); + + executeAndCompareScenario(scenario( + 'removing one file while another file stays unchanged', + [ + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', lines('tmp <- 1', 'print(tmp)')) + ], + step( + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', '') + ) + )); + + executeAndCompareScenario(scenario( + 'mixing file modification, file addition, and file removal in one run', + [ + file('a.R', lines('x <- 1', 'print(x)')), + file('b.R', ''), + file('c.R', lines('obsolete <- TRUE', 'print(obsolete)')) + ], + step( + file('a.R', lines('x <- 1', 'x <- x + 1', 'print(x)')), + file('b.R', 
lines('y <- 21', 'print(y)')), + file('c.R', '') + ) + )); + + executeAndCompareScenario(scenario( + 'making one file invalid while another file remains unchanged and valid', + [ + file('a.R', lines('f <- function(x) {', '\tprint(x)', '}')), + file('b.R', lines('y <- 21', 'print(y)')) + ], + step( + file('a.R', lines('f <- function(x) {', '\tprint(x)')), + file('b.R', lines('y <- 21', 'print(y)')) + ) + )); + + executeAndCompareScenario(scenario( + 'editing UTF-8 content in one file while another file stays unchanged', + [ + file('a.R', lines('msg <- "äöü"', 'print(msg)')), + file('b.R', lines('x <- 42', 'print(x)')) + ], + step( + file('a.R', lines('msg <- "äöü€"', 'print(msg)')), + file('b.R', lines('x <- 42', 'print(x)')) + ) + )); + + executeAndCompareScenario(scenario( + 'editing inside a nested construct in one file and at top level in another', + [ + file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}')), + file('b.R', lines('z <- 3', 'print(z)')) + ], + step( + file('a.R', lines('f <- function(x) {', '\ty <- x * 2', '\tprint(y)', '}')), + file('b.R', lines('z <- 3', 'z <- z + 1', 'print(z)')) + ) + )); + + executeAndCompareScenario(scenario( + 'editing only one of two syntactically invalid files', + [ + file('a.R', 'print('), + file('b.R', 'x <-') + ], + step( + file('a.R', 'print(1)'), + file('b.R', 'x <-') + ) + )); + + }); + }); + + describe('multiple update sets', () => { + describe('single-file', () => { + executeAndCompareScenario(scenario( + 'keeps the cached pipeline across an empty update step before a later real edit', + [file('a.R', 'x <- 1')], + step(), + step(file('a.R', 'x <- 2')) + )); + + executeAndCompareScenario(scenario( + 'reuses the previous tree for a no-op invalidation after a prior real edit', + [file('a.R', 'x <- 1')], + step(file('a.R', 'x <- 10')), + step(file('a.R', 'x <- 10')) + )); + + executeAndCompareScenario(scenario( + 'handles multiple updates in one step whose final content matches the original 
content', + [file('a.R', 'x <- 1')], + step( + file('a.R', 'x <- 10'), + file('a.R', 'x <- 1') + ) + )); + + executeAndCompareScenario(scenario( + 'recovers across valid, invalid, cached, and valid states on the same analyzer instance', + [file('a.R', 'x <- 1')], + step(file('a.R', 'x <-')), + step(), + step(file('a.R', 'x <- 1')) + )); + }); + + describe('multi-file', () => { + executeAndCompareScenario(scenario( + 'keeps the cached pipeline on an empty step before changing only one file', + [ + file('a.R', 'x <- 1'), + file('b.R', 'y <- 2') + ], + step(), + step( + file('a.R', 'x <- 10'), + file('b.R', 'y <- 2') + ) + )); + + executeAndCompareScenario(scenario( + 'handles a no-op invalidation for one file while another file changes in the next step', + [ + file('a.R', 'x <- 1'), + file('b.R', 'y <- 2') + ], + step( + file('a.R', 'x <- 10'), + file('b.R', 'y <- 2') + ), + step( + file('a.R', 'x <- 10'), + file('b.R', 'y <- 20') + ) + )); + + executeAndCompareScenario(scenario( + 'handles repeated updates to one file in a step while another file ends up truly changed', + [ + file('a.R', 'x <- 1'), + file('b.R', 'y <- 2') + ], + step( + file('a.R', 'x <- 10'), + file('a.R', 'x <- 1'), + file('b.R', 'y <- 20') + ) + )); + + executeAndCompareScenario(scenario( + 'switches which file changes across successive steps while the other is reused', + [ + file('a.R', lines('x <- 1', 'print(x)')), + file('b.R', lines('y <- 2', 'print(y)')) + ], + step( + file('a.R', lines('x <- 10', 'print(x)')), + file('b.R', lines('y <- 2', 'print(y)')) + ), + step( + file('a.R', lines('x <- 10', 'print(x)')), + file('b.R', lines('y <- 20', 'print(y)')) + ) + )); }); }); });