diff --git a/src/control-flow/extract-cfg.ts b/src/control-flow/extract-cfg.ts index dcf39225c54..808eee50c32 100644 --- a/src/control-flow/extract-cfg.ts +++ b/src/control-flow/extract-cfg.ts @@ -17,7 +17,7 @@ import type { RAccess } from '../r-bridge/lang-4.x/ast/model/nodes/r-access'; import type { DataflowGraph } from '../dataflow/graph/graph'; import { getAllFunctionCallTargets } from '../dataflow/internal/linker'; import type { DataflowGraphVertexFunctionCall } from '../dataflow/graph/vertex'; -import { isFunctionCallVertex, isFunctionDefinitionVertex, VertexType } from '../dataflow/graph/vertex'; +import { isFunctionCallVertex, isFunctionDefinitionVertex } from '../dataflow/graph/vertex'; import type { RExpressionList } from '../r-bridge/lang-4.x/ast/model/nodes/r-expression-list'; import { type CfgExpressionVertex, CfgEdge, CfgVertex, @@ -33,8 +33,11 @@ import type { ReadOnlyFlowrAnalyzerContext } from '../project/context/flowr-anal import type { RIfThenElse } from '../r-bridge/lang-4.x/ast/model/nodes/r-if-then-else'; import type { StatefulFoldFunctions } from '../r-bridge/lang-4.x/ast/model/processing/stateful-fold'; import { foldAstStateful } from '../r-bridge/lang-4.x/ast/model/processing/stateful-fold'; +import type { RNode } from '../r-bridge/lang-4.x/ast/model/model'; import { RLoopConstructs } from '../r-bridge/lang-4.x/ast/model/model'; import { BuiltInProcName } from '../dataflow/environments/built-in-proc-name'; +import type { RBreak } from '../r-bridge/lang-4.x/ast/model/nodes/r-break'; +import { getOriginInDfg, OriginType } from '../dataflow/origin/dfg-get-origin'; type CfgDownState = [loop: boolean, fn: boolean]; @@ -76,6 +79,59 @@ const cfgFolds: StatefulFoldFunctions { + const functionCallWithDfg = cfgFunctionCallWithDataflow(dfg); + const originToCfgProcessor = { + [BuiltInProcName.Access]: (node: RNode, args: (ControlFlowInformation | typeof EmptyArgument)[], _: CfgDownState) => cfgAccess(node as RAccess, ensureCfg(args[0]), 
args.slice(1)), + [BuiltInProcName.Pipe]: (node: RNode, args: (ControlFlowInformation | typeof EmptyArgument)[], _: CfgDownState) => cfgBinaryOp(node as RBinaryOp, ensureCfg(args[0]), ensureCfg(args[1])), + [BuiltInProcName.ForLoop]: (node: RNode, args: (ControlFlowInformation | typeof EmptyArgument)[], _: CfgDownState) => cfgFor(node as RForLoop, ensureCfg(args[0]), ensureCfg(args[1]), ensureCfg(args[2])), + [BuiltInProcName.RepeatLoop]: (node: RNode, args: (ControlFlowInformation | typeof EmptyArgument)[], _: CfgDownState) => cfgRepeat(node as RRepeatLoop, ensureCfg(args[0])), + [BuiltInProcName.WhileLoop]: (node: RNode, args: (ControlFlowInformation | typeof EmptyArgument)[], _: CfgDownState) => cfgWhile(node as RWhileLoop, ensureCfg(args[0]), ensureCfg(args[1])), + [BuiltInProcName.Break]: (node: RNode, args: (ControlFlowInformation | typeof EmptyArgument)[], down: CfgDownState) => cfgBreak(node as RBreak, down), + [BuiltInProcName.Next]: (node: RNode, args: (ControlFlowInformation | typeof EmptyArgument)[], down: CfgDownState) => cfgNext(node as RBreak, down), + [BuiltInProcName.IfThenElse]: (node: RNode, args: (ControlFlowInformation | typeof EmptyArgument)[], _: CfgDownState) => cfgIfThenElse(node as RIfThenElse, ensureCfg(args[0]), ensureCfg(args[1]), args[2]), + [BuiltInProcName.Function]: (node: RNode, args: (ControlFlowInformation | typeof EmptyArgument)[], down: CfgDownState) => functionCallWithDfg(node as RFunctionCall, ensureCfg(args[0]), args.slice(1), down), + } as const; + + const withOrigin = (node: RNode, args: (ControlFlowInformation | typeof EmptyArgument)[], down: CfgDownState, defaultHandler: keyof typeof originToCfgProcessor) => { + const origin = getOriginInDfg(dfg, node.info.id); + + if(origin !== undefined && origin.length === 1 && origin[0].type === OriginType.BuiltInFunctionOrigin) { + const handler = originToCfgProcessor[origin[0].proc as keyof typeof originToCfgProcessor] ?? 
originToCfgProcessor[defaultHandler]; + return handler(node, args, down); + } + + return originToCfgProcessor[defaultHandler](node, args, down); + }; + + const newFolds: StatefulFoldFunctions = { + ...cfgFolds, + foldAccess: (node, name, access, down) => withOrigin(node, [name, ...access], down, BuiltInProcName.Access), + foldPipe: (op, lhs, rhs, down) => withOrigin(op, [lhs, rhs], down, BuiltInProcName.Pipe), + foldIfThenElse: (ifThenExpr, cond, then, otherwise, down) => withOrigin(ifThenExpr, [cond, then, otherwise ?? EmptyArgument], down, BuiltInProcName.IfThenElse), + loop: { + ...cfgFolds.loop, + foldFor: (loop, variable, vector, body, down) => withOrigin(loop, [variable, vector, body], down, BuiltInProcName.ForLoop), + foldRepeat: (loop, body, down) => withOrigin(loop, [body], down, BuiltInProcName.RepeatLoop), + foldWhile: (loop, condition, body, down) => withOrigin(loop, [condition, body], down, BuiltInProcName.WhileLoop), + foldBreak: (brk, down) => withOrigin(brk, [], down, BuiltInProcName.Break), + foldNext: (next, down) => withOrigin(next, [], down, BuiltInProcName.Next) + }, + functions: { + ...cfgFolds.functions, + foldFunctionCall: (call, name, args, down) => withOrigin(call, [name, ...args], down, BuiltInProcName.Function) + } + }; + + return newFolds; +} + const ignoreFunctDefCfgFolds: StatefulFoldFunctions = { ...cfgFolds, functions: { @@ -84,17 +140,6 @@ const ignoreFunctDefCfgFolds: StatefulFoldFunctions { - const newFolds = { - ...cfgFolds, - }; - newFolds.functions = { - ...cfgFolds.functions, - foldFunctionCall: cfgFunctionCallWithDataflow(dataflowGraph, newFolds) - }; - return newFolds; -} - /** * Given a normalized AST, this approximates the control flow graph of the program. * This view is different from the computation of the dataflow graph and may differ, @@ -217,7 +262,11 @@ function identifyMayStatementType(node: RNodeWithParent) { return node.info.role === RoleInParent.ExpressionListChild ? 
CfgVertexType.Statement : CfgVertexType.Expression; } -function cfgIfThenElse(ifNode: RNodeWithParent, condition: ControlFlowInformation, then: ControlFlowInformation, otherwise: ControlFlowInformation | undefined): ControlFlowInformation { +function cfgIfThenElse(ifNode: RNodeWithParent, condition: ControlFlowInformation, then: ControlFlowInformation, otherwise: ControlFlowInformation | undefined | typeof EmptyArgument): ControlFlowInformation { + if(otherwise === EmptyArgument) { + otherwise = undefined; + } + const ifId = ifNode.info.id; const graph = new ControlFlowGraph(); graph.addVertex(CfgVertex.makeExprOrStm(ifId, identifyMayStatementType(ifNode), { mid: condition.exitPoints, end: [CfgVertex.toExitId(ifId)] })); @@ -472,27 +521,8 @@ function cfgFunctionCall(call: RFunctionCall, name: ControlFl export const ResolvedCallSuffix = CfgVertex.toExitId('-resolved-call'); -const OriginToFoldTypeMap: Partial, call: RFunctionCall, args: (ControlFlowInformation | typeof EmptyArgument)[], down: CfgDownState, callVtx: DataflowGraphVertexFunctionCall) => ControlFlowInformation>> = { - [BuiltInProcName.IfThenElse]: (folds, call, args, down) => { - // arguments are in order! - return folds.foldIfThenElse( - call as RNodeWithParent as RIfThenElse, // we will have to this more sophisticated if we rewrite the dfg based generation - args[0] === EmptyArgument ? emptyControlFlowInformation() : args[0], - args[1] === EmptyArgument ? emptyControlFlowInformation() : args[1], - args[2] === EmptyArgument ? 
emptyControlFlowInformation() : args[2], - down - ); - } -}; -function cfgFunctionCallWithDataflow(graph: DataflowGraph, folds: StatefulFoldFunctions): typeof cfgFunctionCall { +function cfgFunctionCallWithDataflow(graph: DataflowGraph): typeof cfgFunctionCall { return (call: RFunctionCall, name: ControlFlowInformation, args: (ControlFlowInformation | typeof EmptyArgument)[], down: CfgDownState): ControlFlowInformation => { - const vtx = graph.getVertex(call.info.id); - if(vtx?.tag === VertexType.FunctionCall && vtx.onlyBuiltin && vtx.origin.length === 1) { - const mayMap = OriginToFoldTypeMap[vtx.origin[0] as BuiltInProcName]; - if(mayMap) { - return mayMap(folds, call, args, down, vtx); - } - } const baseCfg = cfgFunctionCall(call, name, args, down); /* try to resolve the call and link the target definitions */ diff --git a/src/control-flow/semantic-cfg-guided-visitor.ts b/src/control-flow/semantic-cfg-guided-visitor.ts index fb91e2de76a..6cdf25cf438 100644 --- a/src/control-flow/semantic-cfg-guided-visitor.ts +++ b/src/control-flow/semantic-cfg-guided-visitor.ts @@ -284,6 +284,8 @@ export class SemanticCfgGuidedVisitor< return this.onS7DispatchCall({ call }); case BuiltInProcName.Break: return this.onBreakCall({ call }); + case BuiltInProcName.Next: + return this.onNextCall({ call }); case BuiltInProcName.Return: return this.onReturnCall({ call }); case BuiltInProcName.Unnamed: @@ -683,7 +685,17 @@ export class SemanticCfgGuidedVisitor< * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler. * @protected */ - protected onBreakCall(_data: { call: DataflowGraphVertexFunctionCall }) {} + protected onBreakCall(_data: { call: DataflowGraphVertexFunctionCall }) { } + + /** + * This event triggers for every call to `next` in a loop. + * + * For example, this triggers for `next` in `repeat { next }`. + * + * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler. 
+ * @protected + */ + protected onNextCall(_data: { call: DataflowGraphVertexFunctionCall }) {} /** * This event triggers for every call to `return` to explicitly return a value in a function. diff --git a/src/dataflow/environments/built-in-proc-name.ts b/src/dataflow/environments/built-in-proc-name.ts index 998686db387..f2e521f5ef4 100644 --- a/src/dataflow/environments/built-in-proc-name.ts +++ b/src/dataflow/environments/built-in-proc-name.ts @@ -12,6 +12,8 @@ export enum BuiltInProcName { AssignmentLike = 'builtin:assignment-like', /** for `break` calls */ Break = 'builtin:break', + /** for `next` calls */ + Next = 'builtin:next', /** the default built-in processor, see {@link defaultBuiltInProcessor} */ Default = 'builtin:default', /** Just a more performant variant of the default processor for built-ins that need to read all their arguments, see {@link defaultBuiltInProcessor}, this will still produce the origin `BuiltIn.Default` */ @@ -86,4 +88,4 @@ export enum BuiltInProcName { Vector = 'builtin:vector', /** for `while` loops, see {@link processWhileLoop} */ WhileLoop = 'builtin:while-loop', -} \ No newline at end of file +} diff --git a/src/dataflow/environments/default-builtin-config.ts b/src/dataflow/environments/default-builtin-config.ts index 0d1ee9a1fc0..27535e30dc3 100644 --- a/src/dataflow/environments/default-builtin-config.ts +++ b/src/dataflow/environments/default-builtin-config.ts @@ -248,10 +248,10 @@ export const DefaultBuiltinConfig = [ assumePrimitive: false }, { type: 'function', names: ['try'], processor: BuiltInProcName.Try, config: { block: 'expr', handlers: {} }, assumePrimitive: true }, - { type: 'function', names: ['tryCatch', 'tryCatchLog'], processor: BuiltInProcName.Try, config: { block: 'expr', handlers: { error: 'error', finally: 'finally' } }, assumePrimitive: true }, - { type: 'function', names: ['stopifnot', 'assert_that'], processor: BuiltInProcName.StopIfNot, config: {}, assumePrimitive: false }, - { type: 'function', names: 
['break'], processor: BuiltInProcName.Default, config: { useAsProcessor: BuiltInProcName.Break, cfg: ExitPointType.Break }, assumePrimitive: false }, - { type: 'function', names: ['next'], processor: BuiltInProcName.Default, config: { cfg: ExitPointType.Next }, assumePrimitive: false }, + { type: 'function', names: ['tryCatch', 'tryCatchLog'], processor: BuiltInProcName.Try, config: { block: 'expr', handlers: { error: 'error', finally: 'finally' } }, assumePrimitive: true }, + { type: 'function', names: ['stopifnot', 'assert_that'], processor: BuiltInProcName.StopIfNot, config: {}, assumePrimitive: false }, + { type: 'function', names: ['break'], processor: BuiltInProcName.Default, config: { useAsProcessor: BuiltInProcName.Break, cfg: ExitPointType.Break }, assumePrimitive: false }, + { type: 'function', names: ['next'], processor: BuiltInProcName.Default, config: { useAsProcessor: BuiltInProcName.Next, cfg: ExitPointType.Next }, assumePrimitive: false }, { type: 'function', names: ['{'], processor: BuiltInProcName.ExpressionList, config: {}, assumePrimitive: true }, { type: 'function', names: ['source'], processor: BuiltInProcName.Source, config: { includeFunctionCall: true, forceFollow: false }, assumePrimitive: false }, { type: 'function', names: ['[', '[['], processor: BuiltInProcName.Access, config: { treatIndicesAsString: false }, assumePrimitive: true }, diff --git a/src/r-bridge/lang-4.x/ast/model/nodes/r-access.ts b/src/r-bridge/lang-4.x/ast/model/nodes/r-access.ts index 5982b223437..dcd66eb5931 100644 --- a/src/r-bridge/lang-4.x/ast/model/nodes/r-access.ts +++ b/src/r-bridge/lang-4.x/ast/model/nodes/r-access.ts @@ -2,7 +2,7 @@ import type { RAstNodeBase, Location, NoInfo } from '../model'; import { RNode } from '../model'; import { RType } from '../type'; import type { RArgument, RUnnamedArgument } from './r-argument'; -import type { EmptyArgument } from './r-function-call'; +import type { EmptyArgument, EmptyArgument } from './r-function-call'; /** * 
Represents an R Indexing operation with `$`, `@`, `[[`, or `[`. @@ -54,5 +54,11 @@ export const RAccess = { */ isIndex(this: void, node: RNode | undefined): node is RIndexAccess { return RAccess.is(node) && (node.operator === '[' || node.operator === '[['); + }, + /** + * Desugar the arguments from {@link StatefulFoldFunctions} into a plain array + */ + desugar(this: void, name: Arg, access: (Arg | typeof EmptyArgument)[]): (Arg | typeof EmptyArgument)[] { + return [name, ...access]; } -} as const; \ No newline at end of file +} as const; diff --git a/test/functionality/dataflow/main/loops/dataflow-for-loop.test.ts b/test/functionality/dataflow/main/loops/dataflow-for-loop.test.ts index c3a87faf997..5dcfc267865 100644 --- a/test/functionality/dataflow/main/loops/dataflow-for-loop.test.ts +++ b/test/functionality/dataflow/main/loops/dataflow-for-loop.test.ts @@ -233,7 +233,7 @@ print(x)`, emptyGraph() shell, `x <- 1 repeat { x <- 2; - if(foo) + if(foo) break } print(x)`, emptyGraph() @@ -288,7 +288,7 @@ print(x)`, emptyGraph() .call('7', '<-', [argumentInCall('5'), argumentInCall('6')], { origin: [BuiltInProcName.Assignment], returns: ['5'], reads: [NodeId.toBuiltIn('<-'), 6], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2'), cds: [{ id: '15' }] }) .calls('7', NodeId.toBuiltIn('<-')) .argument('7', ['6', '5']) - .call('8', 'next', [], { origin: [BuiltInProcName.Default], returns: [], reads: [NodeId.toBuiltIn('next')], environment: defaultEnv().defineVariable('x', '5', '7'), cds: [{ id: '15' }] }) + .call('8', 'next', [], { origin: [BuiltInProcName.Next], returns: [], reads: [NodeId.toBuiltIn('next')], environment: defaultEnv().defineVariable('x', '5', '7'), cds: [{ id: '15' }] }) .calls('8', NodeId.toBuiltIn('next')) .argument('14', '7') .call('14', '{', [argumentInCall('7')], { origin: [BuiltInProcName.ExpressionList], returns: [], reads: [NodeId.toBuiltIn('{')], environment: defaultEnv().defineVariable('x', '0', '2'), cds: [{ id: '15' }] 
}) @@ -357,7 +357,7 @@ print(x)`, emptyGraph() shell, `x <- 1 for(i in 1:100) { x <- 2; - if(foo) + if(foo) break } print(x)`, emptyGraph() @@ -421,7 +421,7 @@ print(x)`, emptyGraph() .call('11', '<-', [argumentInCall('9', { cds: [] }), argumentInCall('10', { cds: [{ id: '19', when: true }] })], { returns: ['9'], reads: [NodeId.toBuiltIn('<-'), 10], onlyBuiltIn: true, cds: [{ id: '19', when: true }] }) .calls('11', NodeId.toBuiltIn('<-')) .argument('11', ['10', '9']) - .call('12', 'next', [], { returns: [], reads: [NodeId.toBuiltIn('next')], cds: [{ id: '19', when: true }], environment: defaultEnv().defineVariable('x', '9', '11', [{ id: '19', when: true }]) }) + .call('12', 'next', [], { origin: [BuiltInProcName.Next], returns: [], reads: [NodeId.toBuiltIn('next')], cds: [{ id: '19', when: true }], environment: defaultEnv().defineVariable('x', '9', '11', [{ id: '19', when: true }]) }) .calls('12', NodeId.toBuiltIn('next')) .argument('18', '11') .call('18', '{', [argumentInCall('11', { cds: [] })], { returns: [], reads: [NodeId.toBuiltIn('{')], cds: [{ id: '19', when: true }] }) @@ -489,7 +489,7 @@ print(x)`, emptyGraph() shell, `x <- 1 while(TRUE) { x <- 2; - if(foo) + if(foo) break } print(x)`, emptyGraph() @@ -544,7 +544,7 @@ print(x)`, emptyGraph() .call('8', '<-', [argumentInCall('6', { cds: [] }), argumentInCall('7', { cds: [{ id: '16', when: true }] })], { returns: ['6'], reads: [NodeId.toBuiltIn('<-'), 7], onlyBuiltIn: true, cds: [{ id: 16, when: true }], environment: defaultEnv().defineVariable('x', '0', '2') }) .calls('8', NodeId.toBuiltIn('<-')) .argument('8', ['7', '6']) - .call('9', 'next', [], { returns: [], reads: [NodeId.toBuiltIn('next')], cds: [{ id: 16, when: true }], environment: defaultEnv().defineVariable('x', '0', '2').defineVariable('x', '6', '8', [{ id: '16', when: true }]) }) + .call('9', 'next', [], { origin: [BuiltInProcName.Next], returns: [], reads: [NodeId.toBuiltIn('next')], cds: [{ id: 16, when: true }], environment: 
defaultEnv().defineVariable('x', '0', '2').defineVariable('x', '6', '8', [{ id: '16', when: true }]) }) .calls('9', NodeId.toBuiltIn('next')) .argument('15', '8') .call('15', '{', [argumentInCall('8', { cds: [{ id: 16, when: true }] })], { returns: [], reads: [NodeId.toBuiltIn('{')], cds: [{ id: 16, when: true }], environment: defaultEnv().defineVariable('x', '0', '2').defineVariable('x', '6', '8', [{ id: '16', when: true }]).defineVariable('x', '11', '13', []) })