diff options
| -rw-r--r-- | mesonbuild/ast/interpreter.py | 229 | ||||
| -rw-r--r-- | test cases/rewrite/1 basic/expected_dag.txt | 83 | ||||
| -rw-r--r-- | test cases/unit/120 rewrite/meson.build | 5 | ||||
| -rw-r--r-- | unittests/rewritetests.py | 21 |
4 files changed, 338 insertions, 0 deletions
diff --git a/mesonbuild/ast/interpreter.py b/mesonbuild/ast/interpreter.py index 6857987c3..e66decf60 100644 --- a/mesonbuild/ast/interpreter.py +++ b/mesonbuild/ast/interpreter.py @@ -23,6 +23,7 @@ from ..interpreterbase import ( Disabler, default_resolve_key, UnknownValue, + InterpreterObject, ) from ..interpreter import ( @@ -99,6 +100,56 @@ class IntrospectionBuildTarget(MesonInterpreterObject): kwargs: T.Dict[str, TYPE_var] node: FunctionNode +def is_ignored_edge(src: T.Union[BaseNode, UnknownValue]) -> bool: + return (isinstance(src, FunctionNode) and src.func_name.value not in {'files', 'get_variable'}) or isinstance(src, MethodNode) + +class DataflowDAG: + src_to_tgts: T.DefaultDict[T.Union[BaseNode, UnknownValue], T.Set[T.Union[BaseNode, UnknownValue]]] + tgt_to_srcs: T.DefaultDict[T.Union[BaseNode, UnknownValue], T.Set[T.Union[BaseNode, UnknownValue]]] + + def __init__(self) -> None: + self.src_to_tgts = defaultdict(set) + self.tgt_to_srcs = defaultdict(set) + + def add_edge(self, source: T.Union[BaseNode, UnknownValue], target: T.Union[BaseNode, UnknownValue]) -> None: + self.src_to_tgts[source].add(target) + self.tgt_to_srcs[target].add(source) + + # Returns all nodes in the DAG that are reachable from a node in `srcs`. + # In other words, A node `a` is part of the returned set exactly if data + # from `srcs` flows into `a`, directly or indirectly. + # Certain edges are ignored. + def reachable(self, srcs: T.Set[T.Union[BaseNode, UnknownValue]], reverse: bool) -> T.Set[T.Union[BaseNode, UnknownValue]]: + reachable = srcs.copy() + active = srcs.copy() + while active: + new: T.Set[T.Union[BaseNode, UnknownValue]] = set() + if reverse: + for tgt in active: + new.update(src for src in self.tgt_to_srcs[tgt] if not is_ignored_edge(src)) + else: + for src in active: + if is_ignored_edge(src): + continue + new.update(tgt for tgt in self.src_to_tgts[src]) + reachable.update(new) + active = new + return reachable + + # Returns all paths from src to target. + # Certain edges are ignored. + def find_all_paths(self, src: T.Union[BaseNode, UnknownValue], target: T.Union[BaseNode, UnknownValue]) -> T.List[T.List[T.Union[BaseNode, UnknownValue]]]: + queue = [(src, [src])] + paths = [] + while queue: + cur, path = queue.pop() + if cur == target: + paths.append(path) + if is_ignored_edge(cur): + continue + queue.extend((tgt, path + [tgt]) for tgt in self.src_to_tgts[cur]) + return paths + class AstInterpreter(InterpreterBase): def __init__(self, source_root: str, subdir: str, subproject: SubProject, subproject_dir: str, env: environment.Environment, visitors: T.Optional[T.List[AstVisitor]] = None): super().__init__(source_root, subdir, subproject, subproject_dir, env) @@ -106,6 +157,18 @@ class AstInterpreter(InterpreterBase): self.nesting: T.List[int] = [] self.cur_assignments: T.DefaultDict[str, T.List[T.Tuple[T.List[int], T.Union[BaseNode, UnknownValue]]]] = defaultdict(list) self.all_assignment_nodes: T.DefaultDict[str, T.List[AssignmentNode]] = defaultdict(list) + # dataflow_dag is an acyclic directed graph that contains an edge + # from one instance of `BaseNode` to another instance of `BaseNode` if + # data flows directly from one to the other. Example: If meson.build + # contains this: + # var = 'foo' + '123' + # executable(var, 'src.c') + # var = 'bar' + # dataflow_dag will contain an edge from the IdNode corresponding to + # 'var' in line 2 to the ArithmeticNode corresponding to 'foo' + '123'. + # This graph is crucial for e.g. node_to_runtime_value because we have + # to know that 'var' in line2 is 'foo123' and not 'bar'. + self.dataflow_dag = DataflowDAG() self.assign_vals: T.Dict[str, T.Any] = {} self.funcs.update({'project': self.func_do_nothing, 'test': self.func_do_nothing, @@ -361,6 +424,8 @@ class AstInterpreter(InterpreterBase): self.cur_assignments[var_name] = [(nesting, v) for (nesting, v) in self.cur_assignments[var_name] if len(nesting) <= len(self.nesting)] if len(potential_values) > 1 or (len(potential_values) > 0 and oldval is None): uv = UnknownValue() + for pv in potential_values: + self.dataflow_dag.add_edge(pv, uv) self.cur_assignments[var_name].append((self.nesting.copy(), uv)) def get_cur_value(self, var_name: str, allow_none: bool = False) -> T.Union[BaseNode, UnknownValue, None]: @@ -379,6 +444,144 @@ class AstInterpreter(InterpreterBase): def get_variable(self, varname: str) -> int: return 0 + # The function `node_to_runtime_value` takes a node of the ast as an + # argument and tries to return the same thing that would be passed to e.g. + # `func_message` if you put `message(node)` in your `meson.build` file and + # run `meson setup`. If this is not possible, `UnknownValue()` is returned. + # There are 3 Reasons why this is sometimes impossible: + # 1. Because the meson rewriter is imperfect and has not implemented everything yet + # 2. Because the value is different on different machines, example: + # ```meson + # node = somedep.found() + # message(node) + # ``` + # will print `true` on some machines and `false` on others, so + # `node_to_runtime_value` does not know whether to return `true` or + # `false` and will return `UnknownValue()`. + # 3. Here: + # ```meson + # foreach x : [1, 2] + # node = x + # message(node) + # endforeach + # ``` + # `node_to_runtime_value` does not know whether to return `1` or `2` and + # will return `UnknownValue()`. + # + # If you have something like + # ``` + # node = [123, somedep.found()] + # ``` + # `node_to_runtime_value` will return `[123, UnknownValue()]`. + def node_to_runtime_value(self, node: T.Union[UnknownValue, BaseNode, TYPE_var]) -> T.Any: + if isinstance(node, (mparser.StringNode, mparser.BooleanNode, mparser.NumberNode)): + return node.value + elif isinstance(node, mparser.StringNode): + if node.is_fstring: + return UnknownValue() + else: + return node.value + elif isinstance(node, list): + return [self.node_to_runtime_value(x) for x in node] + elif isinstance(node, ArrayNode): + return [self.node_to_runtime_value(x) for x in node.args.arguments] + elif isinstance(node, mparser.DictNode): + return {self.node_to_runtime_value(k): self.node_to_runtime_value(v) for k, v in node.args.kwargs.items()} + elif isinstance(node, IdNode): + assert len(self.dataflow_dag.tgt_to_srcs[node]) == 1 + val = next(iter(self.dataflow_dag.tgt_to_srcs[node])) + return self.node_to_runtime_value(val) + elif isinstance(node, (MethodNode, FunctionNode)): + return UnknownValue() + elif isinstance(node, ArithmeticNode): + left = self.node_to_runtime_value(node.left) + right = self.node_to_runtime_value(node.right) + if isinstance(left, list) and isinstance(right, UnknownValue): + return left + [right] + if isinstance(right, list) and isinstance(left, UnknownValue): + return [left] + right + if isinstance(left, UnknownValue) or isinstance(right, UnknownValue): + return UnknownValue() + if node.operation == 'add': + if isinstance(left, dict) and isinstance(right, dict): + ret = left.copy() + for k, v in right.items(): + ret[k] = v + return ret + if isinstance(left, list): + if not isinstance(right, list): + right = [right] + return left + right + return left + right + elif node.operation == 'sub': + return left - right + elif node.operation == 'mul': + return left * right + elif node.operation == 'div': + if isinstance(left, int) and isinstance(right, int): + return left // right + elif isinstance(left, str) and isinstance(right, str): + return os.path.join(left, right).replace('\\', '/') + elif node.operation == 'mod': + if isinstance(left, int) and isinstance(right, int): + return left % right + elif isinstance(node, (UnknownValue, IntrospectionBuildTarget, IntrospectionDependency, str, bool, int)): + return node + elif isinstance(node, mparser.IndexNode): + iobject = self.node_to_runtime_value(node.iobject) + index = self.node_to_runtime_value(node.index) + if isinstance(iobject, UnknownValue) or isinstance(index, UnknownValue): + return UnknownValue() + return iobject[index] + elif isinstance(node, mparser.ComparisonNode): + left = self.node_to_runtime_value(node.left) + right = self.node_to_runtime_value(node.right) + if isinstance(left, UnknownValue) or isinstance(right, UnknownValue): + return UnknownValue() + if node.ctype == '==': + return left == right + elif node.ctype == '!=': + return left != right + elif node.ctype == 'in': + return left in right + elif node.ctype == 'notin': + return left not in right + elif isinstance(node, mparser.TernaryNode): + cond = self.node_to_runtime_value(node.condition) + if isinstance(cond, UnknownValue): + return UnknownValue() + if cond is True: + return self.node_to_runtime_value(node.trueblock) + if cond is False: + return self.node_to_runtime_value(node.falseblock) + elif isinstance(node, mparser.OrNode): + left = self.node_to_runtime_value(node.left) + right = self.node_to_runtime_value(node.right) + if isinstance(left, UnknownValue) or isinstance(right, UnknownValue): + return UnknownValue() + return left or right + elif isinstance(node, mparser.AndNode): + left = self.node_to_runtime_value(node.left) + right = self.node_to_runtime_value(node.right) + if isinstance(left, UnknownValue) or isinstance(right, UnknownValue): + return UnknownValue() + return left and right + elif isinstance(node, mparser.UMinusNode): + val = self.node_to_runtime_value(node.value) + if isinstance(val, UnknownValue): + return val + if isinstance(val, (int, float)): + return -val + elif isinstance(node, mparser.NotNode): + val = self.node_to_runtime_value(node.value) + if isinstance(val, UnknownValue): + return val + if isinstance(val, bool): + return not val + elif isinstance(node, mparser.ParenthesizedNode): + return self.node_to_runtime_value(node.inner) + raise mesonlib.MesonBugException('Unhandled node type') + def assignment(self, node: AssignmentNode) -> None: assert isinstance(node, AssignmentNode) self.cur_assignments[node.var_name.value].append((self.nesting.copy(), node.value)) @@ -397,8 +600,18 @@ class AstInterpreter(InterpreterBase): self.cur_assignments[node.var_name.value].append((self.nesting.copy(), newval)) self.all_assignment_nodes[node.var_name.value].append(node) + self.dataflow_dag.add_edge(lhs, newval) + self.dataflow_dag.add_edge(node.value, newval) + self.assign_vals[node.var_name.value] = self.evaluate_statement(node.value) + def func_get_variable(self, node: BaseNode, args: T.List[TYPE_var], kwargs: T.Dict[str, TYPE_var]) -> None: + assert isinstance(node, FunctionNode) + var_name = args[0] + assert isinstance(var_name, str) + val = self.get_cur_value(var_name) + self.dataflow_dag.add_edge(val, node) + def resolve_node(self, node: BaseNode, include_unknown_args: bool = False, id_loop_detect: T.Optional[T.List[str]] = None) -> T.Optional[T.Any]: def quick_resolve(n: BaseNode, loop_detect: T.Optional[T.List[str]] = None) -> T.Any: if loop_detect is None: @@ -511,3 +724,19 @@ class AstInterpreter(InterpreterBase): def evaluate_testcase(self, node: TestCaseClauseNode) -> Disabler | None: return Disabler(subproject=self.subproject) + + def evaluate_statement(self, cur: mparser.BaseNode) -> T.Optional[InterpreterObject]: + if hasattr(cur, 'args'): + for arg in cur.args.arguments: + self.dataflow_dag.add_edge(arg, cur) + for k, v in cur.args.kwargs.items(): + self.dataflow_dag.add_edge(v, cur) + for attr in ['source_object', 'left', 'right', 'items', 'iobject', 'index', 'condition']: + if hasattr(cur, attr): + assert isinstance(getattr(cur, attr), mparser.BaseNode) + self.dataflow_dag.add_edge(getattr(cur, attr), cur) + if isinstance(cur, mparser.IdNode): + self.dataflow_dag.add_edge(self.get_cur_value(cur.value), cur) + return None + else: + return super().evaluate_statement(cur) diff --git a/test cases/rewrite/1 basic/expected_dag.txt b/test cases/rewrite/1 basic/expected_dag.txt new file mode 100644 index 000000000..abc04fc31 --- /dev/null +++ b/test cases/rewrite/1 basic/expected_dag.txt @@ -0,0 +1,83 @@ +Data flowing to FunctionNode(1:0): + StringNode(1:8) + StringNode(1:23) +Data flowing to ArrayNode(3:7): + StringNode(3:8) + StringNode(3:20) +Data flowing to FunctionNode(4:7): + ArrayNode(4:13) +Data flowing to ArrayNode(4:13): + StringNode(4:14) + StringNode(4:27) +Data flowing to IdNode(5:7): + ArrayNode(3:7) +Data flowing to ArrayNode(6:7): + IdNode(6:8) +Data flowing to IdNode(6:8): + IdNode(5:7) +Data flowing to FunctionNode(10:7): + StringNode(10:18) + ArithmeticNode(10:34) +Data flowing to ArithmeticNode(10:34): + IdNode(10:34) + IdNode(10:41) +Data flowing to IdNode(10:34): + ArrayNode(3:7) +Data flowing to IdNode(10:41): + FunctionNode(4:7) +Data flowing to FunctionNode(11:7): + StringNode(11:18) + IdNode(11:34) +Data flowing to IdNode(11:34): + ArrayNode(3:7) +Data flowing to FunctionNode(12:7): + StringNode(12:18) + ArrayNode(12:34) +Data flowing to ArrayNode(12:34): + IdNode(12:35) +Data flowing to IdNode(12:35): + FunctionNode(4:7) +Data flowing to FunctionNode(13:7): + StringNode(13:18) + ArrayNode(13:34) +Data flowing to ArrayNode(13:34): + StringNode(13:35) + StringNode(13:47) +Data flowing to FunctionNode(14:7): + StringNode(14:18) + ArrayNode(14:34) +Data flowing to ArrayNode(14:34): + StringNode(14:35) + ArrayNode(14:47) +Data flowing to ArrayNode(14:47): + StringNode(14:48) +Data flowing to FunctionNode(15:7): + StringNode(15:18) + ArrayNode(15:34) +Data flowing to ArrayNode(15:34): + IdNode(15:35) + StringNode(15:41) +Data flowing to IdNode(15:35): + FunctionNode(4:7) +Data flowing to FunctionNode(16:7): + StringNode(16:18) + StringNode(16:34) + StringNode(16:46) +Data flowing to FunctionNode(17:7): + StringNode(17:18) + StringNode(17:34) + IdNode(17:47) + StringNode(17:53) +Data flowing to IdNode(17:47): + ArrayNode(3:7) +Data flowing to FunctionNode(18:7): + StringNode(18:18) + IdNode(18:34) +Data flowing to IdNode(18:34): + IdNode(5:7) +Data flowing to FunctionNode(19:0): + StringNode(19:11) + IdNode(19:27) +Data flowing to IdNode(19:27): + ArrayNode(6:7) + diff --git a/test cases/unit/120 rewrite/meson.build b/test cases/unit/120 rewrite/meson.build index 7d0330b9e..545bb0fde 100644 --- a/test cases/unit/120 rewrite/meson.build +++ b/test cases/unit/120 rewrite/meson.build @@ -62,6 +62,7 @@ cppcoro = declare_dependency( ) +cpp_compiler = meson.get_compiler('cpp') if get_option('unicode') #if comment #if comment 2 mfc=cpp_compiler.find_library(get_option('debug')?'mfc140ud':'mfc140u') @@ -80,6 +81,10 @@ assert(not (3 in [1, 2]), '''3 shouldn't be in [1, 2]''') assert('b' in ['a', 'b'], ''''b' should be in ['a', 'b']''') assert('c' not in ['a', 'b'], ''''c' shouldn't be in ['a', 'b']''') +exe1 = 'exe1' +exe2 = 'exe2' +exe3 = 'exe3' + assert(exe1 in [exe1, exe2], ''''exe1 should be in [exe1, exe2]''') assert(exe3 not in [exe1, exe2], ''''exe3 shouldn't be in [exe1, exe2]''') diff --git a/unittests/rewritetests.py b/unittests/rewritetests.py index 0bd21c74f..70a80347e 100644 --- a/unittests/rewritetests.py +++ b/unittests/rewritetests.py @@ -433,3 +433,24 @@ class RewriterTests(BasePlatformTests): } } self.assertDictEqual(out, expected) + + # Asserts that AstInterpreter.dataflow_dag is what it should be + def test_dataflow_dag(self): + test_path = Path(self.rewrite_test_dir, '1 basic') + interpreter = IntrospectionInterpreter(test_path, '', 'ninja', visitors = [AstIDGenerator()]) + interpreter.analyze() + + def sortkey(node): + return (node.lineno, node.colno, node.end_lineno, node.end_colno) + + def node_to_str(node): + return f"{node.__class__.__name__}({node.lineno}:{node.colno})" + + dag_as_str = "" + for target in sorted(interpreter.dataflow_dag.tgt_to_srcs.keys(), key=sortkey): + dag_as_str += f"Data flowing to {node_to_str(target)}:\n" + for source in sorted(interpreter.dataflow_dag.tgt_to_srcs[target], key=sortkey): + dag_as_str += f" {node_to_str(source)}\n" + + expected = Path(test_path / "expected_dag.txt").read_text().strip() + self.assertEqual(dag_as_str.strip(), expected) |
