123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384 |
- #!/usr/bin/env python
- #
- # This file contains utilities for understanding dependencies between python
- # source files and tests.
- #
- # Utils are assumed to be used from top level ray/ folder, since that is how
- # our tests are defined today.
- #
- # Example usage:
- # To find all circular dependencies under ray/python/:
- # python ci/pipeline/py_dep_analysis.py --mode=circular-dep
- # To find all the RLlib tests that depend on a file:
- # python ci/pipeline/py_dep_analysis.py --mode=test-dep \
- # --file=python/ray/tune/tune.py
- # For testing, add --smoke-test to any commands, so it doesn't spend
- # tons of time querying for available RLlib tests.
- import argparse
- import ast
- import os
- import re
- import subprocess
- import sys
- from typing import Dict, List, Tuple
- class DepGraph(object):
- def __init__(self):
- self.edges: Dict[str, Dict[str, bool]] = {}
- self.ids: Dict[str, int] = {}
- self.inv_ids: Dict[int, str] = {}
- def _run_shell(args: List[str]) -> str:
- return subprocess.check_output(args).decode(sys.stdout.encoding)
- def list_rllib_tests(n: int = -1, test: str = None) -> Tuple[str, List[str]]:
- """List RLlib tests.
- Args:
- n: return at most n tests. all tests if n = -1.
- test: only return information about a specific test.
- """
- tests_res = _run_shell(
- ["bazel", "query", "tests(//python/ray/rllib:*)", "--output", "label"]
- )
- all_tests = []
- # Strip, also skip any empty lines
- tests = [t.strip() for t in tests_res.splitlines() if t.strip()]
- for t in tests:
- if test and t != test:
- continue
- src_out = _run_shell(
- [
- "bazel",
- "query",
- 'kind("source file", deps({}))'.format(t),
- "--output",
- "label",
- ]
- )
- srcs = [f.strip() for f in src_out.splitlines()]
- srcs = [f for f in srcs if f.startswith("//python") and f.endswith(".py")]
- if srcs:
- all_tests.append((t, srcs))
- # Break early if smoke test.
- if n > 0 and len(all_tests) >= n:
- break
- return all_tests
- def _new_dep(graph: DepGraph, src_module: str, dep: str):
- """Create a new dependency between src_module and dep."""
- if dep not in graph.ids:
- graph.ids[dep] = len(graph.ids)
- src_id = graph.ids[src_module]
- dep_id = graph.ids[dep]
- if src_id not in graph.edges:
- graph.edges[src_id] = {}
- graph.edges[src_id][dep_id] = True
- def _new_import(graph: DepGraph, src_module: str, dep_module: str):
- """Process a new import statement in src_module."""
- # We don't care about system imports.
- if not dep_module.startswith("ray"):
- return
- _new_dep(graph, src_module, dep_module)
- def _is_path_module(module: str, name: str, _base_dir: str) -> bool:
- """Figure out if base.sub is a python module or not."""
- # Special handling for _raylet, which is a C++ lib.
- if module == "ray._raylet":
- return False
- bps = ["python"] + module.split(".")
- path = os.path.join(_base_dir, os.path.join(*bps), name + ".py")
- if os.path.isfile(path):
- return True # file module
- return False
- def _new_from_import(
- graph: DepGraph, src_module: str, dep_module: str, dep_name: str, _base_dir: str
- ):
- """Process a new "from ... import ..." statement in src_module."""
- # We don't care about imports outside of ray package.
- if not dep_module or not dep_module.startswith("ray"):
- return
- if _is_path_module(dep_module, dep_name, _base_dir):
- # dep_module.dep_name points to a file.
- _new_dep(graph, src_module, _full_module_path(dep_module, dep_name))
- else:
- # sub is an obj on base dir/file.
- _new_dep(graph, src_module, dep_module)
- def _process_file(graph: DepGraph, src_path: str, src_module: str, _base_dir=""):
- """Create dependencies from src_module to all the valid imports in src_path.
- Args:
- graph: the DepGraph to be added to.
- src_path: .py file to be processed.
- src_module: full module path of the source file.
- _base_dir: use a different base dir than current dir. For unit testing.
- """
- with open(os.path.join(_base_dir, src_path), "r") as in_f:
- tree = ast.parse(in_f.read())
- for node in ast.walk(tree):
- if isinstance(node, ast.Import):
- for alias in node.names:
- _new_import(graph, src_module, alias.name)
- elif isinstance(node, ast.ImportFrom):
- for alias in node.names:
- _new_from_import(
- graph, src_module, node.module, alias.name, _base_dir
- )
- def build_dep_graph() -> DepGraph:
- """Build index from py files to their immediate dependees."""
- graph = DepGraph()
- # Assuming we run from root /ray directory.
- # Follow links since rllib is linked to /rllib.
- for root, sub_dirs, files in os.walk("python", followlinks=True):
- if _should_skip(root):
- continue
- module = _bazel_path_to_module_path(root)
- # Process files first.
- for f in files:
- if not f.endswith(".py"):
- continue
- full = _full_module_path(module, f)
- if full not in graph.ids:
- graph.ids[full] = len(graph.ids)
- # Process file:
- _process_file(graph, os.path.join(root, f), full)
- # Build reverse index for convenience.
- graph.inv_ids = {v: k for k, v in graph.ids.items()}
- return graph
- def _full_module_path(module, f) -> str:
- if f == "__init__.py":
- # __init__ file for this module.
- # Full path is the same as the module name.
- return module
- fn = re.sub(r"\.py$", "", f)
- if not module:
- return fn
- return module + "." + fn
- def _should_skip(d: str) -> bool:
- """Skip directories that should not contain py sources."""
- if d.startswith("python/.eggs/"):
- return True
- if d.startswith("python/."):
- return True
- if d.startswith("python/build"):
- return True
- if d.startswith("python/ray/cpp"):
- return True
- return False
- def _bazel_path_to_module_path(d: str) -> str:
- """Convert a Bazel file path to python module path.
- Example: //python/ray/rllib:xxx/yyy/dd -> ray.rllib.xxx.yyy.dd
- """
- # Do this in 3 steps, so all of 'python:', 'python/', or '//python', etc
- # will get stripped.
- d = re.sub(r"^\/\/", "", d)
- d = re.sub(r"^python", "", d)
- d = re.sub(r"^[\/:]", "", d)
- return d.replace("/", ".").replace(":", ".")
- def _file_path_to_module_path(f: str) -> str:
- """Return the corresponding module path for a .py file."""
- dir, fn = os.path.split(f)
- return _full_module_path(_bazel_path_to_module_path(dir), fn)
- def _depends(
- graph: DepGraph, visited: Dict[int, bool], tid: int, qid: int
- ) -> List[int]:
- """Whether there is a dependency path from module tid to module qid.
- Given graph, and without going through visited.
- """
- if tid not in graph.edges or qid not in graph.edges:
- return []
- if qid in graph.edges[tid]:
- # tid directly depends on qid.
- return [tid, qid]
- for c in graph.edges[tid]:
- if c in visited:
- continue
- visited[c] = True
- # Reduce to a question of whether there is a path from c to qid.
- ds = _depends(graph, visited, c, qid)
- if ds:
- # From tid -> c -> qid.
- return [tid] + ds
- return []
- def test_depends_on_file(
- graph: DepGraph, test: Tuple[str, Tuple[str]], path: str
- ) -> List[int]:
- """Give dependency graph, check if a test depends on a specific .py file.
- Args:
- graph: the dependency graph.
- test: information about a test, in the format of:
- [test_name, (src files for the test)]
- """
- query = _file_path_to_module_path(path)
- if query not in graph.ids:
- # Not a file that we care about.
- return []
- t, srcs = test
- # Skip tuned_examples/ and examples/ tests.
- if t.startswith("//python/ray/rllib:examples/"):
- return []
- for src in srcs:
- if src == "ray.rllib.tests.run_regression_tests":
- return []
- tid = _file_path_to_module_path(src)
- if tid not in graph.ids:
- # Not a test that we care about.
- # TODO(jungong): What tests are these?????
- continue
- branch = _depends(graph, {}, graph.ids[tid], graph.ids[query])
- if branch:
- return branch
- # Does not depend on file.
- return []
- def _find_circular_dep_impl(graph: DepGraph, id: str, branch: str) -> bool:
- if id not in graph.edges:
- return False
- for c in graph.edges[id]:
- if c in branch:
- # Found a circle.
- branch.append(c)
- return True
- branch.append(c)
- if _find_circular_dep_impl(graph, c, branch):
- return True
- branch.pop()
- return False
- def find_circular_dep(graph: DepGraph) -> Dict[str, List[int]]:
- """Find circular dependencies among a dependency graph."""
- known = {}
- circles = {}
- for m, id in graph.ids.items():
- branch = []
- if _find_circular_dep_impl(graph, id, branch):
- if branch[-1] in known:
- # Already knew, skip.
- continue
- # Since this is a cycle dependency, any step along the circle
- # will form a different circle.
- # So we mark every entry on this circle known.
- for n in branch:
- known[n] = True
- # Mark that module m contains a potential circular dep.
- circles[m] = branch
- return circles
- if __name__ == "__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument(
- "--mode",
- type=str,
- default="test-dep",
- help=(
- "test-dep: find dependencies for a specified test. "
- "circular-dep: find circular dependencies in "
- "the specific codebase."
- ),
- )
- parser.add_argument(
- "--file", type=str, help="Path of a .py source file relative to --base_dir."
- )
- parser.add_argument("--test", type=str, help="Specific test to check.")
- parser.add_argument(
- "--smoke-test", action="store_true", help="Load only a few tests for testing."
- )
- args = parser.parse_args()
- print("building dep graph ...")
- graph = build_dep_graph()
- print(
- "done. total {} files, {} of which have dependencies.".format(
- len(graph.ids), len(graph.edges)
- )
- )
- if args.mode == "circular-dep":
- circles = find_circular_dep(graph)
- print("Found following circular dependencies: \n")
- for m, b in circles.items():
- print(m)
- for n in b:
- print(" ", graph.inv_ids[n])
- print()
- if args.mode == "test-dep":
- assert args.file, "Must specify --file for the query."
- # Only support RLlib tests for now.
- # The way Tune tests are defined, they all depend on
- # the entire tune codebase.
- tests = list_rllib_tests(5 if args.smoke_test else -1, args.test)
- print("Total # of tests: ", len(tests))
- for t in tests:
- branch = test_depends_on_file(graph, t, args.file)
- if branch:
- print("{} depends on {}".format(t[0], args.file))
- # Print some debugging info.
- for n in branch:
- print(" ", graph.inv_ids[n])
- else:
- print("{} does not depend on {}".format(t[0], args.file))
|