# setup_chaos.py: deploys a node killer for chaos testing.

  1. import argparse
  2. import ray
  3. from ray._private.test_utils import get_and_run_node_killer
  4. def parse_script_args():
  5. parser = argparse.ArgumentParser()
  6. parser.add_argument("--node-kill-interval", type=int, default=60)
  7. parser.add_argument("--max-nodes-to-kill", type=int, default=2)
  8. parser.add_argument(
  9. "--no-start",
  10. action="store_true",
  11. default=False,
  12. help=(
  13. "If set, node killer won't be starting to kill nodes when "
  14. "the script is done. Driver needs to manually "
  15. "obtain the node killer handle and invoke run method to "
  16. "start killing nodes. If not set, as soon as "
  17. "the script is done, nodes will be killed every "
  18. "--node-kill-interval seconds."
  19. ),
  20. )
  21. return parser.parse_known_args()
  22. def main():
  23. """Start the chaos testing.
  24. Currently chaos testing only covers random node failures.
  25. """
  26. args, _ = parse_script_args()
  27. ray.init(address="auto")
  28. get_and_run_node_killer(
  29. args.node_kill_interval,
  30. namespace="release_test_namespace",
  31. lifetime="detached",
  32. no_start=args.no_start,
  33. max_nodes_to_kill=args.max_nodes_to_kill,
  34. )
  35. print("Successfully deployed a node killer.")
  36. main()