# train_moderate.py
  1. """Moderate cluster training
  2. This training run will start 32 workers on 32 nodes (including head node).
  3. Test owner: krfricke
  4. Acceptance criteria: Should run through and report final results.
  5. """
  6. import json
  7. import os
  8. import time
  9. import ray
  10. from xgboost_ray import RayParams
  11. from release_test_util import train_ray
  12. if __name__ == "__main__":
  13. ray.init(address="auto", runtime_env={"working_dir": os.path.dirname(__file__)})
  14. ray_params = RayParams(
  15. elastic_training=False,
  16. max_actor_restarts=2,
  17. num_actors=32,
  18. cpus_per_actor=4,
  19. gpus_per_actor=0,
  20. )
  21. start = time.time()
  22. train_ray(
  23. path="/data/classification.parquet",
  24. num_workers=None,
  25. num_boost_rounds=100,
  26. num_files=128,
  27. regression=False,
  28. use_gpu=False,
  29. ray_params=ray_params,
  30. xgboost_params=None,
  31. )
  32. taken = time.time() - start
  33. result = {
  34. "time_taken": taken,
  35. }
  36. test_output_json = os.environ.get("TEST_OUTPUT_JSON", "/tmp/train_moderate.json")
  37. with open(test_output_json, "wt") as f:
  38. json.dump(result, f)
  39. print("PASSED.")