simple-trainer.py 617 B

1234567891011121314151617181920212223242526272829303132
  1. import os
  2. import socket
  3. import sys
  4. import time
  5. # trainer.py
  6. from collections import Counter
  7. import ray
  8. num_cpus = int(sys.argv[1])
  9. ray.init(address=os.environ["ip_head"])
  10. print("Nodes in the Ray cluster:")
  11. print(ray.nodes())
  12. @ray.remote
  13. def f():
  14. time.sleep(1)
  15. return socket.gethostbyname(socket.gethostname())
  16. # The following takes one second (assuming that
  17. # ray was able to access all of the allocated nodes).
  18. for i in range(60):
  19. start = time.time()
  20. ip_addresses = ray.get([f.remote() for _ in range(num_cpus)])
  21. print(Counter(ip_addresses))
  22. end = time.time()
  23. print(end - start)