monitor.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. '''Copyright The Microsoft DeepSpeed Team'''
  2. """
  3. Support different forms of monitoring such as wandb and tensorboard
  4. """
  5. from abc import ABC, abstractmethod
  6. import deepspeed.comm as dist
  class Monitor(ABC):
      """Abstract interface shared by every monitoring backend.

      Concrete subclasses must override both ``__init__`` and
      ``write_events``; the class cannot be instantiated directly.
      """

      @abstractmethod
      def __init__(self, monitor_config):
          """Keep a reference to the configuration this monitor was built from.

          Args:
              monitor_config: Configuration object, stored unchanged on
                  ``self.monitor_config``.
          """
          self.monitor_config = monitor_config

      @abstractmethod
      def write_events(self, event_list):
          """Record ``event_list``; the base implementation does nothing.

          Args:
              event_list: Events to record. Their structure is defined by
                  the concrete backend, not by this base class.
          """
  14. from .wandb import WandbMonitor
  15. from .tensorboard import TensorBoardMonitor
  16. from .csv_monitor import csvMonitor
  class MonitorMaster(Monitor):
      """Front-end that forwards events to every enabled monitoring backend.

      Backends are only constructed, and events only forwarded, on the
      process for which ``dist.get_rank()`` returns 0. On every other rank
      the three backend attributes stay ``None`` and ``write_events`` is a
      no-op.
      """

      def __init__(self, monitor_config):
          """Create the backends that ``monitor_config`` enables.

          Args:
              monitor_config: Configuration exposing an ``enabled`` flag and
                  the ``tensorboard``, ``wandb`` and ``csv_monitor``
                  sub-configurations, each with its own ``enabled`` flag.
          """
          super().__init__(monitor_config)
          self.tb_monitor = None
          self.wandb_monitor = None
          self.csv_monitor = None
          self.enabled = monitor_config.enabled

          # Only rank 0 owns real backends; other ranks keep the None defaults.
          if dist.get_rank() != 0:
              return

          tensorboard_cfg = monitor_config.tensorboard
          if tensorboard_cfg.enabled:
              self.tb_monitor = TensorBoardMonitor(tensorboard_cfg)

          wandb_cfg = monitor_config.wandb
          if wandb_cfg.enabled:
              self.wandb_monitor = WandbMonitor(wandb_cfg)

          csv_cfg = monitor_config.csv_monitor
          if csv_cfg.enabled:
              self.csv_monitor = csvMonitor(csv_cfg)

      def write_events(self, event_list):
          """Pass ``event_list`` unchanged to each backend that was created.

          Args:
              event_list: Events to record; handed as-is to every active
                  backend's ``write_events``.
          """
          if dist.get_rank() != 0:
              return

          # Same order as construction: tensorboard, then wandb, then csv.
          backends = (self.tb_monitor, self.wandb_monitor, self.csv_monitor)
          for backend in backends:
              if backend is not None:
                  backend.write_events(event_list)