monitor.py 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. # Copyright (c) Microsoft Corporation.
  2. # SPDX-License-Identifier: Apache-2.0
  3. # DeepSpeed Team
  4. """
  5. Support different forms of monitoring such as wandb and tensorboard
  6. """
  7. from abc import ABC, abstractmethod
  8. import deepspeed.comm as dist
  9. class Monitor(ABC):
  10. @abstractmethod
  11. def __init__(self, monitor_config):
  12. self.monitor_config = monitor_config
  13. @abstractmethod
  14. def write_events(self, event_list):
  15. pass
  16. from .wandb import WandbMonitor
  17. from .tensorboard import TensorBoardMonitor
  18. from .csv_monitor import csvMonitor
  19. class MonitorMaster(Monitor):
  20. def __init__(self, monitor_config):
  21. super().__init__(monitor_config)
  22. self.tb_monitor = None
  23. self.wandb_monitor = None
  24. self.csv_monitor = None
  25. self.enabled = monitor_config.enabled
  26. if dist.get_rank() == 0:
  27. if monitor_config.tensorboard.enabled:
  28. self.tb_monitor = TensorBoardMonitor(monitor_config.tensorboard)
  29. if monitor_config.wandb.enabled:
  30. self.wandb_monitor = WandbMonitor(monitor_config.wandb)
  31. if monitor_config.csv_monitor.enabled:
  32. self.csv_monitor = csvMonitor(monitor_config.csv_monitor)
  33. def write_events(self, event_list):
  34. if dist.get_rank() == 0:
  35. if self.tb_monitor is not None:
  36. self.tb_monitor.write_events(event_list)
  37. if self.wandb_monitor is not None:
  38. self.wandb_monitor.write_events(event_list)
  39. if self.csv_monitor is not None:
  40. self.csv_monitor.write_events(event_list)