deepspeed_aio_thread.h 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. // Copyright (c) Microsoft Corporation.
  2. // SPDX-License-Identifier: Apache-2.0
  3. // DeepSpeed Team
  4. /*
  5. Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
  6. */
  7. #include <condition_variable>
  8. #include <memory>
  9. #include <queue>
  10. #include "deepspeed_py_aio.h"
  11. struct io_op_desc_t {
  12. const bool _read_op;
  13. torch::Tensor _buffer;
  14. int _fd;
  15. const std::string _filename;
  16. const long long int _num_bytes;
  17. torch::Tensor _cpu_buffer;
  18. torch::Tensor _contiguous_buffer;
  19. const bool _validate;
  20. io_op_desc_t(const bool read_op,
  21. const torch::Tensor& buffer,
  22. const int fd,
  23. const char* filename,
  24. const long long int num_bytes,
  25. const bool validate);
  26. char* data_ptr() const;
  27. void fini();
  28. };
  /**
   * Bundles a mutex with a condition variable so the pair can be declared,
   * passed and stored as one unit.
   *
   * The struct adds no behaviour of its own: locking and waiting/notifying
   * are done by whoever uses the two members. Because std::mutex and
   * std::condition_variable are neither copyable nor movable, neither is
   * this struct.
   */
  struct thread_sync_t {
      /// Mutex intended to be held while waiting on / signalling _cond_var.
      std::mutex _mutex;

      /// Condition variable paired with _mutex.
      std::condition_variable _cond_var;
  };
  33. struct deepspeed_aio_thread_t {
  34. const int _tid;
  35. deepspeed_aio_config_t& _aio_config;
  36. std::unique_ptr<struct aio_context> _aio_ctxt;
  37. std::queue<std::shared_ptr<struct io_op_desc_t>> _work_queue;
  38. std::queue<std::shared_ptr<struct io_op_desc_t>> _complete_queue;
  39. bool _time_to_exit;
  40. struct thread_sync_t _work_sync;
  41. struct thread_sync_t _complete_sync;
  42. deepspeed_aio_thread_t(const int tid, deepspeed_aio_config_t& aio_config);
  43. ~deepspeed_aio_thread_t();
  44. void run();
  45. };