deepspeed_py_aio_handle.h 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. // Copyright (c) Microsoft Corporation.
  2. // SPDX-License-Identifier: Apache-2.0
  3. // DeepSpeed Team
  4. /*
  5. Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
  6. */
  7. #include <condition_variable>
  8. #include <memory>
  9. #include "deepspeed_aio_thread.h"
  10. #include "deepspeed_pin_tensor.h"
  11. struct deepspeed_aio_handle_t {
  12. std::unique_ptr<struct aio_context> _aio_ctxt;
  13. const bool _single_submit;
  14. const bool _overlap_events;
  15. const int _num_threads;
  16. deepspeed_aio_config_t _aio_config;
  17. std::vector<std::shared_ptr<struct deepspeed_aio_thread_t>> _thread_contexts;
  18. std::vector<std::thread> _threads;
  19. int _num_pending_ops;
  20. std::unique_ptr<struct deepspeed_pin_tensor_t> _pinned_tensor_mgr;
  21. deepspeed_aio_handle_t(const int block_size,
  22. const int queue_depth,
  23. const bool single_submit,
  24. const bool overlap_events,
  25. const int num_threads);
  26. ~deepspeed_aio_handle_t();
  27. const int get_block_size() const;
  28. const int get_queue_depth() const;
  29. const bool get_single_submit() const;
  30. const bool get_overlap_events() const;
  31. const int get_thread_count() const;
  32. int read(torch::Tensor& buffer, const char* filename, const bool validate);
  33. int write(const torch::Tensor& buffer, const char* filename, const bool validate);
  34. int pread(const torch::Tensor& buffer,
  35. const char* filename,
  36. const bool validate,
  37. const bool async);
  38. int pwrite(const torch::Tensor& buffer,
  39. const char* filename,
  40. const bool validate,
  41. const bool async);
  42. int sync_pread(torch::Tensor& buffer, const char* filename);
  43. int sync_pwrite(const torch::Tensor& buffer, const char* filename);
  44. int async_pread(torch::Tensor& buffer, const char* filename);
  45. int async_pwrite(const torch::Tensor& buffer, const char* filename);
  46. // TODO: Make API's args to be shape and dtype.
  47. torch::Tensor new_cpu_locked_tensor(const size_t num_elem, const torch::Tensor& example_tensor);
  48. bool free_cpu_locked_tensor(torch::Tensor&);
  49. int wait();
  50. void _stop_threads();
  51. void _schedule_aio_work(std::shared_ptr<struct io_op_desc_t> scheduled_op);
  52. std::shared_ptr<struct io_op_desc_t> _wait_for_aio_work();
  53. bool _is_valid_parallel_aio_op(const bool read_op, const long long int num_bytes);
  54. };