deepspeed_py_aio.cpp 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. /*
  2. Copyright 2020 The Microsoft DeepSpeed Team
  3. Licensed under the MIT license.
  4. Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
  5. */
  6. #include <assert.h>
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include <fcntl.h>
  10. #include <sys/mman.h>
  11. #include <sys/stat.h>
  12. #include <sys/types.h>
  13. #include <unistd.h>
  14. #include <cassert>
  15. #include <chrono>
  16. #include <cstring>
  17. #include <fstream>
  18. #include <iostream>
  19. #include <memory>
  20. #include <string>
  21. #include <vector>
  22. #include "deepspeed_py_aio.h"
  23. using namespace std;
  24. using namespace std::chrono;
  // Debug macros, both defined as 0 (off).
  // NOTE(review): presumably they gate extra diagnostics on the read and write
  // paths; neither macro is referenced in the visible part of this file, so
  // confirm where they are consumed.
  25. #define DEBUG_DS_AIO_READ 0
  26. #define DEBUG_DS_AIO_WRITE 0
  // File-local library name string.
  // NOTE(review): presumably used as a prefix in diagnostics; it is not
  // referenced by the functions visible here.
  27. static const std::string c_library_name = "deepspeed_aio";
  28. int deepspeed_py_aio_write(const torch::Tensor& buffer,
  29. const char* filename,
  30. const int block_size,
  31. const int queue_depth,
  32. const bool single_submit,
  33. const bool overlap_events,
  34. const bool validate)
  35. {
  36. const auto start_time = std::chrono::high_resolution_clock::now();
  37. deepspeed_aio_config_t config(block_size, queue_depth, single_submit, overlap_events, false);
  38. const auto fd = open_file(filename, false);
  39. if (fd == -1) { return -1; }
  40. auto write_buffer = (char*)buffer.data_ptr();
  41. const auto num_write_bytes = static_cast<long long int>(buffer.nbytes());
  42. std::unique_ptr<io_xfer_ctxt> xfer_ctxt(new io_xfer_ctxt(fd, 0, num_write_bytes, write_buffer));
  43. std::unique_ptr<aio_context> aio_ctxt(new aio_context(config._block_size, config._queue_depth));
  44. if (config._overlap_events) {
  45. do_aio_operation_overlap(false, aio_ctxt, xfer_ctxt, &config, nullptr);
  46. } else {
  47. do_aio_operation_sequential(false, aio_ctxt, xfer_ctxt, &config, nullptr);
  48. }
  49. const std::chrono::duration<double> aio_time =
  50. std::chrono::high_resolution_clock::now() - start_time;
  51. close(fd);
  52. if (validate) { validate_aio_operation(false, filename, write_buffer, num_write_bytes); }
  53. const std::chrono::duration<double> fn_time =
  54. std::chrono::high_resolution_clock::now() - start_time;
  55. std::cout << "Elapsed time(usec): "
  56. << "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
  57. << std::endl;
  58. return 0;
  59. }
  60. int deepspeed_py_aio_read(torch::Tensor& buffer,
  61. const char* filename,
  62. const int block_size,
  63. const int queue_depth,
  64. const bool single_submit,
  65. const bool overlap_events,
  66. const bool validate)
  67. {
  68. const auto start_time = std::chrono::high_resolution_clock::now();
  69. long long num_file_bytes;
  70. if (-1 == get_file_size(filename, num_file_bytes)) {
  71. const auto error_code = errno;
  72. report_file_error(filename, " fstat for read", error_code);
  73. return -1;
  74. }
  75. deepspeed_aio_config_t config(block_size, queue_depth, single_submit, overlap_events, false);
  76. const auto fd = open_file(filename, true);
  77. if (fd == -1) { return -1; }
  78. auto read_buffer = (char*)buffer.data_ptr();
  79. assert(static_cast<long long int>(buffer.nbytes()) == num_file_bytes);
  80. std::unique_ptr<io_xfer_ctxt> xfer_ctxt(new io_xfer_ctxt(fd, 0, num_file_bytes, read_buffer));
  81. std::unique_ptr<aio_context> aio_ctxt(new aio_context(config._block_size, config._queue_depth));
  82. if (config._overlap_events) {
  83. do_aio_operation_overlap(true, aio_ctxt, xfer_ctxt, &config, nullptr);
  84. } else {
  85. do_aio_operation_sequential(true, aio_ctxt, xfer_ctxt, &config, nullptr);
  86. }
  87. const std::chrono::duration<double> aio_time =
  88. std::chrono::high_resolution_clock::now() - start_time;
  89. close(fd);
  90. if (validate) { validate_aio_operation(true, filename, read_buffer, num_file_bytes); }
  91. const std::chrono::duration<double> fn_time =
  92. std::chrono::high_resolution_clock::now() - start_time;
  93. std::cout << "Elapsed time(usec): "
  94. << "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
  95. << std::endl;
  96. return 0;
  97. }