deepspeed_py_aio.cpp 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. // Copyright (c) Microsoft Corporation.
  2. // SPDX-License-Identifier: Apache-2.0
  3. // DeepSpeed Team
  4. /*
  5. Copyright 2020 The Microsoft DeepSpeed Team
  6. Licensed under the MIT license.
  7. Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
  8. */
  9. #include <assert.h>
  10. #include <stdlib.h>
  11. #include <string.h>
  12. #include <fcntl.h>
  13. #include <sys/mman.h>
  14. #include <sys/stat.h>
  15. #include <sys/types.h>
  16. #include <unistd.h>
  17. #include <cassert>
  18. #include <chrono>
  19. #include <cstring>
  20. #include <fstream>
  21. #include <iostream>
  22. #include <memory>
  23. #include <string>
  24. #include <vector>
  25. #include "deepspeed_py_aio.h"
  // NOTE(review): file-scope using-directives. The code visible in this file
  // spells out std:: and std::chrono:: explicitly, so these may only matter to
  // code outside this view -- confirm before removing.
  26. using namespace std;
  27. using namespace std::chrono;
  // Debug switches for the read and write paths, both set to 0 here. They are
  // not referenced in the visible part of this file.
  28. #define DEBUG_DS_AIO_READ 0
  29. #define DEBUG_DS_AIO_WRITE 0
  // Name string for this library; not referenced in the visible part of this file.
  30. static const std::string c_library_name = "deepspeed_aio";
  31. int deepspeed_py_aio_write(const torch::Tensor& buffer,
  32. const char* filename,
  33. const int block_size,
  34. const int queue_depth,
  35. const bool single_submit,
  36. const bool overlap_events,
  37. const bool validate)
  38. {
  39. const auto start_time = std::chrono::high_resolution_clock::now();
  40. deepspeed_aio_config_t config(block_size, queue_depth, single_submit, overlap_events, false);
  41. const auto fd = open_file(filename, false);
  42. if (fd == -1) { return -1; }
  43. auto write_buffer = (char*)buffer.data_ptr();
  44. const auto num_write_bytes = static_cast<long long int>(buffer.nbytes());
  45. std::unique_ptr<io_xfer_ctxt> xfer_ctxt(new io_xfer_ctxt(fd, 0, num_write_bytes, write_buffer));
  46. std::unique_ptr<aio_context> aio_ctxt(new aio_context(config._block_size, config._queue_depth));
  47. if (config._overlap_events) {
  48. do_aio_operation_overlap(false, aio_ctxt, xfer_ctxt, &config, nullptr);
  49. } else {
  50. do_aio_operation_sequential(false, aio_ctxt, xfer_ctxt, &config, nullptr);
  51. }
  52. const std::chrono::duration<double> aio_time =
  53. std::chrono::high_resolution_clock::now() - start_time;
  54. close(fd);
  55. if (validate) { validate_aio_operation(false, filename, write_buffer, num_write_bytes); }
  56. const std::chrono::duration<double> fn_time =
  57. std::chrono::high_resolution_clock::now() - start_time;
  58. std::cout << "Elapsed time(usec): "
  59. << "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
  60. << std::endl;
  61. return 0;
  62. }
  63. int deepspeed_py_aio_read(torch::Tensor& buffer,
  64. const char* filename,
  65. const int block_size,
  66. const int queue_depth,
  67. const bool single_submit,
  68. const bool overlap_events,
  69. const bool validate)
  70. {
  71. const auto start_time = std::chrono::high_resolution_clock::now();
  72. long long num_file_bytes;
  73. if (-1 == get_file_size(filename, num_file_bytes)) {
  74. const auto error_code = errno;
  75. report_file_error(filename, " fstat for read", error_code);
  76. return -1;
  77. }
  78. deepspeed_aio_config_t config(block_size, queue_depth, single_submit, overlap_events, false);
  79. const auto fd = open_file(filename, true);
  80. if (fd == -1) { return -1; }
  81. auto read_buffer = (char*)buffer.data_ptr();
  82. assert(static_cast<long long int>(buffer.nbytes()) == num_file_bytes);
  83. std::unique_ptr<io_xfer_ctxt> xfer_ctxt(new io_xfer_ctxt(fd, 0, num_file_bytes, read_buffer));
  84. std::unique_ptr<aio_context> aio_ctxt(new aio_context(config._block_size, config._queue_depth));
  85. if (config._overlap_events) {
  86. do_aio_operation_overlap(true, aio_ctxt, xfer_ctxt, &config, nullptr);
  87. } else {
  88. do_aio_operation_sequential(true, aio_ctxt, xfer_ctxt, &config, nullptr);
  89. }
  90. const std::chrono::duration<double> aio_time =
  91. std::chrono::high_resolution_clock::now() - start_time;
  92. close(fd);
  93. if (validate) { validate_aio_operation(true, filename, read_buffer, num_file_bytes); }
  94. const std::chrono::duration<double> fn_time =
  95. std::chrono::high_resolution_clock::now() - start_time;
  96. std::cout << "Elapsed time(usec): "
  97. << "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
  98. << std::endl;
  99. return 0;
  100. }