ds_kernel_utils.h 1.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. // Copyright (c) Microsoft Corporation.
  2. // SPDX-License-Identifier: Apache-2.0
  3. // DeepSpeed Team
  4. /*
  5. Centralized header file for preprocessor macros and constants
  6. used throughout the codebase.
  7. */
  8. #pragma once
  9. #include <cuda.h>
  // Function-qualifier shorthands for small helpers:
  //   DS_HD_INLINE - force-inlined, callable from both host and device code.
  //   DS_D_INLINE  - force-inlined, callable from device code only.
  #define DS_HD_INLINE __host__ __device__ __forceinline__
  #define DS_D_INLINE __device__ __forceinline__

  // HIP (AMD) build. Both the legacy __HIP_PLATFORM_HCC__ macro and its
  // replacement __HIP_PLATFORM_AMD__ are accepted so that either toolchain
  // generation selects this branch.
  #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)

  // constexpr variant of warpSize for templating
  constexpr int hw_warp_size = 64;
  // Defined as a plain 1 (not "= 1") so the macro is usable with both
  // #ifdef and #if, and expands to a valid expression.
  #define HALF_PRECISION_AVAILABLE 1
  #include <hip/hip_cooperative_groups.h>

  #else  // CUDA build (no HIP platform macro defined)

  // constexpr variant of warpSize for templating
  constexpr int hw_warp_size = 32;

  // __CUDA_ARCH__ is only defined during device compilation passes; the
  // explicit defined() check keeps the host pass free of undefined-macro
  // comparisons while selecting exactly the same branches as before.
  #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  // Feature flags enabled when compiling device code for compute capability 5.3+.
  #define HALF_PRECISION_AVAILABLE 1
  #define PTX_AVAILABLE
  #endif  // __CUDA_ARCH__ >= 530

  #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
  // Feature flag enabled when compiling device code for compute capability 8.0+.
  #define ASYNC_COPY_AVAILABLE
  #endif  // __CUDA_ARCH__ >= 800

  #include <cooperative_groups.h>

  #endif  // HIP platform / CUDA
  /*
  Round `val` up to the nearest power of two. A value that is already a power
  of two is returned unchanged (e.g. 1000 -> 1024, 1024 -> 1024); 0 maps to 0.

  Meaningful for 1 <= val <= 2^30: larger inputs have no power-of-two result
  representable in a 32-bit int, and negative inputs are not supported.
  */
  inline int next_pow2(const int val)
  {
      // Subtract one first so exact powers of two are not bumped to the next one.
      int rounded_val = val - 1;

      // Smear the highest set bit into every lower bit position. Each step
      // doubles the width of the filled run, so shifts of 1, 2, 4, 8 and 16
      // are all required to cover a 32-bit value.
      rounded_val |= rounded_val >> 1;
      rounded_val |= rounded_val >> 2;
      rounded_val |= rounded_val >> 4;
      rounded_val |= rounded_val >> 8;
      rounded_val |= rounded_val >> 16;

      // All bits below the top bit are now set; adding one carries into the
      // next power of two.
      return rounded_val + 1;
  }