ds_kernel_utils.h 1.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. /*
  2. Copyright 2022 The Microsoft DeepSpeed Team
  3. Centralized header file for preprocessor macros and constants
  4. used throughout the codebase.
  5. */
  6. #pragma once
  7. #include <cuda.h>
  8. #define DS_HD_INLINE __host__ __device__ __forceinline__
  9. #define DS_D_INLINE __device__ __forceinline__
  10. #ifdef __HIP_PLATFORM_HCC__
  11. // constexpr variant of warpSize for templating
  12. constexpr int hw_warp_size = 64;
  13. #define HALF_PRECISION_AVAILABLE = 1
  14. #include <hip/hip_cooperative_groups.h>
  15. #else // !__HIP_PLATFORM_HCC__
  16. // constexpr variant of warpSize for templating
  17. constexpr int hw_warp_size = 32;
  18. #if __CUDA_ARCH__ >= 530
  19. #define HALF_PRECISION_AVAILABLE = 1
  20. #define PTX_AVAILABLE
  21. #endif // __CUDA_ARCH__ >= 530
  22. #if __CUDA_ARCH__ >= 800
  23. #define ASYNC_COPY_AVAILABLE
  24. #define BF16_AVAILABLE
  25. #endif // __CUDA_ARCH__ >= 800
  26. #include <cooperative_groups.h>
  27. #endif //__HIP_PLATFORM_HCC__
  // Round `val` up to the nearest power of two (values that already are a
  // power of two are returned unchanged).
  //
  // val: value to round; intended for 1 <= val <= 2^30 so that the result is
  //      representable in a 32-bit int. For val == 0 the result is 0.
  // Returns the smallest power of two that is >= val.
  inline int next_pow2(const int val)
  {
      // Subtract one first so exact powers of two map to themselves.
      int rounded_val = val - 1;

      // Smear the highest set bit into every lower bit position. Each step
      // doubles the width of the run of ones, so a 32-bit value needs shifts
      // of 1, 2, 4, 8 and 16 to be fully covered.
      rounded_val |= rounded_val >> 1;
      rounded_val |= rounded_val >> 2;
      rounded_val |= rounded_val >> 4;
      rounded_val |= rounded_val >> 8;
      // Without this last step, inputs above 65536 could yield a value that
      // is not a power of two (e.g. 65537 -> 131071).
      rounded_val |= rounded_val >> 16;

      // All bits below the top one are now set; adding one carries into the
      // next power of two.
      return rounded_val + 1;
  }