spatial_cuda_layers.h 915 B

12345678910111213141516171819202122232425262728293031323334353637
  1. // Copyright (c) Microsoft Corporation.
  2. // SPDX-License-Identifier: Apache-2.0
  3. // DeepSpeed Team
  4. #pragma once
  // Feature flag: defined (as 1) only in device compilation passes that target
  // compute capability 5.3 or newer. In host passes __CUDA_ARCH__ is not
  // defined, so the flag stays undefined there.
  //
  // An object-like macro takes no '=' between its name and its value. The
  // previous spelling made the macro expand to the tokens `= 1`, which only
  // worked with `#ifdef` and broke any `#if HALF_PRECISION_AVAILABLE` check.
  #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
  #define HALF_PRECISION_AVAILABLE 1
  #endif
  8. #ifdef __HIP_PLATFORM_AMD__
  9. #include <hip/hip_cooperative_groups.h>
  10. #else
  11. #include <cooperative_groups.h>
  12. #endif
  13. #include <cuda.h>
  14. #include <cuda_fp16.h>
  15. /*********** Group Norm Kernels, Structs, and Helpers ************/
  // Plain aggregate of three signed 64-bit extents.
  // NOTE(review): presumably describes a channels-last activation laid out as
  // (batch_size, seq_len, channels) -- confirm against the kernels that use it.
  typedef struct {
      int64_t batch_size;  // first extent
      int64_t seq_len;     // second extent
      int64_t channels;    // third extent
  } ChannelsLastProblem;
  21. void launch_opt_bias_add(__half* result,
  22. const __half* activation,
  23. const __half* bias,
  24. const __half* other,
  25. const __half* other_bias,
  26. int batch_size,
  27. int seq_len,
  28. int channels,
  29. cudaStream_t stream);