OMPGridValues.h 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. ///
  14. /// \file
  15. /// \brief Provides definitions for Target specific Grid Values
  16. ///
  17. //===----------------------------------------------------------------------===//
  18. #ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
  19. #define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
  20. namespace llvm {
  21. namespace omp {
  22. /// \brief Defines various target-specific GPU grid values that must be
  23. /// consistent between host RTL (plugin), device RTL, and clang.
  24. /// We can change grid values for a "fat" binary so that different
  25. /// passes get the correct values when generating code for a
  26. /// multi-target binary. Both amdgcn and nvptx values are stored in
  27. /// this file. In the future, should there be differences between GPUs
  28. /// of the same architecture, then simply make a different array and
  29. /// use the new array name.
  30. ///
  31. /// Example usage in clang:
  32. /// const unsigned slot_size =
  33. /// ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
  34. ///
  35. /// Example usage in libomptarget/deviceRTLs:
  36. /// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
  37. /// #ifdef __AMDGPU__
  38. /// #define GRIDVAL AMDGPUGridValues
  39. /// #else
  40. /// #define GRIDVAL NVPTXGridValues
  41. /// #endif
  42. /// ... Then use this reference for GV_Warp_Size in the deviceRTL source.
  43. /// llvm::omp::GRIDVAL().GV_Warp_Size
  44. ///
  45. /// Example usage in libomptarget hsa plugin:
  46. /// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
  47. /// #define GRIDVAL AMDGPUGridValues
  48. /// ... Then use this reference to access GV_Warp_Size in the hsa plugin.
  49. /// llvm::omp::GRIDVAL().GV_Warp_Size
  50. ///
  51. /// Example usage in libomptarget cuda plugin:
  52. /// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
  53. /// #define GRIDVAL NVPTXGridValues
  54. /// ... Then use this reference to access GV_Warp_Size in the cuda plugin.
  55. /// llvm::omp::GRIDVAL().GV_Warp_Size
  56. ///
  57. struct GV {
  58. /// The size reserved for data in a shared memory slot.
  59. const unsigned GV_Slot_Size;
  60. /// The default value of maximum number of threads in a worker warp.
  61. const unsigned GV_Warp_Size;
  62. constexpr unsigned warpSlotSize() const {
  63. return GV_Warp_Size * GV_Slot_Size;
  64. }
  65. /// the maximum number of teams.
  66. const unsigned GV_Max_Teams;
  67. // An alternative to the heavy data sharing infrastructure that uses global
  68. // memory is one that uses device __shared__ memory. The amount of such space
  69. // (in bytes) reserved by the OpenMP runtime is noted here.
  70. const unsigned GV_SimpleBufferSize;
  71. // The absolute maximum team size for a working group
  72. const unsigned GV_Max_WG_Size;
  73. // The default maximum team size for a working group
  74. const unsigned GV_Default_WG_Size;
  75. constexpr unsigned maxWarpNumber() const {
  76. return GV_Max_WG_Size / GV_Warp_Size;
  77. }
  78. };
  79. /// For AMDGPU GPUs
  80. static constexpr GV AMDGPUGridValues64 = {
  81. 256, // GV_Slot_Size
  82. 64, // GV_Warp_Size
  83. 128, // GV_Max_Teams
  84. 896, // GV_SimpleBufferSize
  85. 1024, // GV_Max_WG_Size,
  86. 256, // GV_Default_WG_Size
  87. };
  88. static constexpr GV AMDGPUGridValues32 = {
  89. 256, // GV_Slot_Size
  90. 32, // GV_Warp_Size
  91. 128, // GV_Max_Teams
  92. 896, // GV_SimpleBufferSize
  93. 1024, // GV_Max_WG_Size,
  94. 256, // GV_Default_WG_Size
  95. };
  96. template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
  97. static_assert(wavesize == 32 || wavesize == 64, "");
  98. return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64;
  99. }
  100. /// For Nvidia GPUs
  101. static constexpr GV NVPTXGridValues = {
  102. 256, // GV_Slot_Size
  103. 32, // GV_Warp_Size
  104. 1024, // GV_Max_Teams
  105. 896, // GV_SimpleBufferSize
  106. 1024, // GV_Max_WG_Size
  107. 128, // GV_Default_WG_Size
  108. };
  109. } // namespace omp
  110. } // namespace llvm
  111. #endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
  112. #ifdef __GNUC__
  113. #pragma GCC diagnostic pop
  114. #endif