OMPGridValues.h 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. ///
  14. /// \file
  15. /// \brief Provides definitions for Target specific Grid Values
  16. ///
  17. //===----------------------------------------------------------------------===//
  18. #ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
  19. #define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
  20. namespace llvm {
  21. namespace omp {
  22. /// \brief Defines various target-specific GPU grid values that must be
  23. /// consistent between host RTL (plugin), device RTL, and clang.
  24. /// We can change grid values for a "fat" binary so that different
  25. /// passes get the correct values when generating code for a
  26. /// multi-target binary. Both amdgcn and nvptx values are stored in
  27. /// this file. In the future, should there be differences between GPUs
  28. /// of the same architecture, then simply make a different array and
  29. /// use the new array name.
  30. ///
  31. /// Example usage in clang:
  32. /// const unsigned slot_size =
  33. /// ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
  34. ///
  35. /// Example usage in libomptarget/deviceRTLs:
  36. /// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
  37. /// #ifdef __AMDGPU__
  38. /// #define GRIDVAL AMDGPUGridValues
  39. /// #else
  40. /// #define GRIDVAL NVPTXGridValues
  41. /// #endif
  42. /// ... Then use this reference for GV_Warp_Size in the deviceRTL source.
  43. /// llvm::omp::GRIDVAL().GV_Warp_Size
  44. ///
  45. /// Example usage in libomptarget hsa plugin:
  46. /// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
  47. /// #define GRIDVAL AMDGPUGridValues
  48. /// ... Then use this reference to access GV_Warp_Size in the hsa plugin.
  49. /// llvm::omp::GRIDVAL().GV_Warp_Size
  50. ///
  51. /// Example usage in libomptarget cuda plugin:
  52. /// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
  53. /// #define GRIDVAL NVPTXGridValues
  54. /// ... Then use this reference to access GV_Warp_Size in the cuda plugin.
  55. /// llvm::omp::GRIDVAL().GV_Warp_Size
  56. ///
  57. struct GV {
  58. /// The size reserved for data in a shared memory slot.
  59. unsigned GV_Slot_Size;
  60. /// The default value of maximum number of threads in a worker warp.
  61. unsigned GV_Warp_Size;
  62. constexpr unsigned warpSlotSize() const {
  63. return GV_Warp_Size * GV_Slot_Size;
  64. }
  65. /// the maximum number of teams.
  66. unsigned GV_Max_Teams;
  67. // The default number of teams in the absence of any other information.
  68. unsigned GV_Default_Num_Teams;
  69. // An alternative to the heavy data sharing infrastructure that uses global
  70. // memory is one that uses device __shared__ memory. The amount of such space
  71. // (in bytes) reserved by the OpenMP runtime is noted here.
  72. unsigned GV_SimpleBufferSize;
  73. // The absolute maximum team size for a working group
  74. unsigned GV_Max_WG_Size;
  75. // The default maximum team size for a working group
  76. unsigned GV_Default_WG_Size;
  77. constexpr unsigned maxWarpNumber() const {
  78. return GV_Max_WG_Size / GV_Warp_Size;
  79. }
  80. };
  81. /// For AMDGPU GPUs
  82. static constexpr GV AMDGPUGridValues64 = {
  83. 256, // GV_Slot_Size
  84. 64, // GV_Warp_Size
  85. (1 << 16), // GV_Max_Teams
  86. 440, // GV_Default_Num_Teams
  87. 896, // GV_SimpleBufferSize
  88. 1024, // GV_Max_WG_Size,
  89. 256, // GV_Default_WG_Size
  90. };
  91. static constexpr GV AMDGPUGridValues32 = {
  92. 256, // GV_Slot_Size
  93. 32, // GV_Warp_Size
  94. (1 << 16), // GV_Max_Teams
  95. 440, // GV_Default_Num_Teams
  96. 896, // GV_SimpleBufferSize
  97. 1024, // GV_Max_WG_Size,
  98. 256, // GV_Default_WG_Size
  99. };
  100. template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
  101. static_assert(wavesize == 32 || wavesize == 64, "Unexpected wavesize");
  102. return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64;
  103. }
  104. /// For Nvidia GPUs
  105. static constexpr GV NVPTXGridValues = {
  106. 256, // GV_Slot_Size
  107. 32, // GV_Warp_Size
  108. (1 << 16), // GV_Max_Teams
  109. 3200, // GV_Default_Num_Teams
  110. 896, // GV_SimpleBufferSize
  111. 1024, // GV_Max_WG_Size
  112. 128, // GV_Default_WG_Size
  113. };
  114. } // namespace omp
  115. } // namespace llvm
  116. #endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
  117. #ifdef __GNUC__
  118. #pragma GCC diagnostic pop
  119. #endif