fp_trunc.h 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. //=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
// Defines the source and destination floating-point types and their bit-layout
// constants, as selected by the SRC_* and DST_* macros.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #ifndef FP_TRUNC_HEADER
  13. #define FP_TRUNC_HEADER
  14. #include "int_lib.h"
  15. #if defined SRC_SINGLE
  16. typedef float src_t;
  17. typedef uint32_t src_rep_t;
  18. #define SRC_REP_C UINT32_C
  19. static const int srcBits = sizeof(src_t) * CHAR_BIT;
  20. static const int srcSigFracBits = 23;
  21. // -1 accounts for the sign bit.
  22. // srcBits - srcSigFracBits - 1
  23. static const int srcExpBits = 8;
  24. #elif defined SRC_DOUBLE
  25. typedef double src_t;
  26. typedef uint64_t src_rep_t;
  27. #define SRC_REP_C UINT64_C
  28. static const int srcBits = sizeof(src_t) * CHAR_BIT;
  29. static const int srcSigFracBits = 52;
  30. // -1 accounts for the sign bit.
  31. // srcBits - srcSigFracBits - 1
  32. static const int srcExpBits = 11;
  33. #elif defined SRC_QUAD
  34. typedef tf_float src_t;
  35. typedef __uint128_t src_rep_t;
  36. #define SRC_REP_C (__uint128_t)
  37. static const int srcBits = sizeof(src_t) * CHAR_BIT;
  38. static const int srcSigFracBits = 112;
  39. // -1 accounts for the sign bit.
  40. // srcBits - srcSigFracBits - 1
  41. static const int srcExpBits = 15;
  42. #else
  43. #error Source should be double precision or quad precision!
  44. #endif // end source precision
  45. #if defined DST_DOUBLE
  46. typedef double dst_t;
  47. typedef uint64_t dst_rep_t;
  48. #define DST_REP_C UINT64_C
  49. static const int dstBits = sizeof(dst_t) * CHAR_BIT;
  50. static const int dstSigFracBits = 52;
  51. // -1 accounts for the sign bit.
  52. // dstBits - dstSigFracBits - 1
  53. static const int dstExpBits = 11;
  54. #elif defined DST_80
  55. typedef xf_float dst_t;
  56. typedef __uint128_t dst_rep_t;
  57. #define DST_REP_C (__uint128_t)
  58. static const int dstBits = 80;
  59. static const int dstSigFracBits = 63;
  60. // -1 accounts for the sign bit.
  61. // -1 accounts for the explicitly stored integer bit.
  62. // dstBits - dstSigFracBits - 1 - 1
  63. static const int dstExpBits = 15;
  64. #elif defined DST_SINGLE
  65. typedef float dst_t;
  66. typedef uint32_t dst_rep_t;
  67. #define DST_REP_C UINT32_C
  68. static const int dstBits = sizeof(dst_t) * CHAR_BIT;
  69. static const int dstSigFracBits = 23;
  70. // -1 accounts for the sign bit.
  71. // dstBits - dstSigFracBits - 1
  72. static const int dstExpBits = 8;
  73. #elif defined DST_HALF
  74. #ifdef COMPILER_RT_HAS_FLOAT16
  75. typedef _Float16 dst_t;
  76. #else
  77. typedef uint16_t dst_t;
  78. #endif
  79. typedef uint16_t dst_rep_t;
  80. #define DST_REP_C UINT16_C
  81. static const int dstBits = sizeof(dst_t) * CHAR_BIT;
  82. static const int dstSigFracBits = 10;
  83. // -1 accounts for the sign bit.
  84. // dstBits - dstSigFracBits - 1
  85. static const int dstExpBits = 5;
  86. #elif defined DST_BFLOAT
  87. typedef __bf16 dst_t;
  88. typedef uint16_t dst_rep_t;
  89. #define DST_REP_C UINT16_C
  90. static const int dstBits = sizeof(dst_t) * CHAR_BIT;
  91. static const int dstSigFracBits = 7;
  92. // -1 accounts for the sign bit.
  93. // dstBits - dstSigFracBits - 1
  94. static const int dstExpBits = 8;
  95. #else
  96. #error Destination should be single precision or double precision!
  97. #endif // end destination precision
  98. // TODO: These helper routines should be placed into fp_lib.h
  99. // Currently they depend on macros/constants defined above.
  100. static inline src_rep_t extract_sign_from_src(src_rep_t x) {
  101. const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1);
  102. return (x & srcSignMask) >> (srcBits - 1);
  103. }
  104. static inline src_rep_t extract_exp_from_src(src_rep_t x) {
  105. const int srcSigBits = srcBits - 1 - srcExpBits;
  106. const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits;
  107. return (x & srcExpMask) >> srcSigBits;
  108. }
  109. static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
  110. const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1;
  111. return x & srcSigFracMask;
  112. }
  113. static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) {
  114. dst_rep_t result = (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac;
  115. // Set the explicit integer bit in F80 if present.
  116. if (dstBits == 80 && exp) {
  117. result |= (DST_REP_C(1) << dstSigFracBits);
  118. }
  119. return result;
  120. }
  121. // End of specialization parameters. Two helper routines for conversion to and
  122. // from the representation of floating-point data as integer values follow.
  123. static inline src_rep_t srcToRep(src_t x) {
  124. const union {
  125. src_t f;
  126. src_rep_t i;
  127. } rep = {.f = x};
  128. return rep.i;
  129. }
  130. static inline dst_t dstFromRep(dst_rep_t x) {
  131. const union {
  132. dst_t f;
  133. dst_rep_t i;
  134. } rep = {.i = x};
  135. return rep.f;
  136. }
  137. #endif // FP_TRUNC_HEADER