fp_trunc_impl.inc 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. //= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements a fairly generic conversion from a wider to a narrower
  10. // IEEE-754 floating-point type in the default (round to nearest, ties to even)
  11. // rounding mode. The constants and types defined following the includes below
  12. // parameterize the conversion.
  13. //
  14. // This routine can be trivially adapted to support conversions to
  15. // half-precision or from quad-precision. It does not support types that don't
  16. // use the usual IEEE-754 interchange formats; specifically, some work would be
  17. // needed to adapt it to (for example) the Intel 80-bit format or PowerPC
  18. // double-double format.
  19. //
  20. // Note, however, that this implementation is only intended to support
  21. // *narrowing* operations; if you need to convert to a *wider* floating-point
  22. // type (e.g. float -> double), then this routine will not do what you want it
  23. // to.
  24. //
  25. // It also requires that integer types at least as large as both formats
  26. // are available on the target platform; this may pose a problem when trying
  27. // to add support for quad on some 32-bit systems, for example.
  28. //
  29. // Finally, the following assumptions are made:
  30. //
  31. // 1. Floating-point types and integer types have the same endianness on the
  32. // target platform.
  33. //
  34. // 2. Quiet NaNs, if supported, are indicated by the leading bit of the
  35. // significand field being set.
  36. //
  37. //===----------------------------------------------------------------------===//
  38. #include "fp_trunc.h"
  39. // The destination type may use a usual IEEE-754 interchange format or Intel
  40. // 80-bit format. In particular, for the destination type dstSigFracBits may be
  41. // not equal to dstSigBits. The source type is assumed to be one of IEEE-754
  42. // standard types.
  43. static __inline dst_t __truncXfYf2__(src_t a) {
  44. // Various constants whose values follow from the type parameters.
  45. // Any reasonable optimizer will fold and propagate all of these.
  46. const int srcInfExp = (1 << srcExpBits) - 1;
  47. const int srcExpBias = srcInfExp >> 1;
  48. const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigFracBits;
  49. const src_rep_t roundMask =
  50. (SRC_REP_C(1) << (srcSigFracBits - dstSigFracBits)) - 1;
  51. const src_rep_t halfway = SRC_REP_C(1)
  52. << (srcSigFracBits - dstSigFracBits - 1);
  53. const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigFracBits - 1);
  54. const src_rep_t srcNaNCode = srcQNaN - 1;
  55. const int dstInfExp = (1 << dstExpBits) - 1;
  56. const int dstExpBias = dstInfExp >> 1;
  57. const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
  58. const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigFracBits - 1);
  59. const dst_rep_t dstNaNCode = dstQNaN - 1;
  60. const src_rep_t aRep = srcToRep(a);
  61. const src_rep_t srcSign = extract_sign_from_src(aRep);
  62. const src_rep_t srcExp = extract_exp_from_src(aRep);
  63. const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep);
  64. dst_rep_t dstSign = srcSign;
  65. dst_rep_t dstExp;
  66. dst_rep_t dstSigFrac;
  67. // Same size exponents and a's significand tail is 0.
  68. // The significand can be truncated and the exponent can be copied over.
  69. const int sigFracTailBits = srcSigFracBits - dstSigFracBits;
  70. if (srcExpBits == dstExpBits &&
  71. ((aRep >> sigFracTailBits) << sigFracTailBits) == aRep) {
  72. dstExp = srcExp;
  73. dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits);
  74. return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac));
  75. }
  76. const int dstExpCandidate = ((int)srcExp - srcExpBias) + dstExpBias;
  77. if (dstExpCandidate >= 1 && dstExpCandidate < dstInfExp) {
  78. // The exponent of a is within the range of normal numbers in the
  79. // destination format. We can convert by simply right-shifting with
  80. // rounding and adjusting the exponent.
  81. dstExp = dstExpCandidate;
  82. dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits);
  83. const src_rep_t roundBits = srcSigFrac & roundMask;
  84. // Round to nearest.
  85. if (roundBits > halfway)
  86. dstSigFrac++;
  87. // Tie to even.
  88. else if (roundBits == halfway)
  89. dstSigFrac += dstSigFrac & 1;
  90. // Rounding has changed the exponent.
  91. if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) {
  92. dstExp += 1;
  93. dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits);
  94. }
  95. } else if (srcExp == srcInfExp && srcSigFrac) {
  96. // a is NaN.
  97. // Conjure the result by beginning with infinity, setting the qNaN
  98. // bit and inserting the (truncated) trailing NaN field.
  99. dstExp = dstInfExp;
  100. dstSigFrac = dstQNaN;
  101. dstSigFrac |= ((srcSigFrac & srcNaNCode) >> sigFracTailBits) & dstNaNCode;
  102. } else if ((int)srcExp >= overflowExponent) {
  103. dstExp = dstInfExp;
  104. dstSigFrac = 0;
  105. } else {
  106. // a underflows on conversion to the destination type or is an exact
  107. // zero. The result may be a denormal or zero. Extract the exponent
  108. // to get the shift amount for the denormalization.
  109. src_rep_t significand = srcSigFrac;
  110. int shift = srcExpBias - dstExpBias - srcExp;
  111. if (srcExp) {
  112. // Set the implicit integer bit if the source is a normal number.
  113. significand |= srcMinNormal;
  114. shift += 1;
  115. }
  116. // Right shift by the denormalization amount with sticky.
  117. if (shift > srcSigFracBits) {
  118. dstExp = 0;
  119. dstSigFrac = 0;
  120. } else {
  121. dstExp = 0;
  122. const bool sticky = shift && ((significand << (srcBits - shift)) != 0);
  123. src_rep_t denormalizedSignificand = significand >> shift | sticky;
  124. dstSigFrac = denormalizedSignificand >> sigFracTailBits;
  125. const src_rep_t roundBits = denormalizedSignificand & roundMask;
  126. // Round to nearest
  127. if (roundBits > halfway)
  128. dstSigFrac++;
  129. // Ties to even
  130. else if (roundBits == halfway)
  131. dstSigFrac += dstSigFrac & 1;
  132. // Rounding has changed the exponent.
  133. if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) {
  134. dstExp += 1;
  135. dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits);
  136. }
  137. }
  138. }
  139. return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac));
  140. }