APFixedPoint.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. //===- APFixedPoint.cpp - Fixed point constant handling ---------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. /// \file
  10. /// Defines the implementation for the fixed point number interface.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/ADT/APFixedPoint.h"
  14. #include "llvm/ADT/APFloat.h"
  15. #include <cmath>
  16. namespace llvm {
  17. void FixedPointSemantics::print(llvm::raw_ostream &OS) const {
  18. OS << "width=" << getWidth() << ", ";
  19. if (isValidLegacySema())
  20. OS << "scale=" << getScale() << ", ";
  21. OS << "msb=" << getMsbWeight() << ", ";
  22. OS << "lsb=" << getLsbWeight() << ", ";
  23. OS << "IsSigned=" << IsSigned << ", ";
  24. OS << "HasUnsignedPadding=" << HasUnsignedPadding << ", ";
  25. OS << "IsSaturated=" << IsSaturated;
  26. }
  27. APFixedPoint APFixedPoint::convert(const FixedPointSemantics &DstSema,
  28. bool *Overflow) const {
  29. APSInt NewVal = Val;
  30. int RelativeUpscale = getLsbWeight() - DstSema.getLsbWeight();
  31. if (Overflow)
  32. *Overflow = false;
  33. if (RelativeUpscale > 0)
  34. NewVal = NewVal.extend(NewVal.getBitWidth() + RelativeUpscale);
  35. NewVal = NewVal.relativeShl(RelativeUpscale);
  36. auto Mask = APInt::getBitsSetFrom(
  37. NewVal.getBitWidth(),
  38. std::min(DstSema.getIntegralBits() - DstSema.getLsbWeight(),
  39. NewVal.getBitWidth()));
  40. APInt Masked(NewVal & Mask);
  41. // Change in the bits above the sign
  42. if (!(Masked == Mask || Masked == 0)) {
  43. // Found overflow in the bits above the sign
  44. if (DstSema.isSaturated())
  45. NewVal = NewVal.isNegative() ? Mask : ~Mask;
  46. else if (Overflow)
  47. *Overflow = true;
  48. }
  49. // If the dst semantics are unsigned, but our value is signed and negative, we
  50. // clamp to zero.
  51. if (!DstSema.isSigned() && NewVal.isSigned() && NewVal.isNegative()) {
  52. // Found negative overflow for unsigned result
  53. if (DstSema.isSaturated())
  54. NewVal = 0;
  55. else if (Overflow)
  56. *Overflow = true;
  57. }
  58. NewVal = NewVal.extOrTrunc(DstSema.getWidth());
  59. NewVal.setIsSigned(DstSema.isSigned());
  60. return APFixedPoint(NewVal, DstSema);
  61. }
  62. int APFixedPoint::compare(const APFixedPoint &Other) const {
  63. APSInt ThisVal = getValue();
  64. APSInt OtherVal = Other.getValue();
  65. bool ThisSigned = Val.isSigned();
  66. bool OtherSigned = OtherVal.isSigned();
  67. int CommonLsb = std::min(getLsbWeight(), Other.getLsbWeight());
  68. int CommonMsb = std::max(getMsbWeight(), Other.getMsbWeight());
  69. unsigned CommonWidth = CommonMsb - CommonLsb + 1;
  70. ThisVal = ThisVal.extOrTrunc(CommonWidth);
  71. OtherVal = OtherVal.extOrTrunc(CommonWidth);
  72. ThisVal = ThisVal.shl(getLsbWeight() - CommonLsb);
  73. OtherVal = OtherVal.shl(Other.getLsbWeight() - CommonLsb);
  74. if (ThisSigned && OtherSigned) {
  75. if (ThisVal.sgt(OtherVal))
  76. return 1;
  77. else if (ThisVal.slt(OtherVal))
  78. return -1;
  79. } else if (!ThisSigned && !OtherSigned) {
  80. if (ThisVal.ugt(OtherVal))
  81. return 1;
  82. else if (ThisVal.ult(OtherVal))
  83. return -1;
  84. } else if (ThisSigned && !OtherSigned) {
  85. if (ThisVal.isSignBitSet())
  86. return -1;
  87. else if (ThisVal.ugt(OtherVal))
  88. return 1;
  89. else if (ThisVal.ult(OtherVal))
  90. return -1;
  91. } else {
  92. // !ThisSigned && OtherSigned
  93. if (OtherVal.isSignBitSet())
  94. return 1;
  95. else if (ThisVal.ugt(OtherVal))
  96. return 1;
  97. else if (ThisVal.ult(OtherVal))
  98. return -1;
  99. }
  100. return 0;
  101. }
  102. APFixedPoint APFixedPoint::getMax(const FixedPointSemantics &Sema) {
  103. bool IsUnsigned = !Sema.isSigned();
  104. auto Val = APSInt::getMaxValue(Sema.getWidth(), IsUnsigned);
  105. if (IsUnsigned && Sema.hasUnsignedPadding())
  106. Val = Val.lshr(1);
  107. return APFixedPoint(Val, Sema);
  108. }
  109. APFixedPoint APFixedPoint::getMin(const FixedPointSemantics &Sema) {
  110. auto Val = APSInt::getMinValue(Sema.getWidth(), !Sema.isSigned());
  111. return APFixedPoint(Val, Sema);
  112. }
  113. bool FixedPointSemantics::fitsInFloatSemantics(
  114. const fltSemantics &FloatSema) const {
  115. // A fixed point semantic fits in a floating point semantic if the maximum
  116. // and minimum values as integers of the fixed point semantic can fit in the
  117. // floating point semantic.
  118. // If these values do not fit, then a floating point rescaling of the true
  119. // maximum/minimum value will not fit either, so the floating point semantic
  120. // cannot be used to perform such a rescaling.
  121. APSInt MaxInt = APFixedPoint::getMax(*this).getValue();
  122. APFloat F(FloatSema);
  123. APFloat::opStatus Status = F.convertFromAPInt(MaxInt, MaxInt.isSigned(),
  124. APFloat::rmNearestTiesToAway);
  125. if ((Status & APFloat::opOverflow) || !isSigned())
  126. return !(Status & APFloat::opOverflow);
  127. APSInt MinInt = APFixedPoint::getMin(*this).getValue();
  128. Status = F.convertFromAPInt(MinInt, MinInt.isSigned(),
  129. APFloat::rmNearestTiesToAway);
  130. return !(Status & APFloat::opOverflow);
  131. }
  132. FixedPointSemantics FixedPointSemantics::getCommonSemantics(
  133. const FixedPointSemantics &Other) const {
  134. int CommonLsb = std::min(getLsbWeight(), Other.getLsbWeight());
  135. int CommonMSb = std::max(getMsbWeight() - hasSignOrPaddingBit(),
  136. Other.getMsbWeight() - Other.hasSignOrPaddingBit());
  137. unsigned CommonWidth = CommonMSb - CommonLsb + 1;
  138. bool ResultIsSigned = isSigned() || Other.isSigned();
  139. bool ResultIsSaturated = isSaturated() || Other.isSaturated();
  140. bool ResultHasUnsignedPadding = false;
  141. if (!ResultIsSigned) {
  142. // Both are unsigned.
  143. ResultHasUnsignedPadding = hasUnsignedPadding() &&
  144. Other.hasUnsignedPadding() && !ResultIsSaturated;
  145. }
  146. // If the result is signed, add an extra bit for the sign. Otherwise, if it is
  147. // unsigned and has unsigned padding, we only need to add the extra padding
  148. // bit back if we are not saturating.
  149. if (ResultIsSigned || ResultHasUnsignedPadding)
  150. CommonWidth++;
  151. return FixedPointSemantics(CommonWidth, Lsb{CommonLsb}, ResultIsSigned,
  152. ResultIsSaturated, ResultHasUnsignedPadding);
  153. }
  154. APFixedPoint APFixedPoint::add(const APFixedPoint &Other,
  155. bool *Overflow) const {
  156. auto CommonFXSema = Sema.getCommonSemantics(Other.getSemantics());
  157. APFixedPoint ConvertedThis = convert(CommonFXSema);
  158. APFixedPoint ConvertedOther = Other.convert(CommonFXSema);
  159. APSInt ThisVal = ConvertedThis.getValue();
  160. APSInt OtherVal = ConvertedOther.getValue();
  161. bool Overflowed = false;
  162. APSInt Result;
  163. if (CommonFXSema.isSaturated()) {
  164. Result = CommonFXSema.isSigned() ? ThisVal.sadd_sat(OtherVal)
  165. : ThisVal.uadd_sat(OtherVal);
  166. } else {
  167. Result = ThisVal.isSigned() ? ThisVal.sadd_ov(OtherVal, Overflowed)
  168. : ThisVal.uadd_ov(OtherVal, Overflowed);
  169. }
  170. if (Overflow)
  171. *Overflow = Overflowed;
  172. return APFixedPoint(Result, CommonFXSema);
  173. }
  174. APFixedPoint APFixedPoint::sub(const APFixedPoint &Other,
  175. bool *Overflow) const {
  176. auto CommonFXSema = Sema.getCommonSemantics(Other.getSemantics());
  177. APFixedPoint ConvertedThis = convert(CommonFXSema);
  178. APFixedPoint ConvertedOther = Other.convert(CommonFXSema);
  179. APSInt ThisVal = ConvertedThis.getValue();
  180. APSInt OtherVal = ConvertedOther.getValue();
  181. bool Overflowed = false;
  182. APSInt Result;
  183. if (CommonFXSema.isSaturated()) {
  184. Result = CommonFXSema.isSigned() ? ThisVal.ssub_sat(OtherVal)
  185. : ThisVal.usub_sat(OtherVal);
  186. } else {
  187. Result = ThisVal.isSigned() ? ThisVal.ssub_ov(OtherVal, Overflowed)
  188. : ThisVal.usub_ov(OtherVal, Overflowed);
  189. }
  190. if (Overflow)
  191. *Overflow = Overflowed;
  192. return APFixedPoint(Result, CommonFXSema);
  193. }
  194. APFixedPoint APFixedPoint::mul(const APFixedPoint &Other,
  195. bool *Overflow) const {
  196. auto CommonFXSema = Sema.getCommonSemantics(Other.getSemantics());
  197. APFixedPoint ConvertedThis = convert(CommonFXSema);
  198. APFixedPoint ConvertedOther = Other.convert(CommonFXSema);
  199. APSInt ThisVal = ConvertedThis.getValue();
  200. APSInt OtherVal = ConvertedOther.getValue();
  201. bool Overflowed = false;
  202. // Widen the LHS and RHS so we can perform a full multiplication.
  203. unsigned Wide = CommonFXSema.getWidth() * 2;
  204. if (CommonFXSema.isSigned()) {
  205. ThisVal = ThisVal.sext(Wide);
  206. OtherVal = OtherVal.sext(Wide);
  207. } else {
  208. ThisVal = ThisVal.zext(Wide);
  209. OtherVal = OtherVal.zext(Wide);
  210. }
  211. // Perform the full multiplication and downscale to get the same scale.
  212. //
  213. // Note that the right shifts here perform an implicit downwards rounding.
  214. // This rounding could discard bits that would technically place the result
  215. // outside the representable range. We interpret the spec as allowing us to
  216. // perform the rounding step first, avoiding the overflow case that would
  217. // arise.
  218. APSInt Result;
  219. if (CommonFXSema.isSigned())
  220. Result = ThisVal.smul_ov(OtherVal, Overflowed)
  221. .relativeAShl(CommonFXSema.getLsbWeight());
  222. else
  223. Result = ThisVal.umul_ov(OtherVal, Overflowed)
  224. .relativeLShl(CommonFXSema.getLsbWeight());
  225. assert(!Overflowed && "Full multiplication cannot overflow!");
  226. Result.setIsSigned(CommonFXSema.isSigned());
  227. // If our result lies outside of the representative range of the common
  228. // semantic, we either have overflow or saturation.
  229. APSInt Max = APFixedPoint::getMax(CommonFXSema).getValue()
  230. .extOrTrunc(Wide);
  231. APSInt Min = APFixedPoint::getMin(CommonFXSema).getValue()
  232. .extOrTrunc(Wide);
  233. if (CommonFXSema.isSaturated()) {
  234. if (Result < Min)
  235. Result = Min;
  236. else if (Result > Max)
  237. Result = Max;
  238. } else
  239. Overflowed = Result < Min || Result > Max;
  240. if (Overflow)
  241. *Overflow = Overflowed;
  242. return APFixedPoint(Result.sextOrTrunc(CommonFXSema.getWidth()),
  243. CommonFXSema);
  244. }
  245. APFixedPoint APFixedPoint::div(const APFixedPoint &Other,
  246. bool *Overflow) const {
  247. auto CommonFXSema = Sema.getCommonSemantics(Other.getSemantics());
  248. APFixedPoint ConvertedThis = convert(CommonFXSema);
  249. APFixedPoint ConvertedOther = Other.convert(CommonFXSema);
  250. APSInt ThisVal = ConvertedThis.getValue();
  251. APSInt OtherVal = ConvertedOther.getValue();
  252. bool Overflowed = false;
  253. // Widen the LHS and RHS so we can perform a full division.
  254. // Also make sure that there will be enough space for the shift below to not
  255. // overflow
  256. unsigned Wide =
  257. CommonFXSema.getWidth() * 2 + std::max(-CommonFXSema.getMsbWeight(), 0);
  258. if (CommonFXSema.isSigned()) {
  259. ThisVal = ThisVal.sext(Wide);
  260. OtherVal = OtherVal.sext(Wide);
  261. } else {
  262. ThisVal = ThisVal.zext(Wide);
  263. OtherVal = OtherVal.zext(Wide);
  264. }
  265. // Upscale to compensate for the loss of precision from division, and
  266. // perform the full division.
  267. if (CommonFXSema.getLsbWeight() < 0)
  268. ThisVal = ThisVal.shl(-CommonFXSema.getLsbWeight());
  269. else if (CommonFXSema.getLsbWeight() > 0)
  270. OtherVal = OtherVal.shl(CommonFXSema.getLsbWeight());
  271. APSInt Result;
  272. if (CommonFXSema.isSigned()) {
  273. APInt Rem;
  274. APInt::sdivrem(ThisVal, OtherVal, Result, Rem);
  275. // If the quotient is negative and the remainder is nonzero, round
  276. // towards negative infinity by subtracting epsilon from the result.
  277. if (ThisVal.isNegative() != OtherVal.isNegative() && !Rem.isZero())
  278. Result = Result - 1;
  279. } else
  280. Result = ThisVal.udiv(OtherVal);
  281. Result.setIsSigned(CommonFXSema.isSigned());
  282. // If our result lies outside of the representative range of the common
  283. // semantic, we either have overflow or saturation.
  284. APSInt Max = APFixedPoint::getMax(CommonFXSema).getValue()
  285. .extOrTrunc(Wide);
  286. APSInt Min = APFixedPoint::getMin(CommonFXSema).getValue()
  287. .extOrTrunc(Wide);
  288. if (CommonFXSema.isSaturated()) {
  289. if (Result < Min)
  290. Result = Min;
  291. else if (Result > Max)
  292. Result = Max;
  293. } else
  294. Overflowed = Result < Min || Result > Max;
  295. if (Overflow)
  296. *Overflow = Overflowed;
  297. return APFixedPoint(Result.sextOrTrunc(CommonFXSema.getWidth()),
  298. CommonFXSema);
  299. }
  300. APFixedPoint APFixedPoint::shl(unsigned Amt, bool *Overflow) const {
  301. APSInt ThisVal = Val;
  302. bool Overflowed = false;
  303. // Widen the LHS.
  304. unsigned Wide = Sema.getWidth() * 2;
  305. if (Sema.isSigned())
  306. ThisVal = ThisVal.sext(Wide);
  307. else
  308. ThisVal = ThisVal.zext(Wide);
  309. // Clamp the shift amount at the original width, and perform the shift.
  310. Amt = std::min(Amt, ThisVal.getBitWidth());
  311. APSInt Result = ThisVal << Amt;
  312. Result.setIsSigned(Sema.isSigned());
  313. // If our result lies outside of the representative range of the
  314. // semantic, we either have overflow or saturation.
  315. APSInt Max = APFixedPoint::getMax(Sema).getValue().extOrTrunc(Wide);
  316. APSInt Min = APFixedPoint::getMin(Sema).getValue().extOrTrunc(Wide);
  317. if (Sema.isSaturated()) {
  318. if (Result < Min)
  319. Result = Min;
  320. else if (Result > Max)
  321. Result = Max;
  322. } else
  323. Overflowed = Result < Min || Result > Max;
  324. if (Overflow)
  325. *Overflow = Overflowed;
  326. return APFixedPoint(Result.sextOrTrunc(Sema.getWidth()), Sema);
  327. }
  328. void APFixedPoint::toString(SmallVectorImpl<char> &Str) const {
  329. APSInt Val = getValue();
  330. int Lsb = getLsbWeight();
  331. int OrigWidth = getWidth();
  332. if (Lsb >= 0) {
  333. APSInt IntPart = Val;
  334. IntPart = IntPart.extend(IntPart.getBitWidth() + Lsb);
  335. IntPart <<= Lsb;
  336. IntPart.toString(Str, /*Radix=*/10);
  337. Str.push_back('.');
  338. Str.push_back('0');
  339. return;
  340. }
  341. if (Val.isSigned() && Val.isNegative()) {
  342. Val = -Val;
  343. Val.setIsUnsigned(true);
  344. Str.push_back('-');
  345. }
  346. int Scale = -getLsbWeight();
  347. APSInt IntPart = (OrigWidth > Scale) ? (Val >> Scale) : APSInt::get(0);
  348. // Add 4 digits to hold the value after multiplying 10 (the radix)
  349. unsigned Width = std::max(OrigWidth, Scale) + 4;
  350. APInt FractPart = Val.zextOrTrunc(Scale).zext(Width);
  351. APInt FractPartMask = APInt::getAllOnes(Scale).zext(Width);
  352. APInt RadixInt = APInt(Width, 10);
  353. IntPart.toString(Str, /*Radix=*/10);
  354. Str.push_back('.');
  355. do {
  356. (FractPart * RadixInt)
  357. .lshr(Scale)
  358. .toString(Str, /*Radix=*/10, Val.isSigned());
  359. FractPart = (FractPart * RadixInt) & FractPartMask;
  360. } while (FractPart != 0);
  361. }
  362. void APFixedPoint::print(raw_ostream &OS) const {
  363. OS << "APFixedPoint(" << toString() << ", {";
  364. Sema.print(OS);
  365. OS << "})";
  366. }
  367. LLVM_DUMP_METHOD void APFixedPoint::dump() const { print(llvm::errs()); }
  368. APFixedPoint APFixedPoint::negate(bool *Overflow) const {
  369. if (!isSaturated()) {
  370. if (Overflow)
  371. *Overflow =
  372. (!isSigned() && Val != 0) || (isSigned() && Val.isMinSignedValue());
  373. return APFixedPoint(-Val, Sema);
  374. }
  375. // We never overflow for saturation
  376. if (Overflow)
  377. *Overflow = false;
  378. if (isSigned())
  379. return Val.isMinSignedValue() ? getMax(Sema) : APFixedPoint(-Val, Sema);
  380. else
  381. return APFixedPoint(Sema);
  382. }
  383. APSInt APFixedPoint::convertToInt(unsigned DstWidth, bool DstSign,
  384. bool *Overflow) const {
  385. APSInt Result = getIntPart();
  386. unsigned SrcWidth = getWidth();
  387. APSInt DstMin = APSInt::getMinValue(DstWidth, !DstSign);
  388. APSInt DstMax = APSInt::getMaxValue(DstWidth, !DstSign);
  389. if (SrcWidth < DstWidth) {
  390. Result = Result.extend(DstWidth);
  391. } else if (SrcWidth > DstWidth) {
  392. DstMin = DstMin.extend(SrcWidth);
  393. DstMax = DstMax.extend(SrcWidth);
  394. }
  395. if (Overflow) {
  396. if (Result.isSigned() && !DstSign) {
  397. *Overflow = Result.isNegative() || Result.ugt(DstMax);
  398. } else if (Result.isUnsigned() && DstSign) {
  399. *Overflow = Result.ugt(DstMax);
  400. } else {
  401. *Overflow = Result < DstMin || Result > DstMax;
  402. }
  403. }
  404. Result.setIsSigned(DstSign);
  405. return Result.extOrTrunc(DstWidth);
  406. }
  407. const fltSemantics *APFixedPoint::promoteFloatSemantics(const fltSemantics *S) {
  408. if (S == &APFloat::BFloat())
  409. return &APFloat::IEEEdouble();
  410. else if (S == &APFloat::IEEEhalf())
  411. return &APFloat::IEEEsingle();
  412. else if (S == &APFloat::IEEEsingle())
  413. return &APFloat::IEEEdouble();
  414. else if (S == &APFloat::IEEEdouble())
  415. return &APFloat::IEEEquad();
  416. llvm_unreachable("Could not promote float type!");
  417. }
  418. APFloat APFixedPoint::convertToFloat(const fltSemantics &FloatSema) const {
  419. // For some operations, rounding mode has an effect on the result, while
  420. // other operations are lossless and should never result in rounding.
  421. // To signify which these operations are, we define two rounding modes here.
  422. APFloat::roundingMode RM = APFloat::rmNearestTiesToEven;
  423. APFloat::roundingMode LosslessRM = APFloat::rmTowardZero;
  424. // Make sure that we are operating in a type that works with this fixed-point
  425. // semantic.
  426. const fltSemantics *OpSema = &FloatSema;
  427. while (!Sema.fitsInFloatSemantics(*OpSema))
  428. OpSema = promoteFloatSemantics(OpSema);
  429. // Convert the fixed point value bits as an integer. If the floating point
  430. // value does not have the required precision, we will round according to the
  431. // given mode.
  432. APFloat Flt(*OpSema);
  433. APFloat::opStatus S = Flt.convertFromAPInt(Val, Sema.isSigned(), RM);
  434. // If we cared about checking for precision loss, we could look at this
  435. // status.
  436. (void)S;
  437. // Scale down the integer value in the float to match the correct scaling
  438. // factor.
  439. APFloat ScaleFactor(std::pow(2, Sema.getLsbWeight()));
  440. bool Ignored;
  441. ScaleFactor.convert(*OpSema, LosslessRM, &Ignored);
  442. Flt.multiply(ScaleFactor, LosslessRM);
  443. if (OpSema != &FloatSema)
  444. Flt.convert(FloatSema, RM, &Ignored);
  445. return Flt;
  446. }
  447. APFixedPoint APFixedPoint::getFromIntValue(const APSInt &Value,
  448. const FixedPointSemantics &DstFXSema,
  449. bool *Overflow) {
  450. FixedPointSemantics IntFXSema = FixedPointSemantics::GetIntegerSemantics(
  451. Value.getBitWidth(), Value.isSigned());
  452. return APFixedPoint(Value, IntFXSema).convert(DstFXSema, Overflow);
  453. }
  454. APFixedPoint
  455. APFixedPoint::getFromFloatValue(const APFloat &Value,
  456. const FixedPointSemantics &DstFXSema,
  457. bool *Overflow) {
  458. // For some operations, rounding mode has an effect on the result, while
  459. // other operations are lossless and should never result in rounding.
  460. // To signify which these operations are, we define two rounding modes here,
  461. // even though they are the same mode.
  462. APFloat::roundingMode RM = APFloat::rmTowardZero;
  463. APFloat::roundingMode LosslessRM = APFloat::rmTowardZero;
  464. const fltSemantics &FloatSema = Value.getSemantics();
  465. if (Value.isNaN()) {
  466. // Handle NaN immediately.
  467. if (Overflow)
  468. *Overflow = true;
  469. return APFixedPoint(DstFXSema);
  470. }
  471. // Make sure that we are operating in a type that works with this fixed-point
  472. // semantic.
  473. const fltSemantics *OpSema = &FloatSema;
  474. while (!DstFXSema.fitsInFloatSemantics(*OpSema))
  475. OpSema = promoteFloatSemantics(OpSema);
  476. APFloat Val = Value;
  477. bool Ignored;
  478. if (&FloatSema != OpSema)
  479. Val.convert(*OpSema, LosslessRM, &Ignored);
  480. // Scale up the float so that the 'fractional' part of the mantissa ends up in
  481. // the integer range instead. Rounding mode is irrelevant here.
  482. // It is fine if this overflows to infinity even for saturating types,
  483. // since we will use floating point comparisons to check for saturation.
  484. APFloat ScaleFactor(std::pow(2, -DstFXSema.getLsbWeight()));
  485. ScaleFactor.convert(*OpSema, LosslessRM, &Ignored);
  486. Val.multiply(ScaleFactor, LosslessRM);
  487. // Convert to the integral representation of the value. This rounding mode
  488. // is significant.
  489. APSInt Res(DstFXSema.getWidth(), !DstFXSema.isSigned());
  490. Val.convertToInteger(Res, RM, &Ignored);
  491. // Round the integral value and scale back. This makes the
  492. // overflow calculations below work properly. If we do not round here,
  493. // we risk checking for overflow with a value that is outside the
  494. // representable range of the fixed-point semantic even though no overflow
  495. // would occur had we rounded first.
  496. ScaleFactor = APFloat(std::pow(2, DstFXSema.getLsbWeight()));
  497. ScaleFactor.convert(*OpSema, LosslessRM, &Ignored);
  498. Val.roundToIntegral(RM);
  499. Val.multiply(ScaleFactor, LosslessRM);
  500. // Check for overflow/saturation by checking if the floating point value
  501. // is outside the range representable by the fixed-point value.
  502. APFloat FloatMax = getMax(DstFXSema).convertToFloat(*OpSema);
  503. APFloat FloatMin = getMin(DstFXSema).convertToFloat(*OpSema);
  504. bool Overflowed = false;
  505. if (DstFXSema.isSaturated()) {
  506. if (Val > FloatMax)
  507. Res = getMax(DstFXSema).getValue();
  508. else if (Val < FloatMin)
  509. Res = getMin(DstFXSema).getValue();
  510. } else
  511. Overflowed = Val > FloatMax || Val < FloatMin;
  512. if (Overflow)
  513. *Overflow = Overflowed;
  514. return APFixedPoint(Res, DstFXSema);
  515. }
  516. } // namespace llvm