accurate_accumulate.h 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. #pragma once
  2. #include <util/ysaveload.h>
  3. #include <util/generic/vector.h>
  4. #include <util/system/yassert.h>
  5. //! See more details here http://en.wikipedia.org/wiki/Kahan_summation_algorithm
  6. template <typename TAccumulateType>
  7. class TKahanAccumulator {
  8. public:
  9. using TValueType = TAccumulateType;
  10. template <typename TFloatType>
  11. explicit TKahanAccumulator(const TFloatType x)
  12. : Sum_(x)
  13. , Compensation_()
  14. {
  15. }
  16. TKahanAccumulator()
  17. : Sum_()
  18. , Compensation_()
  19. {
  20. }
  21. template <typename TFloatType>
  22. TKahanAccumulator& operator=(const TFloatType& rhs) {
  23. Sum_ = TValueType(rhs);
  24. Compensation_ = TValueType();
  25. return *this;
  26. }
  27. TValueType Get() const {
  28. return Sum_ + Compensation_;
  29. }
  30. template <typename TFloatType>
  31. inline operator TFloatType() const {
  32. return Get();
  33. }
  34. template <typename TFloatType>
  35. inline bool operator<(const TKahanAccumulator<TFloatType>& other) const {
  36. return Get() < other.Get();
  37. }
  38. template <typename TFloatType>
  39. inline bool operator<=(const TKahanAccumulator<TFloatType>& other) const {
  40. return !(other < *this);
  41. }
  42. template <typename TFloatType>
  43. inline bool operator>(const TKahanAccumulator<TFloatType>& other) const {
  44. return other < *this;
  45. }
  46. template <typename TFloatType>
  47. inline bool operator>=(const TKahanAccumulator<TFloatType>& other) const {
  48. return !(*this < other);
  49. }
  50. template <typename TFloatType>
  51. inline TKahanAccumulator& operator+=(const TFloatType x) {
  52. const TValueType y = TValueType(x) - Compensation_;
  53. const TValueType t = Sum_ + y;
  54. Compensation_ = (t - Sum_) - y;
  55. Sum_ = t;
  56. return *this;
  57. }
  58. template <typename TFloatType>
  59. inline TKahanAccumulator& operator-=(const TFloatType x) {
  60. return *this += -TValueType(x);
  61. }
  62. template <typename TFloatType>
  63. inline TKahanAccumulator& operator*=(const TFloatType x) {
  64. return *this = TValueType(*this) * TValueType(x);
  65. }
  66. template <typename TFloatType>
  67. inline TKahanAccumulator& operator/=(const TFloatType x) {
  68. return *this = TValueType(*this) / TValueType(x);
  69. }
  70. Y_SAVELOAD_DEFINE(Sum_, Compensation_);
  71. private:
  72. TValueType Sum_;
  73. TValueType Compensation_;
  74. };
  75. template <typename TAccumulateType, typename TFloatType>
  76. inline const TKahanAccumulator<TAccumulateType>
  77. operator+(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) {
  78. return lhs += rhs;
  79. }
  80. template <typename TAccumulateType, typename TFloatType>
  81. inline const TKahanAccumulator<TAccumulateType>
  82. operator-(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) {
  83. return lhs -= rhs;
  84. }
  85. template <typename TAccumulateType, typename TFloatType>
  86. inline const TKahanAccumulator<TAccumulateType>
  87. operator*(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) {
  88. return lhs *= rhs;
  89. }
  90. template <typename TAccumulateType, typename TFloatType>
  91. inline const TKahanAccumulator<TAccumulateType>
  92. operator/(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) {
  93. return lhs /= rhs;
  94. }
  95. template <typename TAccumulatorType, typename It>
  96. static inline TAccumulatorType TypedFastAccumulate(It begin, It end) {
  97. TAccumulatorType accumulator = TAccumulatorType();
  98. for (; begin + 15 < end; begin += 16) {
  99. accumulator += *(begin + 0) +
  100. *(begin + 1) +
  101. *(begin + 2) +
  102. *(begin + 3) +
  103. *(begin + 4) +
  104. *(begin + 5) +
  105. *(begin + 6) +
  106. *(begin + 7) +
  107. *(begin + 8) +
  108. *(begin + 9) +
  109. *(begin + 10) +
  110. *(begin + 11) +
  111. *(begin + 12) +
  112. *(begin + 13) +
  113. *(begin + 14) +
  114. *(begin + 15);
  115. }
  116. for (; begin != end; ++begin) {
  117. accumulator += *begin;
  118. }
  119. return accumulator;
  120. }
  121. template <class TOperation, typename TAccumulatorType, typename It1, typename It2>
  122. static inline TAccumulatorType TypedFastInnerOperation(It1 begin1, It1 end1, It2 begin2) {
  123. TAccumulatorType accumulator = TAccumulatorType();
  124. const TOperation op;
  125. for (; begin1 + 15 < end1; begin1 += 16, begin2 += 16) {
  126. accumulator += op(*(begin1 + 0), *(begin2 + 0)) +
  127. op(*(begin1 + 1), *(begin2 + 1)) +
  128. op(*(begin1 + 2), *(begin2 + 2)) +
  129. op(*(begin1 + 3), *(begin2 + 3)) +
  130. op(*(begin1 + 4), *(begin2 + 4)) +
  131. op(*(begin1 + 5), *(begin2 + 5)) +
  132. op(*(begin1 + 6), *(begin2 + 6)) +
  133. op(*(begin1 + 7), *(begin2 + 7)) +
  134. op(*(begin1 + 8), *(begin2 + 8)) +
  135. op(*(begin1 + 9), *(begin2 + 9)) +
  136. op(*(begin1 + 10), *(begin2 + 10)) +
  137. op(*(begin1 + 11), *(begin2 + 11)) +
  138. op(*(begin1 + 12), *(begin2 + 12)) +
  139. op(*(begin1 + 13), *(begin2 + 13)) +
  140. op(*(begin1 + 14), *(begin2 + 14)) +
  141. op(*(begin1 + 15), *(begin2 + 15));
  142. }
  143. for (; begin1 != end1; ++begin1, ++begin2) {
  144. accumulator += op(*begin1, *begin2);
  145. }
  146. return accumulator;
  147. }
  148. template <typename TAccumulatorType, typename It1, typename It2>
  149. static inline TAccumulatorType TypedFastInnerProduct(It1 begin1, It1 end1, It2 begin2) {
  150. return TypedFastInnerOperation<std::multiplies<>, TAccumulatorType>(begin1, end1, begin2);
  151. }
  152. template <typename It>
  153. static inline double FastAccumulate(It begin, It end) {
  154. return TypedFastAccumulate<double>(begin, end);
  155. }
  156. template <typename T>
  157. static inline double FastAccumulate(const TVector<T>& sequence) {
  158. return FastAccumulate(sequence.begin(), sequence.end());
  159. }
  160. template <typename It>
  161. static inline double FastKahanAccumulate(It begin, It end) {
  162. return TypedFastAccumulate<TKahanAccumulator<double>>(begin, end);
  163. }
  164. template <typename T>
  165. static inline double FastKahanAccumulate(const TVector<T>& sequence) {
  166. return FastKahanAccumulate(sequence.begin(), sequence.end());
  167. }
  168. template <typename It1, typename It2>
  169. static inline double FastInnerProduct(It1 begin1, It1 end1, It2 begin2) {
  170. return TypedFastInnerProduct<double>(begin1, end1, begin2);
  171. }
  172. template <typename T>
  173. static inline double FastInnerProduct(const TVector<T>& lhs, const TVector<T>& rhs) {
  174. Y_ASSERT(lhs.size() == rhs.size());
  175. return FastInnerProduct(lhs.begin(), lhs.end(), rhs.begin());
  176. }
  177. template <typename It1, typename It2>
  178. static inline double FastKahanInnerProduct(It1 begin1, It1 end1, It2 begin2) {
  179. return TypedFastInnerProduct<TKahanAccumulator<double>>(begin1, end1, begin2);
  180. }
  181. template <typename T>
  182. static inline double FastKahanInnerProduct(const TVector<T>& lhs, const TVector<T>& rhs) {
  183. Y_ASSERT(lhs.size() == rhs.size());
  184. return FastKahanInnerProduct(lhs.begin(), lhs.end(), rhs.begin());
  185. }